class JobTagRequirement(db.Model, UtilityMixins):
    """
    Model representing a dependency of a job on a tag.

    If a job has a tag requirement, it will only run on agents that have
    that tag.  Setting :attr:`negate` inverts the requirement: an agent
    that has the tag can not work on the job.
    """
    __tablename__ = config.get("table_job_tag_req")
    # A given tag may be attached to a given job at most once.
    __table_args__ = (UniqueConstraint("tag_id", "job_id"), )

    id = id_column()

    tag_id = db.Column(db.Integer,
                       db.ForeignKey("%s.id" % config.get("table_tag")),
                       nullable=False,
                       doc="Reference to the required tag")

    job_id = db.Column(IDTypeWork,
                       db.ForeignKey("%s.id" % config.get("table_job")),
                       nullable=False,
                       doc="Foreign key to :class:`Job.id`")

    negate = db.Column(
        db.Boolean, nullable=False, default=False,
        doc="If true, an agent that has this tag can not work on this job")

    # The backref exposes ``Job.tag_requirements`` as a dynamic query; the
    # cascade is declared on that backref, i.e. on the Job side.
    job = db.relationship("Job",
                          backref=db.backref("tag_requirements",
                                             lazy="dynamic",
                                             cascade="all, delete-orphan"))

    tag = db.relationship("Tag")
class TaskCount(db.Model):
    """
    Snapshot of how many tasks were queued, running, done and failed at
    :attr:`counted_time`, optionally scoped to one job queue.

    Rows are stored under the ``statistics`` bind key rather than the
    default database.
    """
    __bind_key__ = 'statistics'
    __tablename__ = config.get("table_statistics_task_count")

    id = id_column(db.Integer)

    counted_time = db.Column(
        db.DateTime,
        nullable=False,
        default=datetime.utcnow,
        doc="The point in time at which these counts were done")

    # No foreign key reference, because this table is stored in a separate db
    # Code reading it will have to check for referential integrity manually.
    job_queue_id = db.Column(db.Integer,
                             nullable=True,
                             doc="ID of the jobqueue these stats refer to")

    total_queued = db.Column(db.Integer,
                             nullable=False,
                             doc="Number of queued tasks at `counted_time`")

    total_running = db.Column(db.Integer,
                              nullable=False,
                              doc="Number of running tasks at `counted_time`")

    total_done = db.Column(db.Integer,
                           nullable=False,
                           doc="Number of done tasks at `counted_time`")

    total_failed = db.Column(db.Integer,
                             nullable=False,
                             doc="Number of failed tasks at `counted_time`")
class Software(db.Model, UtilityMixins):
    """
    Model to represent a versioned piece of software that can be present on
    an agent and may be depended on by a job and/or jobtype.

    .. note::
        This table enforces two forms of uniqueness.  The :attr:`id` column
        must be unique and the combination of these columns must also be
        unique to limit the frequency of duplicate data:

            * :attr:`software`
            * :attr:`version`
    """
    __tablename__ = TABLE_SOFTWARE
    # One row per (software name, version) pair.
    __table_args__ = (
        UniqueConstraint("software", "version"), )

    id = id_column()

    # Name and version share MAX_TAG_LENGTH as their maximum length.
    software = db.Column(db.String(MAX_TAG_LENGTH), nullable=False,
                         doc=dedent(""" The name of the software"""))

    # When no version is supplied the column default is the string "any".
    version = db.Column(db.String(MAX_TAG_LENGTH),
                        default="any", nullable=False,
                        doc=dedent(""" The version of the software. This value does not follow any special formatting rules because the format depends on the 3rd party."""))
class JobType(db.Model):
    """
    Describes a job type: its name, the optional class name inside the
    loaded code, the code reference itself and the :attr:`mode` that says
    how that code is to be obtained.
    """
    __tablename__ = TABLE_JOB_TYPE

    id = id_column(db.Integer)

    name = db.Column(
        db.String(MAX_JOBTYPE_LENGTH),
        nullable=False,
        doc=dedent(
            " The name of the job type. This can be either a human "
            "readable name or the name of the job type class itself."))

    description = db.Column(
        db.Text,
        nullable=True,
        doc=dedent(
            " Human readable description of the job type. This field is "
            "not required and is not directly relied upon anywhere."))

    classname = db.Column(
        db.String(MAX_JOBTYPE_LENGTH),
        nullable=True,
        doc=dedent(
            " The name of the job class contained within the file being "
            "loaded. This field may be null but when it's not provided "
            ":attr:`name` will be used instead."))

    code = db.Column(
        db.UnicodeText,
        nullable=False,
        doc=dedent(
            " General field containing the 'code' to retrieve the job "
            "type. See below for information on what this field will "
            "contain depending on how the job will be loaded."))

    mode = db.Column(
        JobTypeLoadModeEnum,
        default=JobTypeLoadMode.IMPORT,
        nullable=False,
        doc=dedent(
            " Indicates how the job type should be retrieved. "
            ".. csv-table:: **JobTypeLoadMode Enums** "
            ":header: Value, Result :widths: 10, 50 "
            "DOWNLOAD, job type will be downloaded remotely "
            "IMPORT, the remote agent will import the job type "
            "OPEN, code is loaded directly from a file on disk"))

    jobs = db.relationship(
        "Job",
        backref="job_type",
        lazy="dynamic",
        doc=dedent(
            " Relationship between this jobtype and "
            ":class:`.Job` objects."))

    @validates("mode")
    def validates_mode(self, key, value):
        """
        Accept ``value`` for :attr:`mode` only if it is a member of
        ``JobTypeLoadMode``.

        :raises ValueError:
            if ``value`` is not contained in ``JobTypeLoadMode``
        """
        if value in JobTypeLoadMode:
            return value

        raise ValueError("invalid value for mode")
class GPU(db.Model, UtilityMixins, ReprMixin):
    """
    Model representing a graphics card model, identified by its unique
    :attr:`fullname`.
    """
    __tablename__ = config.get("table_gpu")
    # Each card model name may only be stored once.
    __table_args__ = (UniqueConstraint("fullname"), )

    id = id_column(db.Integer)

    # Maximum length comes from the ``max_gpu_name_length`` config value.
    fullname = db.Column(db.String(config.get("max_gpu_name_length")),
                         nullable=False,
                         doc="The full name of this graphics card model")
class Tag(db.Model, UtilityMixins):
    """
    Model which provides tagging for :class:`.Job` and :class:`.Agent`
    objects.  Tag values are unique.
    """
    __tablename__ = TABLE_TAG
    # A tag value can only exist once in the table.
    __table_args__ = (UniqueConstraint("tag"), )

    id = id_column()

    tag = db.Column(db.String(MAX_TAG_LENGTH), nullable=False,
                    doc=dedent("""The actual value of the tag"""))
class TaskEventCount(db.Model):
    """
    Counts of task events (created, deleted, restarted, started, failed,
    done) that happened between :attr:`time_start` and :attr:`time_end`,
    optionally scoped to one job queue.

    Rows are stored under the ``statistics`` bind key rather than the
    default database.
    """
    __bind_key__ = 'statistics'
    __tablename__ = config.get("table_statistics_task_event_count")

    id = id_column(db.Integer)

    # Both ends of the time period default to the moment of insertion.
    time_start = db.Column(db.DateTime,
                           nullable=False,
                           default=datetime.utcnow)

    time_end = db.Column(db.DateTime,
                         nullable=False,
                         default=datetime.utcnow)

    # No foreign key reference, because this table is stored in a separate db
    # Code reading it will have to check for referential integrity manually.
    job_queue_id = db.Column(db.Integer,
                             nullable=True,
                             doc="ID of the jobqueue these stats refer to")

    # All counters below are non-nullable and start at zero.
    num_new = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that were newly created during the time period")

    num_deleted = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that were deleted during the time period")

    num_restarted = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that were restarted during the time period")

    num_started = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that work was started on during the time period")

    num_failed = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that failed during the time period")

    num_done = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="Number of tasks that were finished successfully during the time "
            "period")
class JobType(db.Model, UtilityMixins, ReprMixin):
    """
    Stores the unique information necessary to execute a task.

    Besides the unique :attr:`name` and an optional description, a job
    type carries the subject and body templates used for success and
    failure notifications.
    """
    __tablename__ = config.get("table_job_type")
    # Job type names are unique.
    __table_args__ = (UniqueConstraint("name"), )

    REPR_COLUMNS = ("id", "name")

    id = id_column(IDTypeWork)

    name = db.Column(
        db.String(config.get("job_type_max_name_length")),
        nullable=False,
        doc="The name of the job type.  This can be either a human "
            "readable name or the name of the job type class itself.")

    description = db.Column(
        db.Text,
        nullable=True,
        doc="Human readable description of the job type.  This field is not "
            "required and is not directly relied upon anywhere.")

    success_subject = db.Column(
        db.Text,
        nullable=True,
        doc="The subject line to use for notifications in case of "
            "success.  Some substitutions, for example for the job title, "
            "are available.")

    # Fixed: the text previously read "notifications in in case of ...".
    success_body = db.Column(
        db.Text,
        nullable=True,
        doc="The email body to use for notifications in case of "
            "success.  Some substitutions, for example for the job title, "
            "are available.")

    fail_subject = db.Column(
        db.Text,
        nullable=True,
        doc="The subject line to use for notifications in case of "
            "failure.  Some substitutions, for example for the job title, "
            "are available.")

    # Fixed: this column is the *failure* body; the text previously said
    # "in case of success", copied from ``success_body``.
    fail_body = db.Column(
        db.Text,
        nullable=True,
        doc="The email body to use for notifications in case of "
            "failure.  Some substitutions, for example for the job title, "
            "are available.")

    @validates("name")
    def validate_name(self, key, value):
        """
        Reject the empty string as a job type name.

        :raises ValueError:
            if ``value`` is ``""``
        """
        if value == "":
            raise ValueError("Name cannot be empty")

        return value
class Tag(db.Model, UtilityMixins):
    """
    Model which provides tagging for :class:`.Job` and :class:`.Agent`
    objects.  Tag values are unique.
    """
    __tablename__ = config.get("table_tag")
    # A tag value can only exist once in the table.
    __table_args__ = (UniqueConstraint("tag"), )

    id = id_column()

    # Maximum length comes from the ``max_tag_length`` config value.
    tag = db.Column(db.String(config.get("max_tag_length")),
                    nullable=False,
                    doc="The actual value of the tag")
class TaskLog(db.Model, UtilityMixins, ReprMixin):
    """
    Table which represents a single task log entry.

    A log is identified by its unique :attr:`identifier`, may reference the
    agent it was created on and is linked to tasks through
    ``TaskTaskLogAssociation`` rows.
    """
    __tablename__ = config.get("table_task_log")
    # Log identifiers are unique.
    __table_args__ = (UniqueConstraint("identifier"),)

    id = id_column(db.Integer)

    identifier = db.Column(
        db.String(255),
        nullable=False,
        doc="The identifier for this log")

    agent_id = db.Column(
        IDTypeAgent,
        db.ForeignKey("%s.id" % config.get("table_agent")),
        nullable=True,
        doc="The agent this log was created on")

    created_on = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        doc="The time when this log was created")

    #
    # Relationships
    #
    # Fixed: the adjacent literals previously joined without a space
    # ("`TaskLog`and the ...").
    agent = db.relationship(
        "Agent",
        backref=db.backref("task_logs", lazy="dynamic"),
        doc="Relationship between a :class:`TaskLog` "
            "and the :class:`pyfarm.models.Agent` it was "
            "created on")

    task_associations = db.relationship(
        TaskTaskLogAssociation, backref="log",
        doc="Relationship between tasks and their logs.")

    def _num_tasks_in_state(self, state):
        """
        Count the ``TaskTaskLogAssociation`` rows for this log whose state
        equals ``state``.
        """
        return TaskTaskLogAssociation.query.filter_by(
            log=self, state=state).count()

    def num_queued_tasks(self):
        """Number of associated tasks whose association state is NULL."""
        return self._num_tasks_in_state(None)

    def num_running_tasks(self):
        """Number of associated tasks in ``WorkState.RUNNING``."""
        return self._num_tasks_in_state(WorkState.RUNNING)

    def num_failed_tasks(self):
        """Number of associated tasks in ``WorkState.FAILED``."""
        return self._num_tasks_in_state(WorkState.FAILED)

    def num_done_tasks(self):
        """Number of associated tasks in ``WorkState.DONE``."""
        return self._num_tasks_in_state(WorkState.DONE)
class SoftwareVersion(db.Model, UtilityMixins):
    """
    Model to represent a version for a given software.

    Within one software both the version string and the rank are unique.
    """
    __tablename__ = config.get("table_software_version")
    __table_args__ = (UniqueConstraint("software_id", "version"),
                      UniqueConstraint("software_id", "rank"))

    id = id_column()

    software_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_software")),
        nullable=False,
        doc="The software this version belongs to")

    version = db.Column(
        db.String(config.get("max_tag_length")),
        default="any", nullable=False,
        doc="The version of the software.  This value does not "
            "follow any special formatting rules because the "
            "format depends on the 3rd party.")

    rank = db.Column(
        db.Integer,
        nullable=False,
        doc="The rank of this version relative to other versions of "
            "the same software. Used to determine whether a version "
            "is higher or lower than another.")

    # Fixed: the adjacent literals previously joined without a space
    # ("registeredon new nodes").
    default = db.Column(
        db.Boolean,
        default=False,
        nullable=False,
        doc="If true, this software version will be registered "
            "on new nodes by default.")

    discovery_code = db.Column(
        db.UnicodeText,
        nullable=True,
        doc="Python code to discover if this software version is installed "
            "on a node")

    discovery_function_name = db.Column(
        db.String(config.get("max_discovery_function_name_length")),
        nullable=True,
        doc="The name of a function in `discovery_code` to call when "
            "checking for the presence of this software version on an agent.\n"
            "The function should return either a boolean (true if present, "
            "false if not) or a tuple of a boolean and a dict of named "
            "parameters describing this installation.")
class JobSoftwareRequirement(db.Model, UtilityMixins):
    """
    Model representing a dependency of a job on a software tag, with
    optional version constraints.

    :attr:`min_version_id` and :attr:`max_version_id` are both nullable;
    each one, when set, references a row in the software version table.
    """
    __tablename__ = config.get("table_job_software_req")
    # A job can require a given software at most once.
    __table_args__ = (UniqueConstraint("software_id", "job_id"), )

    id = id_column()

    software_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_software")),
        nullable=False,
        doc="Reference to the required software")

    job_id = db.Column(
        IDTypeWork,
        db.ForeignKey("%s.id" % config.get("table_job")),
        nullable=False,
        doc="Foreign key to :class:`Job.id`")

    min_version_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_software_version")),
        nullable=True,
        doc="Reference to the minimum required version")

    max_version_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_software_version")),
        nullable=True,
        doc="Reference to the maximum required version")

    #
    # Relationships
    #
    # The backref exposes ``Job.software_requirements`` as a dynamic query;
    # the cascade is declared on that backref, i.e. on the Job side.
    job = db.relationship("Job",
                          backref=db.backref("software_requirements",
                                             lazy="dynamic",
                                             cascade="all, delete-orphan"))

    software = db.relationship("Software")

    # Two foreign keys point at the same table, so each relationship names
    # the column it follows explicitly.
    min_version = db.relationship("SoftwareVersion",
                                  foreign_keys=[min_version_id])

    max_version = db.relationship("SoftwareVersion",
                                  foreign_keys=[max_version_id])
def work_columns(state_default, priority_default):
    """
    Build the columns shared by models which produce work.

    :param state_default:
        used as the ``default`` of the state column

    :param priority_default:
        only interpolated into the documentation of the priority column;
        the column's actual default is ``DEFAULT_PRIORITY``

    :returns:
        a tuple of six columns in this order: id, state, priority,
        time_submitted, time_started, time_finished
    """
    # The columns are created in the same order in which they are returned.
    id_col = id_column(IDTypeWork)

    state_col = db.Column(
        WorkStateEnum,
        default=state_default,
        doc=dedent(
            " The state of the job with a value provided by "
            ":class:`.WorkState`"))

    priority_col = db.Column(
        db.Integer,
        default=DEFAULT_PRIORITY,
        doc=dedent(
            " The priority of the job relative to others in the queue. "
            "This is not the same as task priority. "
            "**configured by**: `%s`" % priority_default))

    time_submitted_col = db.Column(
        db.DateTime,
        default=datetime.now,
        doc=dedent(
            " The time the job was submitted. By default this defaults to "
            "using :meth:`datetime.datetime.now` as the source of "
            "submission time. This value will not be set more than once "
            "and will not change even after a job is requeued."))

    time_started_col = db.Column(
        db.DateTime,
        doc=dedent(
            " The time this job was started. By default this value is set "
            "when :attr:`state` is changed to an appropriate value or when "
            "a job is requeued."))

    time_finished_col = db.Column(
        db.DateTime,
        doc=dedent(
            " Time the job was finished. This will be set when the last "
            "task finishes and reset if a job is requeued."))

    return (
        id_col,
        state_col,
        priority_col,
        time_submitted_col,
        time_started_col,
        time_finished_col)
def work_columns(state_default, priority_default):
    """
    Build the columns shared by models which produce work.

    :param state_default:
        used as the ``default`` of the state column

    :param priority_default:
        only interpolated into the documentation of the priority column;
        the column's actual default is ``DEFAULT_PRIORITY``

    :returns:
        a tuple of six columns in this order: id, state, priority,
        time_submitted, time_started, time_finished
    """
    # The columns are created in the same order in which they are returned.
    id_col = id_column(IDTypeWork)

    state_col = db.Column(
        WorkStateEnum,
        default=state_default,
        doc=dedent(
            " The state of the job with a value provided by "
            ":class:`.WorkState`"))

    priority_col = db.Column(
        db.Integer,
        default=DEFAULT_PRIORITY,
        doc=dedent(
            " The priority of the job relative to others in the queue. "
            "This is not the same as task priority. "
            "**configured by**: `%s`" % priority_default))

    # Submission time is taken from the UTC clock.
    time_submitted_col = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        doc=dedent(
            " The time the job was submitted. By default this defaults to "
            "using :meth:`datetime.datetime.utcnow` as the source of "
            "submission time. This value will not be set more than once "
            "and will not change even after a job is requeued."))

    time_started_col = db.Column(
        db.DateTime,
        doc=dedent(
            " The time this job was started. By default this value is set "
            "when :attr:`state` is changed to an appropriate value or when "
            "a job is requeued."))

    time_finished_col = db.Column(
        db.DateTime,
        doc=dedent(
            " Time the job was finished. This will be set when the last "
            "task finishes and reset if a job is requeued."))

    return (
        id_col,
        state_col,
        priority_col,
        time_submitted_col,
        time_started_col,
        time_finished_col)
class PathMap(db.Model, ReprMixin, UtilityMixins):
    """
    Defines a table which is used for cross-platform file path mappings.

    Each row holds one path per platform (Linux, Windows, Mac OS X) and an
    optional tag which restricts the mapping to agents that have that tag.
    """
    __tablename__ = config.get("table_path_map")

    id = id_column(db.Integer)

    # All three platform paths are required and share the
    # ``max_path_length`` config value as their maximum length.
    path_linux = db.Column(
        db.String(config.get("max_path_length")),
        nullable=False,
        doc="The path on linux platforms")

    path_windows = db.Column(
        db.String(config.get("max_path_length")),
        nullable=False,
        doc="The path on Windows platforms")

    path_osx = db.Column(
        db.String(config.get("max_path_length")),
        nullable=False,
        doc="The path on Mac OS X platforms")

    tag_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_tag")),
        nullable=True,
        doc="The tag an agent needs to have for this path map "
            "to apply to it. "
            "If this is NULL, this path map applies to all "
            "agents, but is overridden by applying path maps "
            "that do specify a tag.")

    #
    # Relationships
    #
    # The backref exposes ``Tag.path_maps`` as a dynamic query.
    tag = db.relationship(
        "Tag",
        backref=db.backref("path_maps", lazy="dynamic"),
        doc="Relationship attribute for the tag this path map "
            "applies to.")
class JobGroup(db.Model, UtilityMixins):
    """
    Used to group jobs together for better presentation in the UI.

    A group has a title, references the jobtype of its main job and may
    reference the user who owns the jobs.
    """
    __tablename__ = config.get("table_job_group")

    id = id_column(IDTypeWork)

    title = db.Column(
        db.String(config.get("max_jobgroup_name_length")),
        nullable=False,
        doc="The title of the job group's name")

    main_jobtype_id = db.Column(
        IDTypeWork,
        db.ForeignKey("%s.id" % config.get("table_job_type")),
        nullable=False,
        doc="ID of the jobtype of the main job in this "
            "group. Purely for display and filtering.")

    # No ``nullable`` argument is given for this column.
    user_id = db.Column(
        db.Integer,
        db.ForeignKey("%s.id" % config.get("table_user")),
        doc="The id of the user who owns these jobs")

    #
    # Relationships
    #
    # Both backrefs are named ``jobgroups`` and are dynamic queries, one on
    # JobType and one on User.
    main_jobtype = db.relationship(
        "JobType",
        backref=db.backref("jobgroups", lazy="dynamic"),
        doc="The jobtype of the main job in this group")

    user = db.relationship(
        "User",
        backref=db.backref("jobgroups", lazy="dynamic"),
        doc="The user who owns these jobs")
class Software(db.Model, UtilityMixins):
    """
    Model to represent a versioned piece of software that can be present on
    an agent and may be depended on by a job and/or jobtype through the
    appropriate SoftwareRequirement table.

    The individual versions are stored as ``SoftwareVersion`` rows and are
    reachable through :attr:`versions`.
    """
    __tablename__ = config.get("table_software")
    # Software names are unique.
    __table_args__ = (UniqueConstraint("software"), )

    id = id_column()

    software = db.Column(
        db.String(config.get("max_tag_length")),
        nullable=False,
        doc="The name of the software")

    #
    # Relationships
    #
    # Dynamic query ordered by ascending rank; the backref exposes
    # ``SoftwareVersion.software``.
    versions = db.relationship(
        "SoftwareVersion",
        backref=db.backref("software"),
        lazy="dynamic",
        order_by="asc(SoftwareVersion.rank)",
        cascade="all, delete-orphan",
        doc="All known versions of this software")
def test_integer(self):
    """
    ``id_column(db.Integer)`` must produce an integer typed primary key
    which is not nullable and which has autoincrement enabled.
    """
    integer_id = id_column(db.Integer)

    self.assertIsInstance(integer_id.type, db.Integer)
    self.assertTrue(integer_id.primary_key)
    self.assertFalse(integer_id.nullable)
    self.assertTrue(integer_id.autoincrement)
def test_id_column(self):
    """
    ``id_column(GUID)`` must produce a ``GUID`` typed primary key which is
    not nullable and which has autoincrement enabled.
    """
    guid_id = id_column(GUID)

    self.assertIsInstance(guid_id.type, GUID)
    self.assertTrue(guid_id.primary_key)
    self.assertFalse(guid_id.nullable)
    self.assertTrue(guid_id.autoincrement)
class JobQueue(db.Model, UtilityMixins, ReprMixin):
    """
    Stores information about a job queue.  Used for flexible, configurable
    distribution of computing capacity to jobs.

    Queues form a tree through :attr:`parent_jobqueue_id`.  Agents are
    handed out by :meth:`get_job_for_agent`, which first honours
    ``minimum_agents``, then works through jobs and child queues in
    descending priority, balancing them by weight.
    """
    __tablename__ = config.get("table_job_queue")
    # Queue names are unique per parent.  :meth:`top_level_unique_check`
    # performs the equivalent check for queues without a parent.
    __table_args__ = (UniqueConstraint("parent_jobqueue_id", "name"), )

    REPR_COLUMNS = ("id", "name")

    id = id_column(IDTypeWork)

    parent_jobqueue_id = db.Column(
        IDTypeWork,
        db.ForeignKey("%s.id" % config.get("table_job_queue")),
        nullable=True,
        doc="The parent queue of this queue. If NULL, this is a top "
            "level queue.")

    name = db.Column(
        db.String(config.get("max_queue_name_length")),
        nullable=False,
        doc="The name of the job queue")

    minimum_agents = db.Column(
        db.Integer,
        nullable=True,
        doc="The scheduler will try to assign at least this number of "
            "agents to jobs in or below this queue as long as it "
            "can use them, before any other considerations.")

    maximum_agents = db.Column(
        db.Integer,
        nullable=True,
        doc="The scheduler will never assign more than this number of "
            "agents to jobs in or below this queue.")

    priority = db.Column(
        db.Integer,
        nullable=False,
        default=config.get("queue_default_priority"),
        doc="The priority of this job queue. The scheduler will not "
            "assign any nodes to other job queues or jobs with the "
            "same parent and a lower priority as long as this one "
            "can still use nodes. The minimum_agents column takes "
            "precedence over this.")

    weight = db.Column(
        db.Integer,
        nullable=False,
        default=config.get("queue_default_weight"),
        doc="The weight of this job queue. The scheduler will "
            "distribute available agents between jobs and job "
            "queues in the same queue in proportion to their "
            "weights.")

    fullpath = db.Column(
        db.String(config.get("max_queue_path_length")),
        doc="The path of this jobqueue.  This column is a "
            "database denormalization.  It is technically "
            "redundant, but faster to access than recursively "
            "querying all parent queues. If set to NULL, the "
            "path must be computed by recursively querying "
            "the parent queues.")

    #
    # Relationship
    #
    parent = db.relationship(
        "JobQueue",
        remote_side=[id],
        backref=db.backref("children", lazy="dynamic"),
        doc="Relationship between this queue and its parent")

    def path(self):
        """
        Return the path of this queue.

        The stored :attr:`fullpath` is returned when it is set.  Otherwise
        ``cache_jobqueue_path`` is scheduled for this queue and the path is
        computed by prefixing ``"/<name>"`` with the path of the parent, if
        any.
        """
        # Import here instead of at the top to break circular dependency
        from pyfarm.scheduler.tasks import cache_jobqueue_path

        if self.fullpath:
            return self.fullpath

        cache_jobqueue_path.delay(self.id)
        own_part = "/%s" % (self.name or "")
        if self.parent:
            return self.parent.path() + own_part
        return own_part

    def child_queues_sorted(self):
        """
        Return the child queues sorted by the number of currently assigned
        agents, highest first.

        No secondary sort key is applied; the sort is stable, so queues
        with equal counts keep the order in which the query returned them.
        """
        return sorted(self.children,
                      key=lambda x: x.num_assigned_agents(),
                      reverse=True)

    def child_jobs(self, filters):
        """
        Return the jobs matching ``filters`` sorted by the number of
        currently assigned agents, highest first.

        Jobs are restricted to this queue only when the queue has an id.

        :param filters:
            mapping that must contain the keys ``state_paused``,
            ``state_running``, ``state_done``, ``state_failed`` and
            ``state_queued``; a true value includes jobs in that state

        :raises KeyError:
            if one of those keys is missing
        """
        # Import down here instead of at the top to avoid circular import
        from pyfarm.models.job import Job

        jobs_query = Job.query
        if self.id:
            jobs_query = jobs_query.filter_by(queue=self)

        state_filters = (
            ("state_paused", WorkState.PAUSED),
            ("state_running", WorkState.RUNNING),
            ("state_done", WorkState.DONE),
            ("state_failed", WorkState.FAILED))
        wanted_states = [
            state for key, state in state_filters if filters[key]]

        if filters["state_queued"]:
            # Queued jobs have a NULL state.  ``== None`` is intentional:
            # SQLAlchemy turns it into ``IS NULL``, ``is None`` would not.
            jobs_query = jobs_query.filter(
                or_(Job.state == None, Job.state.in_(wanted_states)))
        else:
            jobs_query = jobs_query.filter(Job.state.in_(wanted_states))

        return sorted(jobs_query.all(),
                      key=lambda x: x.num_assigned_agents(),
                      reverse=True)

    def num_assigned_agents(self):
        """
        Return the number of agents assigned to jobs in or below this
        queue.

        The result is cached on the instance as ``assigned_agents_count``
        until :meth:`clear_assigned_counts` is called.
        """
        try:
            return self.assigned_agents_count
        except AttributeError:
            # Import down here instead of at the top to avoid circular import
            from pyfarm.models.task import Task
            from pyfarm.models.job import Job

            # Accumulate in a local so a failing query can not leave a
            # partial count cached on the instance.
            count = 0
            for queue in self.children:
                count += queue.num_assigned_agents()

            # Distinct agents, neither offline nor disabled, holding a
            # task of one of this queue's jobs that is queued (NULL state)
            # or running.  ``== None`` / ``!= None`` are intentional, see
            # :meth:`child_jobs`.
            count += db.session.query(distinct(Task.agent_id)).filter(
                Task.job.has(Job.queue == self),
                Task.agent_id != None,
                Task.agent.has(
                    and_(Agent.state != AgentState.OFFLINE,
                         Agent.state != AgentState.DISABLED)),
                or_(Task.state == None,
                    Task.state == WorkState.RUNNING)).count()

            self.assigned_agents_count = count
            return count

    def clear_assigned_counts(self):
        """
        Drop the count cached by :meth:`num_assigned_agents` on this queue
        and on all of its ancestors.
        """
        try:
            del self.assigned_agents_count
        except AttributeError:
            # Nothing was cached on this queue.
            pass

        if self.parent:
            self.parent.clear_assigned_counts()

    def get_job_for_agent(self, agent, unwanted_job_ids=None):
        """
        Select the job in or below this queue that ``agent`` should work
        on next.

        :param agent:
            the agent looking for work

        :param unwanted_job_ids:
            optional collection of job ids that must not be returned

        :returns:
            the selected job or ``None`` when nothing suitable was found
        """
        # Import down here instead of at the top to avoid circular import
        from pyfarm.models.job import Job

        supported_types = agent.get_supported_types()
        if not supported_types:
            return None

        available_ram = agent.ram if USE_TOTAL_RAM else agent.free_ram

        # Running or queued jobs directly in this queue which have no
        # unfinished parents, a supported jobtype version and fit into the
        # agent's ram.  ``== None`` is intentional, see :meth:`child_jobs`.
        child_jobs = Job.query.filter(
            or_(Job.state == WorkState.RUNNING, Job.state == None),
            Job.job_queue_id == self.id,
            ~Job.parents.any(
                or_(Job.state == None, Job.state != WorkState.DONE)),
            Job.jobtype_version_id.in_(supported_types),
            Job.ram <= available_ram).all()

        # Fixed: ``x.id not in unwanted_job_ids`` used to be evaluated here
        # unconditionally and raised TypeError for the default ``None``.
        child_jobs = [
            x for x in child_jobs if agent.satisfies_job_requirements(x)]
        if unwanted_job_ids:
            child_jobs = [
                x for x in child_jobs if x.id not in unwanted_job_ids]

        child_queues = JobQueue.query.filter(
            JobQueue.parent_jobqueue_id == self.id).all()

        # Before anything else, enforce minimums
        for job in child_jobs:
            if job.state == _WorkState.RUNNING:
                if (job.num_assigned_agents() < (job.minimum_agents or 0)
                        and job.num_assigned_agents() <
                        (job.maximum_agents or maxsize)
                        and job.can_use_more_agents()):
                    return job
            elif job.minimum_agents and job.minimum_agents > 0:
                return job

        for queue in child_queues:
            if (queue.num_assigned_agents() < (queue.minimum_agents or 0)
                    and queue.num_assigned_agents() <
                    (queue.maximum_agents or maxsize)):
                job = queue.get_job_for_agent(agent, unwanted_job_ids)
                if job:
                    return job

        # Group child queues and jobs by priority, queues first.
        objects_by_priority = {}
        for queue in child_queues:
            objects_by_priority.setdefault(queue.priority, []).append(queue)
        for job in child_jobs:
            objects_by_priority.setdefault(job.priority, []).append(job)

        # Work through the priorities in descending order
        for priority in sorted(objects_by_priority, reverse=True):
            objects = objects_by_priority[priority]

            # Only queues and running jobs contribute to the weight sum.
            active_objects = [
                x for x in objects
                if not isinstance(x, Job) or x.state == _WorkState.RUNNING]
            weight_sum = sum(x.weight for x in active_objects)
            total_assigned = sum(x.num_assigned_agents() for x in objects)

            def relative_load(x):
                # Share of assigned agents divided by share of weight: the
                # object furthest below its fair share sorts first.
                if total_assigned:
                    assigned_share = (
                        float(x.num_assigned_agents()) / total_assigned)
                else:
                    assigned_share = 0
                if weight_sum and x.weight:
                    weight_share = float(x.weight) / weight_sum
                else:
                    weight_share = 1
                return assigned_share / weight_share

            objects.sort(key=relative_load)

            selected_job = None
            for item in objects:
                if isinstance(item, Job):
                    if item.state == _WorkState.RUNNING:
                        if (item.can_use_more_agents() and
                                item.num_assigned_agents() <
                                (item.maximum_agents or maxsize)):
                            if PREFER_RUNNING_JOBS:
                                return item
                            elif (selected_job is None or
                                  selected_job.time_submitted >
                                  item.time_submitted):
                                selected_job = item
                    elif (selected_job is None or
                          selected_job.time_submitted > item.time_submitted):
                        # If this job is not running yet, remember it, but
                        # keep looking for already running or queued but
                        # older jobs
                        selected_job = item

                if isinstance(item, JobQueue):
                    if (item.num_assigned_agents() <
                            (item.maximum_agents or maxsize)):
                        job = item.get_job_for_agent(agent, unwanted_job_ids)
                        if job:
                            return job

            if selected_job:
                return selected_job

        return None

    @staticmethod
    def top_level_unique_check(mapper, connection, target):
        """
        Refuse a second top level queue with the same name.

        Only queues without a parent are checked.  The signature matches
        what a SQLAlchemy mapper event listener receives -- presumably this
        is registered as one elsewhere; verify against the module.

        :raises ValueError:
            if a queue without parent and with the name of ``target``
            already exists
        """
        if target.parent_jobqueue_id is None:
            count = JobQueue.query.filter_by(parent_jobqueue_id=None,
                                             name=target.name).count()
            if count > 0:
                raise ValueError("Cannot have two jobqueues named %r at the "
                                 "top level" % target.name)
class Agent(db.Model, ValidatePriorityMixin, UtilityMixins, ReprMixin):
    """
    Stores information about an agent: its network address, state,
    installed resources and allocation configuration.

    .. note::
        Besides the unique :attr:`id`, the combination of the following
        columns must be unique as well:

            * :attr:`hostname`
            * :attr:`ip`
            * :attr:`port`
    """
    __tablename__ = TABLE_AGENT
    __table_args__ = (UniqueConstraint("hostname", "ip", "port"), )

    STATE_DEFAULT = "online"
    REPR_COLUMNS = (
        "id", "hostname", "state", "ip", "remote_ip", "port", "cpus",
        "ram", "free_ram")
    REPR_CONVERT_COLUMN = {
        "ip": repr_ip,
        "remote_ip": repr_ip,
        "state": repr}

    # Bounds used by :meth:`validate_resource`, which looks them up as
    # ``MIN_<COLUMN>`` / ``MAX_<COLUMN>``.  Each can be overridden through
    # the environment variable named in the call.
    MIN_PORT = read_env_int("PYFARM_AGENT_MIN_PORT", 1024)
    MAX_PORT = read_env_int("PYFARM_AGENT_MAX_PORT", 65535)
    MIN_CPUS = read_env_int("PYFARM_AGENT_MIN_CPUS", 1)
    MAX_CPUS = read_env_int("PYFARM_AGENT_MAX_CPUS", 256)
    MIN_RAM = read_env_int("PYFARM_AGENT_MIN_RAM", 16)
    MAX_RAM = read_env_int("PYFARM_AGENT_MAX_RAM", 262144)

    # sanity checks of the configured bounds, run at class creation
    assert MIN_PORT >= 1, "$PYFARM_AGENT_MIN_PORT must be > 0"
    assert MAX_PORT >= 1, "$PYFARM_AGENT_MAX_PORT must be > 0"
    assert MAX_PORT >= MIN_PORT, "MIN_PORT must be <= MAX_PORT"
    assert MIN_CPUS >= 1, "$PYFARM_AGENT_MIN_CPUS must be > 0"
    assert MAX_CPUS >= 1, "$PYFARM_AGENT_MAX_CPUS must be > 0"
    assert MAX_CPUS >= MIN_CPUS, "MIN_CPUS must be <= MAX_CPUS"
    assert MIN_RAM >= 1, "$PYFARM_AGENT_MIN_RAM must be > 0"
    assert MAX_RAM >= 1, "$PYFARM_AGENT_MAX_RAM must be > 0"
    assert MAX_RAM >= MIN_RAM, "MIN_RAM must be <= MAX_RAM"

    id = id_column(IDTypeAgent)

    #
    # Host attributes
    #
    hostname = db.Column(
        db.String(MAX_HOSTNAME_LENGTH),
        nullable=False,
        doc=dedent(
            " The hostname we should use to talk to this host. "
            "Preferably this value will be the fully qualified name "
            "instead of the base hostname alone."))

    ip = db.Column(
        IPv4Address,
        nullable=True,
        doc="The IPv4 network address this host resides on")

    remote_ip = db.Column(
        IPv4Address,
        nullable=True,
        doc="the remote address which came in with the request")

    use_address = db.Column(
        UseAgentAddressEnum,
        nullable=False,
        default="remote",
        doc="The address we should use when communicating with the agent")

    ram = db.Column(
        db.Integer,
        nullable=False,
        doc="The amount of ram installed on the agent in megabytes")

    free_ram = db.Column(
        db.Integer,
        nullable=False,
        doc="The amount of ram which was last considered free")

    cpus = db.Column(
        db.Integer,
        nullable=False,
        doc="The number of cpus installed on the agent")

    port = db.Column(
        db.Integer,
        nullable=False,
        doc="The port the agent is currently running on")

    time_offset = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="the offset in seconds the agent is from "
            "an official time server")

    #
    # Host state
    #
    state = db.Column(
        AgentStateEnum,
        default=AgentState.ONLINE,
        nullable=False,
        doc=dedent(
            " Stores the current state of the host. This value can be "
            "changed either by a master telling the host to do something "
            "with a task or from the host via REST api."))

    #
    # Allocation limits.  `1.0` stands for 100% allocation; for
    # `cpu_allocation` that typically means one task per cpu.
    #
    ram_allocation = db.Column(
        db.Float,
        default=read_env_number("PYFARM_AGENT_RAM_ALLOCATION", .8),
        doc=dedent(
            " The amount of ram the agent is allowed to allocate towards "
            "work. A value of 1.0 would mean to let the agent use all of "
            "the memory installed on the system when assigning work."))

    cpu_allocation = db.Column(
        db.Float,
        default=read_env_number("PYFARM_AGENT_CPU_ALLOCATION", 1.0),
        doc=dedent(
            " The total amount of cpu space an agent is allowed to "
            "process work in. A value of 1.0 would mean an agent can "
            "handle as much work as the system could handle given the "
            "requirements of a task. For example if an agent has 8 cpus, "
            "cpu_allocation is .5, and a task requires 4 cpus then only "
            "that task will run on the system."))

    #
    # Relationships
    #
    tasks = db.relationship(
        "Task",
        backref="agent",
        lazy="dynamic",
        doc=dedent(
            " Relationship between an :class:`Agent` and any "
            ":class:`pyfarm.models.Task` objects"))

    tags = db.relationship(
        "Tag",
        secondary=AgentTagAssociation,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="Tags associated with this agent")

    software = db.relationship(
        "Software",
        secondary=AgentSoftwareAssociation,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="software this agent has installed or is configured for")

    projects = db.relationship(
        "Project",
        secondary=AgentProjects,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="The project or projects this agent is "
            "associated with.  By default an agent "
            "which is not associated with any projects "
            "will be a member of all projects.")

    @classmethod
    def validate_hostname(cls, key, value):
        """
        Return ``value`` if it matches ``REGEX_HOSTNAME``.

        :raises ValueError:
            if ``value`` does not match the expression
        """
        if REGEX_HOSTNAME.match(value):
            return value

        # the hostname contains characters we can't use
        raise ValueError("%s is not valid for %s" % (value, key))

    @classmethod
    def validate_resource(cls, key, value):
        """
        Return ``value`` if it lies within the bounds configured for
        ``key``, i.e. between the class attributes ``MIN_<KEY>`` and
        ``MAX_<KEY>`` (both inclusive).

        :raises ValueError:
            if ``value`` is outside of that range
        """
        suffix = key.upper()
        lower = getattr(cls, "MIN_%s" % suffix)
        upper = getattr(cls, "MAX_%s" % suffix)

        if lower > value or value > upper:
            raise ValueError(
                "value for `%s` must be between " % key +
                "%s and %s" % (lower, upper))

        return value

    @classmethod
    def validate_ip_address(cls, key, value):
        """
        Ensure ``value`` is a usable address for :attr:`ip`.

        A false value is not checked at all and ``None`` is returned for
        it.  Otherwise the value must parse as an address and, unless the
        application config sets ``DEV_ALLOW_ANY_AGENT_ADDRESS``, it must:

            * be a private address if ``PYFARM_REQUIRE_PRIVATE_IP`` is set
            * not be a hostmask
            * not be link local
            * not be a loopback address
            * not be used for multicast
            * not be a netmask
            * not be reserved

        :raises ValueError:
            if the value can not be parsed or fails one of the checks
        """
        if not value:
            return None

        try:
            address = netaddr.IPAddress(value)
        except (AddrFormatError, ValueError) as e:
            raise ValueError(
                "%s is not a valid address format: %s" % (value, e))

        # development switch: accept any address which parses
        if app.config.get("DEV_ALLOW_ANY_AGENT_ADDRESS", False):
            return value

        if PYFARM_REQUIRE_PRIVATE_IP and not address.is_private():
            raise ValueError("%s is not a private ip address" % value)

        # a list, not a generator: every check is evaluated
        unusable = any([
            address.is_hostmask(),
            address.is_link_local(),
            address.is_loopback(),
            address.is_multicast(),
            address.is_netmask(),
            address.is_reserved()])
        if unusable:
            raise ValueError("%s is not a usable ip address" % value)

        return value

    @validates("ip")
    def validate_address_column(self, key, value):
        """Column validator for ip, see :meth:`validate_ip_address`"""
        return self.validate_ip_address(key, value)

    @validates("hostname")
    def validate_hostname_column(self, key, value):
        """Column validator for hostname, see :meth:`validate_hostname`"""
        return self.validate_hostname(key, value)

    @validates("ram", "cpus", "port")
    def validate_resource_column(self, key, value):
        """
        Column validator for ram, cpus and port, see
        :meth:`validate_resource`
        """
        return self.validate_resource(key, value)

    def serialize_column(self, column):
        """
        Return ``column`` in a serializable form: ``IPAddress`` instances
        are converted to strings, everything else is returned unchanged.
        """
        return str(column) if isinstance(column, IPAddress) else column
class JobTypeVersion(db.Model, UtilityMixins, ReprMixin):
    """
    Defines a specific jobtype version.

    Each row stores the source code and batching behaviour for one
    version of a jobtype.  The combination of :attr:`jobtype_id` and
    :attr:`version` is unique.
    """
    __tablename__ = config.get("table_job_type_version")
    __table_args__ = (UniqueConstraint("jobtype_id", "version"), )

    REPR_COLUMNS = ("id", "jobtype_id", "version")

    id = id_column(IDTypeWork)

    jobtype_id = db.Column(
        IDTypeWork,
        db.ForeignKey("%s.id" % config.get("table_job_type")),
        nullable=False,
        doc="The jobtype this version belongs to")

    version = db.Column(
        db.Integer,
        nullable=False,
        doc="The version number")

    # NOTE: the adjacent string literals below are concatenated by the
    # parser, so every fragment except the last needs a trailing space.
    max_batch = db.Column(
        db.Integer,
        default=config.get("job_type_max_batch"),
        doc="When the queue runs, this is the maximum number of tasks "
            "that the queue can select to assign to a single "
            "agent. If left empty, no maximum applies")

    batch_contiguous = db.Column(
        db.Boolean,
        default=config.get("job_type_batch_contiguous"),
        doc="If True then the queue will be forced to batch "
            "numerically contiguous tasks only for this job type. "
            "For example if True it would batch frames 1, 2, 3, 4 "
            "together but not 2, 4, 6, 8. If this column is False "
            "however the queue will batch non-contiguous tasks too.")

    no_automatic_start_time = db.Column(
        db.Boolean,
        nullable=False,
        default=False,
        doc="If set, we will not automatically set `time_started_on` "
            "for the tasks in jobs of this type when they are set "
            "to `running`.")

    supports_tiling = db.Column(
        db.Boolean,
        default=False,
        doc="Whether or not the jobtype supports tiling, i.e. splitting single "
            "frames into regions and then rendering those independently from "
            "each other.")

    classname = db.Column(
        db.String(config.get("job_type_max_class_name_length")),
        nullable=True,
        doc="The name of the job class contained within the file being "
            "loaded. This field may be null but when it's not provided "
            "job type name will be used instead.")

    code = db.Column(
        db.UnicodeText,
        nullable=False,
        doc="The source code of the job type")

    #
    # Relationships
    #
    jobtype = db.relationship(
        "JobType",
        backref=db.backref(
            "versions", lazy="dynamic", cascade="all, delete-orphan"),
        doc="Relationship between this version and the "
            ":class:`JobType` it belongs to")

    jobs = db.relationship(
        "Job",
        backref="jobtype_version",
        lazy="dynamic",
        doc="Relationship between this jobtype version and "
            ":class:`.Job` objects.")

    @validates("max_batch")
    def validate_max_batch(self, key, value):
        """
        Validates the ``max_batch`` column.

        Integer values must be at least 1.  Values which are not integers
        (``None`` for example, meaning no maximum) are returned unchanged.

        :raises ValueError:
            Raised if ``value`` is an integer lower than 1
        """
        if isinstance(value, int) and value < 1:
            raise ValueError("max_batch must be greater than or equal to 1")

        return value

    @validates("version")
    def validate_version(self, key, value):
        """
        Validates the ``version`` column.

        Integer values must be at least 1.  Values which are not integers
        are returned unchanged.

        :raises ValueError:
            Raised if ``value`` is an integer lower than 1
        """
        if isinstance(value, int) and value < 1:
            raise ValueError("version must be greater than or equal to 1")

        return value
class Agent(db.Model, ValidatePriorityMixin, ValidateWorkStateMixin,
            UtilityMixins, ReprMixin):
    """
    Stores information about an agent including its network address,
    state, allocation configuration, etc.

    .. note::
        This table enforces two forms of uniqueness.  The :attr:`id` column
        must be unique and the combination of these columns must also be
        unique to limit the frequency of duplicate data:

            * :attr:`hostname`
            * :attr:`port`
            * :attr:`id`
    """
    __tablename__ = config.get("table_agent")
    __table_args__ = (UniqueConstraint("hostname", "port", "id"), )
    STATE_ENUM = AgentState
    STATE_DEFAULT = "online"
    REPR_COLUMNS = (
        "id", "hostname", "port", "state", "remote_ip",
        "cpus", "ram", "free_ram")
    REPR_CONVERT_COLUMN = {"remote_ip": repr_ip}
    URL_TEMPLATE = config.get("agent_api_url_template")

    # Bounds used by :meth:`validate_resource`, which looks them up by
    # name as ``MIN_<COLUMN>`` / ``MAX_<COLUMN>``.
    MIN_PORT = config.get("agent_min_port")
    MAX_PORT = config.get("agent_max_port")
    MIN_CPUS = config.get("agent_min_cpus")
    MAX_CPUS = config.get("agent_max_cpus")
    MIN_RAM = config.get("agent_min_ram")
    MAX_RAM = config.get("agent_max_ram")

    # quick check of the configured data
    assert MIN_PORT >= 1, "`agent_min_port` must be > 0"
    assert MAX_PORT >= 1, "`agent_max_port` must be > 0"
    assert MAX_PORT >= MIN_PORT, "MIN_PORT must be <= MAX_PORT"
    assert MIN_CPUS >= 1, "`agent_min_cpus` must be > 0"
    assert MAX_CPUS >= 1, "`agent_max_cpus` must be > 0"
    assert MAX_CPUS >= MIN_CPUS, "MIN_CPUS must be <= MAX_CPUS"
    assert MIN_RAM >= 1, "`agent_min_ram` must be > 0"
    assert MAX_RAM >= 1, "`agent_max_ram` must be > 0"
    assert MAX_RAM >= MIN_RAM, "`agent_min_ram` must be <= `agent_max_ram`"

    id = id_column(IDTypeAgent, default=uuid.uuid4, autoincrement=False)

    # basic host attribute information
    hostname = db.Column(
        db.String(config.get("max_hostname_length")),
        nullable=False,
        doc="The hostname we should use to talk to this host. "
            "Preferably this value will be the fully qualified "
            "name instead of the base hostname alone.")

    notes = db.Column(
        db.Text,
        default="",
        doc="Free form notes about this agent")

    remote_ip = db.Column(
        IPv4Address,
        nullable=True,
        doc="the remote address which came in with the request")

    use_address = db.Column(
        UseAgentAddressEnum,
        nullable=False,
        default=UseAgentAddress.REMOTE,
        doc="The address we should use when communicating with the agent")

    # TODO Make non-nullable later
    os_class = db.Column(
        OperatingSystemEnum,
        doc="The type of operating system running on the "
            "agent; 'linux', 'windows', or 'mac'.")

    os_fullname = db.Column(
        db.String(config.get("max_osname_length")),
        doc="The full human-readable name of the agent's OS, as returned "
            "by platform.platform()")

    ram = db.Column(
        db.Integer,
        nullable=False,
        doc="The amount of ram installed on the agent in megabytes")

    free_ram = db.Column(
        db.Integer,
        nullable=False,
        doc="The amount of ram which was last considered free")

    cpus = db.Column(
        db.Integer,
        nullable=False,
        doc="The number of logical CPU cores installed on the agent")

    cpu_name = db.Column(
        db.String(config.get("max_cpuname_length")),
        doc="The make and model of CPUs in this agents")

    port = db.Column(
        db.Integer,
        nullable=False,
        doc="The port the agent is currently running on")

    time_offset = db.Column(
        db.Integer,
        nullable=False,
        default=0,
        doc="The offset in seconds the agent is from an official time server")

    version = db.Column(
        db.String(16),
        nullable=True,
        doc="The pyfarm version number this agent is running.")

    upgrade_to = db.Column(
        db.String(16),
        nullable=True,
        doc="The version this agent should upgrade to.")

    restart_requested = db.Column(
        db.Boolean,
        default=False,
        nullable=False,
        doc="If True, the agent will be restarted")

    # host state
    state = db.Column(
        AgentStateEnum,
        default=AgentState.ONLINE,
        nullable=False,
        doc="Stores the current state of the host. This value can be "
            "changed either by a master telling the host to do "
            "something with a task or from the host via REST api.")

    last_heard_from = db.Column(
        db.DateTime,
        default=datetime.utcnow,
        doc="Time we last had contact with this agent")

    last_success_on = db.Column(
        db.DateTime,
        nullable=True,
        doc="The last time this agent has set a task to `done`")

    last_polled = db.Column(
        db.DateTime,
        doc="Time we last tried to contact the agent")

    # Max allocation of the two primary resources, for which `1.0` is 100%
    # allocation.  For `cpu_allocation` 100% allocation typically means
    # one task per cpu.
    ram_allocation = db.Column(
        db.Float,
        default=config.get("agent_ram_allocation"),
        doc="The amount of ram the agent is allowed to allocate "
            "towards work. A value of 1.0 would mean to let the "
            "agent use all of the memory installed on the system "
            "when assigning work.")

    cpu_allocation = db.Column(
        db.Float,
        default=config.get("agent_cpu_allocation"),
        doc="The total amount of cpu space an agent is allowed to "
            "process work in. A value of 1.0 would mean an agent "
            "can handle as much work as the system could handle "
            "given the requirements of a task. For example if "
            "an agent has 8 cpus, cpu_allocation is .5, and a "
            "task requires 4 cpus then only that task will "
            "run on the system.")

    #
    # Relationships
    #
    tasks = db.relationship(
        "Task",
        backref="agent",
        lazy="dynamic",
        doc="Relationship between an :class:`Agent` and any "
            ":class:`pyfarm.models.Task` objects")

    tags = db.relationship(
        "Tag",
        secondary=AgentTagAssociation,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="Tags associated with this agent")

    software_versions = db.relationship(
        "SoftwareVersion",
        secondary=AgentSoftwareVersionAssociation,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="software this agent has installed or is configured for")

    mac_addresses = db.relationship(
        "AgentMacAddress",
        backref="agent",
        lazy="dynamic",
        doc="The MAC addresses this agent has",
        cascade="save-update, merge, delete, delete-orphan")

    gpus = db.relationship(
        "GPU",
        secondary=GPUInAgent,
        backref=db.backref("agents", lazy="dynamic"),
        lazy="dynamic",
        doc="The graphics cards that are installed in this agent")

    disks = db.relationship(
        "AgentDisk",
        backref=db.backref("agent"),
        lazy="dynamic",
        doc="The known disks available to this agent",
        cascade="save-update, merge, delete, delete-orphan")

    failed_tasks = db.relationship(
        "Task",
        secondary=FailedTaskInAgent,
        backref=db.backref("failed_in_agents", lazy="dynamic"),
        lazy="dynamic",
        doc="The tasks this agents failed to execute")

    def is_offline(self):
        """Returns True if :attr:`state` is ``AgentState.OFFLINE``"""
        return self.state == AgentState.OFFLINE

    def is_disabled(self):
        """Returns True if :attr:`state` is ``AgentState.DISABLED``"""
        return self.state == AgentState.DISABLED

    def get_supported_types(self):
        """
        Returns a list of ids of the jobtype versions whose software
        requirements this agent satisfies.

        Only jobtype versions which have at least one job with a NULL
        state or a state of ``WorkState.RUNNING`` are considered.  The
        result is cached on this instance in ``support_jobtype_versions``
        so later calls return the same list without querying again.
        """
        try:
            return self.support_jobtype_versions
        except AttributeError:
            pass

        # ``== None`` is intentional here: on a column expression
        # SQLAlchemy overloads the operator to emit ``IS NULL``.
        jobtype_versions_query = JobTypeVersion.query.filter(
            JobTypeVersion.jobs.any(
                or_(Job.state == None,  # noqa: E711
                    Job.state == WorkState.RUNNING)))

        # Build the result locally and only cache it once it is complete.
        # If anything raises while we're iterating we don't want a
        # partially populated list to be returned by later calls.
        supported = []
        for jobtype_version in jobtype_versions_query:
            if self.satisfies_jobtype_requirements(jobtype_version):
                supported.append(jobtype_version.id)

        self.support_jobtype_versions = supported
        return supported

    def satisfies_jobtype_requirements(self, jobtype_version):
        """
        Returns True if every software requirement of ``jobtype_version``
        is met by at least one entry in :attr:`software_versions`.

        A requirement is met by a software version when both refer to the
        same software and the version's ``rank`` lies within the
        requirement's optional ``min_version`` / ``max_version`` bounds.
        """
        requirements_to_satisfy = list(jobtype_version.software_requirements)

        # Nothing is required so there's no need to look at what
        # software this agent has.
        if not requirements_to_satisfy:
            return True

        for software_version in self.software_versions:
            # Iterate over a copy because we remove from the list
            # we're checking as requirements are satisfied.
            for requirement in list(requirements_to_satisfy):
                if software_version.software != requirement.software:
                    continue

                min_version = requirement.min_version
                max_version = requirement.max_version
                if (min_version is not None and
                        not min_version.rank <= software_version.rank):
                    continue

                if (max_version is not None and
                        not max_version.rank >= software_version.rank):
                    continue

                requirements_to_satisfy.remove(requirement)

        return len(requirements_to_satisfy) == 0

    def satisfies_job_requirements(self, job):
        """
        Returns True if this agent is able to work on ``job``.

        The agent must satisfy the software requirements of the job's
        jobtype version, have at least ``job.cpus`` cpus and ``job.ram``
        free ram and must match all of the job's tag requirements.  A
        regular tag requirement means the agent must have the tag, a
        negated one means the agent must not have it.
        """
        if not self.satisfies_jobtype_requirements(job.jobtype_version):
            return False

        if self.cpus < job.cpus:
            return False

        if self.free_ram < job.ram:
            return False

        # :attr:`tags` is a dynamic relationship so every membership test
        # against it would iterate the query again.  Load the tags once,
        # and only if the job has any tag requirements at all.
        agent_tags = None
        for tag_requirement in job.tag_requirements:
            if agent_tags is None:
                agent_tags = list(self.tags)

            has_tag = tag_requirement.tag in agent_tags
            if tag_requirement.negate:
                if has_tag:
                    return False
            elif not has_tag:
                return False

        return True

    @classmethod
    def validate_hostname(cls, key, value):
        """
        Ensures that the hostname provided by `value` matches a regular
        expression that expresses what a valid hostname is.

        :raises ValueError:
            Raised if ``value`` does not match ``REGEX_HOSTNAME``
        """
        # ensure hostname does not contain characters we can't use
        if not REGEX_HOSTNAME.match(value):
            raise ValueError("%s is not valid for %s" % (value, key))

        return value

    @classmethod
    def validate_resource(cls, key, value):
        """
        Ensure the ``value`` provided for ``key`` is within an expected
        range.  The inclusive bounds are retrieved directly from the class
        using the attribute names ``"MIN_%s" % key.upper()`` and
        ``"MAX_%s" % key.upper()``, for example ``MIN_RAM`` and ``MAX_RAM``
        when ``key`` is ``"ram"``.

        :raises ValueError:
            Raised if ``value`` is outside of the range
        """
        min_value = getattr(cls, "MIN_%s" % key.upper())
        max_value = getattr(cls, "MAX_%s" % key.upper())

        # check the provided input
        if not min_value <= value <= max_value:
            msg = "value for `%s` must be between " % key
            msg += "%s and %s" % (min_value, max_value)
            raise ValueError(msg)

        return value

    @classmethod
    def validate_ipv4_address(cls, _, value):
        """
        Ensures the address provided by ``value`` is valid.  ``None`` is
        accepted and returned as-is.  Otherwise this checks to ensure
        that the value provided is:

            * not a hostmask
            * not link local (:rfc:`3927`)
            * not a loopback address, unless ``ALLOW_AGENT_LOOPBACK``
              is set
            * not used for multicast (:rfc:`1112`)
            * not a netmask (:rfc:`4632`)
            * not reserved (:rfc:`6052`)

        :raises ValueError:
            Raised if ``value`` can't be parsed as an address or
            is one of the address types listed above
        """
        if value is None:
            return value

        try:
            address = IPAddress(value)

        except (AddrFormatError, ValueError) as e:
            raise ValueError(
                "%s is not a valid address format: %s" % (value, e))

        # Loopback addresses are only rejected if they have not been
        # explicitly allowed.
        rejected_loopback = \
            not ALLOW_AGENT_LOOPBACK and address.is_loopback()

        if any([address.is_hostmask(), address.is_link_local(),
                rejected_loopback, address.is_multicast(),
                address.is_netmask(), address.is_reserved()]):
            raise ValueError("%s is not a valid address type" % value)

        return value

    def api_url(self):
        """
        Returns the base url which should be used to access the api
        of this specific agent.  The host portion is :attr:`remote_ip`
        or :attr:`hostname` depending on :attr:`use_address`.

        :except ValueError:
            Raised if this function is called while the agent's
            :attr:`use_address` column is set to anything other than
            ``REMOTE`` or ``HOSTNAME``, ``PASSIVE`` for example.
        """
        if self.use_address == UseAgentAddress.REMOTE:
            return self.URL_TEMPLATE.format(
                host=self.remote_ip,
                port=self.port
            )

        elif self.use_address == UseAgentAddress.HOSTNAME:
            return self.URL_TEMPLATE.format(
                host=self.hostname,
                port=self.port
            )

        else:
            raise ValueError(
                "Cannot construct an agent API url using mode %r "
                "`use_address`" % self.use_address)

    @validates("hostname")
    def validate_hostname_column(self, key, value):
        """Validates the hostname column"""
        return self.validate_hostname(key, value)

    @validates("ram", "cpus", "port")
    def validate_numeric_column(self, key, value):
        """
        Validates several numerical columns.  Columns such as ram, cpus
        and port are validated with this method.
        """
        return self.validate_resource(key, value)

    @validates("remote_ip")
    def validate_remote_ip(self, key, value):
        """Validates the remote_ip column"""
        return self.validate_ipv4_address(key, value)