class BuildPhase(db.Model):
    """
    A build phase represents a grouping of jobs.

    For example, a common situation for a build is that it has a "test" and a
    "release" phase. In this case, we'd have one or more jobs under test, and
    one or more jobs under release. These test jobs may be things like
    "Windows" and "Linux", whereas the release may simply be "Upload Tarball".

    The build phase represents the aggregate result of all jobs under it.
    """
    __tablename__ = 'buildphase'
    __table_args__ = (
        UniqueConstraint('build_id', 'label', name='unq_buildphase_key'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    build_id = Column(GUID, ForeignKey('build.id', ondelete="CASCADE"),
                      nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown,
                    server_default='0')
    result = Column(Enum(Result), nullable=False, default=Result.unknown,
                    server_default='0')
    order = Column(Integer, nullable=False, default=0, server_default='0')
    duration = Column(Integer)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow, nullable=False,
                          server_default='now()')

    build = relationship(
        'Build',
        backref=backref('phases', order_by='BuildPhase.date_started'))
    project = relationship('Project')

    def __init__(self, **kwargs):
        """
        Build the phase from column keyword arguments and fill in defaults.

        Defaults are set eagerly so a freshly constructed, not yet persisted
        instance already carries usable values. When both timestamps are
        given and no duration was supplied, the duration is derived from
        them in milliseconds.
        """
        super(BuildPhase, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            # The status column is typed Enum(Status); this previously
            # assigned Result.unknown, i.e. a member of the wrong enum.
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.date_started and self.date_finished and not self.duration:
            elapsed = self.date_finished - self.date_started
            self.duration = elapsed.total_seconds() * 1000
class JobStep(db.Model):
    """
    The most granular unit of work; run on a particular node, has a status
    and a result.
    """
    __tablename__ = 'jobstep'
    __table_args__ = (
        Index('idx_jobstep_status', 'status'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    phase_id = Column(GUID, ForeignKey('jobphase.id', ondelete="CASCADE"),
                      nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    node_id = Column(GUID, ForeignKey('node.id', ondelete="CASCADE"))
    # id of JobStep that replaces this JobStep. Usually None, unless a JobStep
    # fails and is retried.
    replacement_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"),
                            unique=True)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    last_heartbeat = Column(DateTime)
    data = Column(JSONEncodedDict)

    job = relationship('Job')
    project = relationship('Project')
    node = relationship('Node')
    phase = relationship(
        'JobPhase',
        backref=backref('steps', order_by='JobStep.date_started'))

    __repr__ = model_repr('label')

    def __init__(self, **kwargs):
        """Construct the step and fill in any defaults not passed in."""
        super(JobStep, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.data is None:
            self.data = {}

    @property
    def duration(self):
        """
        Milliseconds between date_started and date_finished, or None when
        either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class Job(db.Model):
    """
    A single job belonging to a build, numbered uniquely within that build
    (see the unq_job_number constraint).
    """
    __tablename__ = 'job'
    __table_args__ = (
        Index('idx_build_project_id', 'project_id'),
        Index('idx_build_change_id', 'change_id'),
        Index('idx_build_source_id', 'source_id'),
        Index('idx_build_family_id', 'build_id'),
        UniqueConstraint('build_id', 'number', name='unq_job_number'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    number = Column(Integer)
    # TODO(dcramer): change should be removed in favor of an m2m between
    # Change and Source
    build_id = Column(GUID, ForeignKey('build.id', ondelete="CASCADE"))
    change_id = Column(GUID, ForeignKey('change.id', ondelete="CASCADE"))
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    source_id = Column(GUID, ForeignKey('source.id', ondelete="CASCADE"))
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    duration = Column(Integer)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    date_modified = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    change = relationship('Change')
    build = relationship(
        'Build',
        backref=backref('jobs', order_by='Job.number'),
        innerjoin=True)
    project = relationship('Project')
    source = relationship('Source')

    # NOTE(review): no 'target' attribute is declared on this model in the
    # visible code -- confirm model_repr tolerates missing attributes.
    __repr__ = model_repr('label', 'target')

    def __init__(self, **kwargs):
        """
        Construct the job and fill in any defaults not passed in.

        date_modified falls back to date_created, duration is derived (in
        milliseconds) from the two timestamps when both are present and no
        duration was given, and number is drawn from the next_item_value
        SQL function keyed on the build id when a build is attached.
        """
        super(Job, self).__init__(**kwargs)
        if self.data is None:
            self.data = {}
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.date_modified is None:
            self.date_modified = self.date_created
        if self.date_started and self.date_finished and not self.duration:
            elapsed = self.date_finished - self.date_started
            self.duration = elapsed.total_seconds() * 1000
        if self.number is None and self.build:
            # Assigned as a SQL expression, not as a Python integer.
            next_number = func.next_item_value(self.build.id.hex)
            self.number = select([next_number])
class JobStep(db.Model):
    """
    A single step of a job, attached to a job phase and optionally to the
    node it ran on.
    """
    # TODO(dcramer): make duration a column
    __tablename__ = 'jobstep'

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    phase_id = Column(GUID, ForeignKey('jobphase.id', ondelete="CASCADE"),
                      nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    node_id = Column(GUID, ForeignKey('node.id', ondelete="CASCADE"))
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    job = relationship('Job')
    project = relationship('Project')
    node = relationship('Node')
    phase = relationship(
        'JobPhase',
        backref=backref('steps', order_by='JobStep.date_started'))

    def __init__(self, **kwargs):
        """Construct the step and fill in any defaults not passed in."""
        super(JobStep, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            # The status column is typed Enum(Status); this previously
            # assigned Result.unknown, i.e. a member of the wrong enum.
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.data is None:
            self.data = {}

    @property
    def duration(self):
        """
        Return the duration (in milliseconds) that this item was in-progress,
        or None when either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class Task(db.Model):
    """
    Database record for a task, identified by the
    (task_name, parent_id, child_id) triple.
    """
    __tablename__ = 'task'
    __table_args__ = (
        Index('idx_task_parent_id', 'parent_id', 'task_name'),
        Index('idx_task_child_id', 'child_id', 'task_name'),
        UniqueConstraint('task_name', 'parent_id', 'child_id',
                         name='unq_task_entity'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    task_name = Column(String(128), nullable=False)
    # Mapped as task_id in Python, stored in the child_id column.
    task_id = Column('child_id', GUID, nullable=False)
    parent_id = Column(GUID)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    num_retries = Column(Integer, nullable=False, default=0)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    date_modified = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    # NOTE(review): 'child_id' is the column name; the mapped attribute is
    # task_id -- confirm model_repr resolves it as intended.
    __repr__ = model_repr('task_name', 'parent_id', 'child_id', 'status')

    def __init__(self, **kwargs):
        """
        Construct the task and fill in defaults not passed in; date_modified
        falls back to date_created.
        """
        super(Task, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.date_modified is None:
            self.date_modified = self.date_created

    @classmethod
    def check(cls, task_name, parent_id):
        """
        Report whether every child task of the given parent has finished.

        Returns Status.finished when all matching rows are finished (also
        when there are no matching rows), otherwise Status.in_progress.

        >>> if Task.check('my_task', parent_item.id) == Status.finished:
        >>>     print "all child tasks done!"
        """
        # XXX(dcramer): we could make this fast if we're concerned about # of
        # rows by doing two network hops (first check for in progress, then
        # report result)
        child_tasks = list(
            db.session.query(cls.result, Task.status).filter(
                cls.task_name == task_name,
                cls.parent_id == parent_id,
            ))
        for row in child_tasks:
            if row.status != Status.finished:
                return Status.in_progress
        return Status.finished
class Project(db.Model):
    """A project, addressable by slug and linked to exactly one repository."""
    __tablename__ = 'project'

    id = Column(GUID, primary_key=True, default=uuid4)
    slug = Column(String(64), unique=True, nullable=False)
    repository_id = Column(GUID,
                           ForeignKey('repository.id', ondelete="RESTRICT"),
                           nullable=False)
    name = Column(String(64))
    date_created = Column(DateTime, default=datetime.utcnow)
    avg_build_time = Column(Integer)
    status = Column(Enum(ProjectStatus), default=ProjectStatus.active,
                    server_default='1')

    repository = relationship('Repository')
    plans = association_proxy('project_plans', 'plan')

    def __init__(self, **kwargs):
        """
        Construct the project; a missing id is generated and a missing slug
        is derived from the name via slugify.
        """
        super(Project, self).__init__(**kwargs)
        if not self.id:
            self.id = uuid4()
        if not self.slug:
            self.slug = slugify(self.name)

    @classmethod
    def get(cls, id):
        """
        Look a project up by slug, falling back to a primary-key lookup when
        no slug matches and ``id`` is 32 characters long.

        Returns the project, or None when nothing matches.
        """
        by_slug = cls.query.options(
            joinedload(cls.repository, innerjoin=True),
        ).filter_by(slug=id)
        project = by_slug.first()
        if project is not None or len(id) != 32:
            return project
        return cls.query.options(
            joinedload(cls.repository),
        ).get(id)
class JobPhase(db.Model):
    """
    A labelled phase of a job; labels are unique per job (see the
    unq_jobphase_key constraint).
    """
    # TODO(dcramer): add order column rather than implicity date_started ordering
    # TODO(dcramer): make duration a column
    __tablename__ = 'jobphase'
    __table_args__ = (
        UniqueConstraint('job_id', 'label', name='unq_jobphase_key'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)

    job = relationship(
        'Job',
        backref=backref('phases', order_by='JobPhase.date_started'))
    project = relationship('Project')

    def __init__(self, **kwargs):
        """Construct the phase and fill in any defaults not passed in."""
        super(JobPhase, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            # The status column is typed Enum(Status); this previously
            # assigned Result.unknown, i.e. a member of the wrong enum.
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()

    @property
    def duration(self):
        """
        Return the duration (in milliseconds) that this item was in-progress,
        or None when either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class Command(db.Model):
    """
    A script attached to a job step; the position within the step is given
    by ``order``, unique per step (see the unq_command_order constraint).
    """
    __tablename__ = 'command'
    __table_args__ = (
        UniqueConstraint('jobstep_id', 'order', name='unq_command_order'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    jobstep_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    return_code = Column(Integer, nullable=True)
    script = Column(Text(), nullable=False)
    env = Column(JSONEncodedDict, nullable=True)
    cwd = Column(String(256), nullable=True)
    artifacts = Column(ARRAY(String(256)), nullable=True)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)
    order = Column(Integer, default=0, server_default='0', nullable=False)

    jobstep = relationship(
        'JobStep',
        backref=backref('commands', order_by='Command.order'))

    def __init__(self, **kwargs):
        """Construct the command and fill in any defaults not passed in."""
        super(Command, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.data is None:
            self.data = {}

    @property
    def duration(self):
        """
        Milliseconds between date_started and date_finished, or None when
        either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class BazelTarget(db.Model):
    """
    A named Bazel target recorded for a job, optionally tied to the job step
    that produced it.
    """
    __tablename__ = 'bazeltarget'

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    step_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"))
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    name = Column(Text, nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), default=Result.unknown, nullable=False)
    result_source = Column(Enum(ResultSource),
                           default=ResultSource.from_self)
    duration = Column(Integer, default=0)
    date_created = Column(DateTime, default=datetime.utcnow, nullable=False)

    def __init__(self, **kwargs):
        """Construct the target and fill in any defaults not passed in."""
        super(BazelTarget, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
class RevisionResult(db.Model):
    """
    The result recorded for one revision of one project; at most one row per
    (project_id, revision_sha) pair.
    """
    __tablename__ = 'revisionresult'
    __table_args__ = (
        UniqueConstraint('project_id', 'revision_sha',
                         name='unq_project_revision_pair'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    build_id = Column(GUID, ForeignKey('build.id'))
    revision_sha = Column(String(40), nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete='CASCADE'),
                        nullable=False)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)

    build = relationship('Build')
    project = relationship('Project')

    def __init__(self, **kwargs):
        """Construct the row, generating an id and unknown result if absent."""
        super(RevisionResult, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
class JobStep(db.Model):
    """
    The most granular unit of work; run on a particular node, has a status
    and a result.
    """
    __tablename__ = 'jobstep'
    __table_args__ = (
        Index('idx_jobstep_status', 'status'),
        Index('idx_jobstep_cluster', 'cluster'),
        Index('idx_jobstep_project_date', 'project_id', 'date_created'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    phase_id = Column(GUID, ForeignKey('jobphase.id', ondelete="CASCADE"),
                      nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    node_id = Column(GUID, ForeignKey('node.id', ondelete="CASCADE"))
    # id of JobStep that replaces this JobStep. Usually None, unless a JobStep
    # fails and is retried.
    replacement_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"),
                            unique=True)
    # Used (for non-Jenkins builds) in jobstep_allocate to only allocate
    # jobsteps to slaves of a particular cluster. For Jenkins builds, this is
    # pure documentation (typically set to the Jenkins label), but should be
    # accurate just the same.
    cluster = Column(String(128), nullable=True)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    # The time of the last external interaction indicating progress.
    last_heartbeat = Column(DateTime)
    data = Column(JSONEncodedDict)

    job = relationship('Job')
    project = relationship('Project')
    node = relationship('Node')
    phase = relationship(
        'JobPhase',
        backref=backref('steps', order_by='JobStep.date_started'))
    targets = relationship(BazelTarget, backref=backref('step'))

    __repr__ = model_repr('label')

    def __init__(self, **kwargs):
        """Construct the step and fill in any defaults not passed in."""
        super(JobStep, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.data is None:
            self.data = {}

    @property
    def duration(self):
        """
        Milliseconds between date_started and date_finished, or None when
        either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class JobStep(db.Model):
    """
    The most granular unit of work; run on a particular node, has a status
    and a result.

    But Hark! There's a hack that allows a jobstep, once it has run, to
    rewrite history to say that it was actually multiple jobsteps (even
    organized into separate job phases.) It does this by creating an
    artifact, which the python code picks up and then retroactively alters
    the db to say that this jobstep had multiple steps (I think it purely
    appends new jobsteps after the original.) xplat uses this to very nicely
    display the different parts of their jobstep.
    """
    # TODO(dcramer): make duration a column
    __tablename__ = 'jobstep'
    __table_args__ = (
        Index('idx_jobstep_status', 'status'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    phase_id = Column(GUID, ForeignKey('jobphase.id', ondelete="CASCADE"),
                      nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    node_id = Column(GUID, ForeignKey('node.id', ondelete="CASCADE"))
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    last_heartbeat = Column(DateTime)
    data = Column(JSONEncodedDict)

    job = relationship('Job')
    project = relationship('Project')
    node = relationship('Node')
    phase = relationship(
        'JobPhase',
        backref=backref('steps', order_by='JobStep.date_started'))

    __repr__ = model_repr('label')

    def __init__(self, **kwargs):
        """Construct the step and fill in any defaults not passed in."""
        super(JobStep, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.data is None:
            self.data = {}

    @property
    def duration(self):
        """
        Milliseconds between date_started and date_finished, or None when
        either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000
class Task(db.Model):
    """
    When we enqueue a task, we also write a db row to keep track of the
    task's metadata (e.g. number of times retried.) There is a slightly icky
    custom data column that each task type uses in its own way. This db
    represents the serialized version of tracked_task you see in the changes
    python codebase.

    Tasks can have parent tasks. Parent tasks have the option of waiting for
    their children to complete (in practice, that always happens.)

    Example: sync_job with sync_jobstep children

    Tasks can throw a NotFinished exception, which will just mean that we try
    running it again after some interval (but this has nothing to do with
    retrying tasks that error!) Examples: Tasks with children will check to
    see if their children are finished; the sync_jobstep task will query
    jenkins to see if it's finished.

    Tasks can fire signals, e.g. build xxx has finished. There's a table that
    maps signal types to tasks that should be created. Signals/listeners are
    not tracked as children of other tasks.
    """
    __tablename__ = 'task'
    __table_args__ = (
        Index('idx_task_parent_id', 'parent_id', 'task_name'),
        Index('idx_task_child_id', 'child_id', 'task_name'),
        Index('idx_task_date_created', 'date_created'),
        UniqueConstraint('task_name', 'parent_id', 'child_id',
                         name='unq_task_entity'),
        Index('idx_task_status', 'status'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    task_name = Column(String(128), nullable=False)
    # TODO: Rename 'task_id' to 'child_id' in code to make things less
    # confusing.
    task_id = Column('child_id', GUID, nullable=False)
    parent_id = Column(GUID)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    num_retries = Column(Integer, nullable=False, default=0)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    date_modified = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    # NOTE(review): 'child_id' is the column name; the mapped attribute is
    # task_id -- confirm model_repr resolves it as intended.
    __repr__ = model_repr('task_name', 'parent_id', 'child_id', 'status')

    def __init__(self, **kwargs):
        """
        Construct the task and fill in defaults not passed in; date_modified
        falls back to date_created.
        """
        super(Task, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()
        if self.date_modified is None:
            self.date_modified = self.date_created

    @classmethod
    def check(cls, task_name, parent_id):
        """
        Report whether every child task of the given parent has finished.

        Returns Status.finished when all matching rows are finished (also
        when there are no matching rows), otherwise Status.in_progress.

        >>> if Task.check('my_task', parent_item.id) == Status.finished:
        >>>     print "all child tasks done!"
        """
        # XXX(dcramer): we could make this fast if we're concerned about # of
        # rows by doing two network hops (first check for in progress, then
        # report result)
        child_tasks = list(
            db.session.query(cls.result, Task.status).filter(
                cls.task_name == task_name,
                cls.parent_id == parent_id,
            ))
        for row in child_tasks:
            if row.status != Status.finished:
                return Status.in_progress
        return Status.finished
class TestCase(db.Model):
    """
    An individual test result.
    """
    __tablename__ = 'test'
    __table_args__ = (
        UniqueConstraint('job_id', 'label_sha', name='unq_test_name'),
        Index('idx_test_step_id', 'step_id'),
        Index('idx_test_project_key', 'project_id', 'label_sha'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    step_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"))
    # Mapped as name_sha in Python, stored in the label_sha column.
    name_sha = Column('label_sha', String(40), nullable=False)
    name = Column(Text, nullable=False)
    # Explicit package override; exposed through the ``package`` property.
    _package = Column('package', Text, nullable=True)
    result = Column(Enum(Result), default=Result.unknown, nullable=False)
    duration = Column(Integer, default=0)
    message = deferred(Column(Text))
    date_created = Column(DateTime, default=datetime.utcnow, nullable=False)
    reruns = Column(Integer)

    job = relationship('Job')
    step = relationship('JobStep')
    project = relationship('Project')

    __repr__ = model_repr('name', '_package', 'result')

    def __init__(self, **kwargs):
        """Construct the test case and fill in any defaults not passed in."""
        super(TestCase, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()

    @classmethod
    def calculate_name_sha(cls, name):
        """
        Return the hex SHA-1 digest of ``name``.

        Raises:
            ValueError - When ``name`` is empty or None.
        """
        if not name:
            raise ValueError('cannot compute a name sha for an empty name')
        return sha1(name).hexdigest()

    @property
    def sep(self):
        """
        The separator used to split the name into package and short name:
        '/' when the name starts with a non-alphanumeric character or
        contains a '/', otherwise '.'.
        """
        name = (self._package or self.name)
        # handle the case where it might begin with some special character
        if not re.match(r'^[a-zA-Z0-9]', name) or '/' in name:
            return '/'
        return '.'

    def _get_package(self):
        """
        Return the explicit package if one is set, otherwise everything
        before the last separator in the name (None when the name contains
        no separator).
        """
        if self._package:
            return self._package
        try:
            package, _ = self.name.rsplit(self.sep, 1)
        except ValueError:
            # No separator in the name, so there is nothing to unpack.
            package = None
        return package

    def _set_package(self, value):
        """Store an explicit package, overriding the one derived from name."""
        self._package = value

    package = property(_get_package, _set_package)

    @property
    def short_name(self):
        """
        The name with its leading package and one separator character
        stripped; the full name when it does not start with the package or
        is equal to it.
        """
        name, package = self.name, self.package
        if package and name.startswith(package) and name != package:
            return name[len(package) + 1:]
        return name
class Project(db.Model):
    """
    The way we organize changes. Each project is linked to one repository,
    and usually kicks off builds for it when new revisions come in (or just
    for some revisions based on filters.)

    Projects use build plans (see plan) to describe the work to be done for
    a build.
    """
    __tablename__ = 'project'

    id = Column(GUID, primary_key=True, default=uuid4)
    slug = Column(String(64), unique=True, nullable=False)
    repository_id = Column(GUID,
                           ForeignKey('repository.id', ondelete="RESTRICT"),
                           nullable=False)
    name = Column(String(64))
    date_created = Column(DateTime, default=datetime.utcnow)
    avg_build_time = Column(Integer)
    status = Column(Enum(ProjectStatus), default=ProjectStatus.active,
                    server_default='1')

    repository = relationship('Repository')
    plans = association_proxy('project_plans', 'plan')

    def __init__(self, **kwargs):
        """
        Construct the project; a missing id is generated and a missing slug
        is derived from the name via slugify.
        """
        super(Project, self).__init__(**kwargs)
        if not self.id:
            self.id = uuid4()
        if not self.slug:
            self.slug = slugify(self.name)

    @classmethod
    def get(cls, id):
        """
        Look a project up by slug, falling back to a primary-key lookup when
        no slug matches and ``id`` is 32 characters long.

        Returns the project, or None when nothing matches.
        """
        project = cls.query.options(
            joinedload(cls.repository, innerjoin=True),
        ).filter_by(slug=id).first()
        if project is None and len(id) == 32:
            project = cls.query.options(
                joinedload(cls.repository),
            ).get(id)
        return project

    # Values used for any key the project's config file does not set.
    # get_config() hands out copies, never these objects themselves.
    _default_config = {
        'build.file-blacklist': [],
        'bazel.additional-test-flags': [],
        'bazel.selective-testing-enabled': False,
        'bazel.exclude-tags': ['manual'],  # Ignore tests with manual tag
        'bazel.cpus': DEFAULT_CPUS,
        'bazel.mem': DEFAULT_MEMORY_MB,
        'bazel.max-executors': 1,
    }

    def get_config_path(self):
        """Return the repository-relative path of this project's config."""
        # TODO in the future, get this file path from ProjectOption
        return '{}.yaml'.format(self.slug)

    def get_config(self, revision_sha, diff=None, config_path=None):
        '''Get the config for this project.

        Right now, the config lives at {slug}.yaml, at the root of the
        repository. This will change later on.

        The supplied config is applied on top of the default config
        (`_default_config`). In the case where the file is not found, or the
        file's YAML is not a dict, the default config is returned.

        Args:
            revision_sha (str): The sha identifying the revision, so the
                returned config is for that revision.
            diff (str): The diff to apply before reading the config, used
                for diff builds. Optional.
            config_path (str): The path of the config file

        Returns:
            dict - the config. Default values are copies, so the caller may
                mutate the result freely.

        Raises:
            ConcurrentUpdateError - When vcs update failed because another
                vcs update is running
            InvalidDiffError - When the supplied diff does not apply
            ProjectConfigError - When the config file is invalid YAML.
            NotImplementedError - When the project has no vcs backend
            UnknownRevision - When the supplied revision_sha does not appear
                to exist
        '''
        from copy import deepcopy
        # changes.vcs.base imports some models, which may lead to circular
        # imports, so let's import on-demand
        from changes.vcs.base import CommandError, ContentReadError, MissingFileError, ConcurrentUpdateError, UnknownRevision

        if config_path is None:
            config_path = self.get_config_path()
        vcs = self.repository.get_vcs()
        if vcs is None:
            raise NotImplementedError
        else:
            try:
                # repo might not be updated on this machine yet
                try:
                    config_content = vcs.read_file(revision_sha, config_path, diff=diff)
                except UnknownRevision:
                    try:
                        vcs.update()
                    except ConcurrentUpdateError:
                        # Retry once if it was already updating.
                        vcs.update()
                    # now that we've updated the repo, try reading the file again
                    config_content = vcs.read_file(revision_sha, config_path, diff=diff)
            # this won't catch error when diff doesn't apply, which is good.
            except CommandError as err:
                logging.warning('Git invocation failed for project %s: %s', self.slug, str(err), exc_info=True)
                config_content = '{}'
            except MissingFileError:
                config_content = '{}'
            except ContentReadError as err:
                logging.warning('Config for project %s cannot be read: %s', self.slug, str(err), exc_info=True)
                config_content = '{}'

        try:
            config = yaml.safe_load(config_content)
            if not isinstance(config, dict):
                # non-dict configs are technically invalid, but until we
                # have a good way to message invalid configs,
                # it's better to just ignore the config rather than breaking
                # the control flow of `get_config()` callers.
                logging.warning('Config for project %s is not a dict, using default config', self.slug,
                                extra={'data': {'revision': revision_sha, 'diff': diff}})
                config = {}
        except yaml.YAMLError:
            raise ProjectConfigError(
                'Invalid project config file {}'.format(config_path))

        for k, v in self._default_config.iteritems():
            if k not in config:
                # Copy the default: several defaults are lists, and handing
                # out the class-level object would let one caller's mutation
                # leak into every later config.
                config[k] = deepcopy(v)
        return config
class JobPhase(db.Model):
    """A JobPhase is a grouping of one or more JobSteps performing the same
    basic task. The phases of a Job are intended to be executed sequentially,
    though that isn't necessarily enforced.

    One example of phase usage: a Job may have a test collection phase and a
    test execution phase, with a single JobStep collecting tests in the first
    phase and an arbitrary number of JobSteps executing shards of the
    collected tests in the second phase. By using two phases, the types of
    JobSteps can be tracked and managed independently.

    Though JobPhases are typically created to group newly created JobSteps,
    they can also be constructed retroactively once a JobStep has finished
    based on phased artifacts. This is convenient but a little confusing, and
    perhaps should be handled by another mechanism.
    """
    # TODO(dcramer): add order column rather than implicity date_started ordering
    # TODO(dcramer): make duration a column
    __tablename__ = 'jobphase'
    __table_args__ = (
        UniqueConstraint('job_id', 'label', name='unq_jobphase_key'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    job_id = Column(GUID, ForeignKey('job.id', ondelete="CASCADE"),
                    nullable=False)
    project_id = Column(GUID, ForeignKey('project.id', ondelete="CASCADE"),
                        nullable=False)
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)

    job = relationship(
        'Job',
        backref=backref('phases', order_by='JobPhase.date_started'))
    project = relationship('Project')

    def __init__(self, **kwargs):
        """Construct the phase and fill in any defaults not passed in."""
        super(JobPhase, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.status is None:
            self.status = Status.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()

    @property
    def duration(self):
        """
        Milliseconds between date_started and date_finished, or None when
        either timestamp is missing.
        """
        if not (self.date_started and self.date_finished):
            return None
        elapsed = self.date_finished - self.date_started
        return elapsed.total_seconds() * 1000

    @property
    def current_steps(self):
        """
        The steps of this phase that have no replacement_id set, i.e. that
        have not been replaced.
        """
        # self.steps is provided by a backref declared on JobStep
        active = []
        for step in self.steps:
            if step.replacement_id is None:
                active.append(step)
        return active
class Project(db.Model):
    """
    The way we organize changes. Each project is linked to one repository,
    and usually kicks off builds for it when new revisions come in (or just
    for some revisions based on filters.) Projects use build plans (see
    plan) to describe the work to be done for a build.
    """
    __tablename__ = 'project'

    id = Column(GUID, primary_key=True, default=uuid4)
    slug = Column(String(64), unique=True, nullable=False)
    repository_id = Column(
        GUID, ForeignKey('repository.id', ondelete="RESTRICT"), nullable=False)
    name = Column(String(64))
    date_created = Column(DateTime, default=datetime.utcnow)
    avg_build_time = Column(Integer)
    status = Column(Enum(ProjectStatus), default=ProjectStatus.active,
                    server_default='1')

    repository = relationship('Repository')
    plans = association_proxy('project_plans', 'plan')

    def __init__(self, **kwargs):
        """
        Build a project from column keyword arguments.

        A missing ``id`` is replaced with a fresh UUID and a missing ``slug``
        is derived from ``name`` via ``slugify``.
        """
        super(Project, self).__init__(**kwargs)
        if not self.id:
            self.id = uuid4()
        if not self.slug:
            self.slug = slugify(self.name)

    @classmethod
    def get(cls, id):
        """
        Look a project up by slug, falling back to a primary-key lookup.

        Args:
            id (str): The project slug, or a 32-character identifier that
                is passed to ``Query.get`` when no slug matches.

        Returns:
            Project or None - the matching project with its repository
            eagerly joined, or None when nothing matches.
        """
        project = cls.query.options(
            joinedload(cls.repository, innerjoin=True),
        ).filter_by(slug=id).first()
        # Only retry as a primary key when the value is exactly 32 characters.
        if project is None and len(id) == 32:
            project = cls.query.options(
                joinedload(cls.repository),
            ).get(id)
        return project

    # Defaults layered underneath every project config. The values are
    # mutable, so get_config() hands out copies rather than these objects.
    _default_config = {'build.file-blacklist': []}

    def get_config_path(self):
        """Return the path of this project's config file: ``{slug}.yaml``."""
        # TODO in the future, get this file path from ProjectOption
        return '{}.yaml'.format(self.slug)

    def get_config(self, revision_sha, diff=None, config_path=None):
        """Get the config for this project.

        Right now, the config lives at {slug}.yaml, at the root of the
        repository. This will change later on.

        The supplied config is applied on top of the default config
        (`_default_config`). In the case where the file is not found, the
        default config is returned.

        Args:
            revision_sha (str): The sha identifying the revision, so the
                returned config is for that revision.
            diff (str): The diff to apply before reading the config, used
                for diff builds. Optional.
            config_path (str): The path of the config file. Defaults to
                ``get_config_path()`` when None.

        Returns:
            dict - the config. Default values are copied into it, so the
            caller may mutate the result without affecting other calls.

        Raises:
            InvalidDiffError - When the supplied diff does not apply
            ProjectConfigError - When the config file is in an invalid
                format.
            NotImplementedError - When the project has no vcs backend
        """
        import copy

        # changes.vcs.base imports some models, which may lead to circular
        # imports, so let's import on-demand
        from changes.vcs.base import CommandError

        if config_path is None:
            config_path = self.get_config_path()

        vcs = self.repository.get_vcs()
        if vcs is None:
            raise NotImplementedError

        try:
            config_content = vcs.read_file(
                revision_sha, config_path, diff=diff)
        except CommandError:
            # this won't catch error when diff doesn't apply, which is good.
            config_content = '{}'

        try:
            config = yaml.safe_load(config_content)
        except yaml.YAMLError:
            raise ProjectConfigError(
                'Invalid project config file {}'.format(config_path))
        if not isinstance(config, dict):
            raise ProjectConfigError(
                'Invalid project config file {}'.format(config_path))

        for key, default in self._default_config.items():
            if key not in config:
                # Copy so that a caller mutating the returned config cannot
                # alter the class-level defaults shared by all projects.
                config[key] = copy.deepcopy(default)
        return config
class Job(db.Model):
    """
    An instantiation of a plan for a particular build. We run the code
    specified by the appropriate plan. That code creates and farms out a
    bunch of jobsteps to do the actual work.
    """
    __tablename__ = 'job'
    __table_args__ = (
        Index('idx_build_project_id', 'project_id'),
        Index('idx_build_change_id', 'change_id'),
        Index('idx_build_source_id', 'source_id'),
        Index('idx_build_family_id', 'build_id'),
        Index('idx_job_source_date', 'source_id', 'status', 'date_created'),
        UniqueConstraint('build_id', 'number', name='unq_job_number'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    number = Column(Integer)
    # TODO(dcramer): change should be removed in favor of an m2m between
    # Change and Source
    build_id = Column(GUID, ForeignKey('build.id', ondelete="CASCADE"))
    change_id = Column(GUID, ForeignKey('change.id', ondelete="CASCADE"))
    project_id = Column(
        GUID, ForeignKey('project.id', ondelete="CASCADE"), nullable=False)
    source_id = Column(GUID, ForeignKey('source.id', ondelete="CASCADE"))
    label = Column(String(128), nullable=False)
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    duration = Column(Integer)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    date_modified = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    # Used to distinguish between normal and autogenerated jobs.
    # Autogenerated jobs require special logic to generate buildsteps
    # since they have no jobplan associated with them.
    autogenerated = Column(Boolean, nullable=False, default=False)

    change = relationship('Change')
    # Also exposes ``Build.jobs``, ordered by job number.
    build = relationship(
        'Build',
        backref=backref('jobs', order_by='Job.number'),
        innerjoin=True,
    )
    project = relationship('Project')
    source = relationship('Source')
    targets = relationship(BazelTarget, backref=backref('job'))

    # NOTE(review): this class defines no 'target' attribute (only
    # 'targets') -- confirm model_repr tolerates a missing attribute.
    __repr__ = model_repr('label', 'target')

    def __init__(self, **kwargs):
        """
        Build a job from column keyword arguments and fill in whatever the
        caller left unset: ``data``, ``id``, ``result``, ``status``, the
        created/modified dates, ``duration`` (milliseconds, when both the
        start and finish dates are known) and ``number`` (when a build is
        attached).
        """
        super(Job, self).__init__(**kwargs)

        if self.data is None:
            self.data = {}

        if self.id is None:
            self.id = uuid.uuid4()

        if self.result is None:
            self.result = Result.unknown

        if self.status is None:
            self.status = Status.unknown

        if self.date_created is None:
            self.date_created = datetime.utcnow()

        # A job that was never modified shares its creation timestamp.
        if self.date_modified is None:
            self.date_modified = self.date_created

        has_both_dates = self.date_started and self.date_finished
        if has_both_dates and not self.duration:
            elapsed = self.date_finished - self.date_started
            self.duration = elapsed.total_seconds() * 1000

        if self.number is None and self.build:
            # Assigned as a SQL expression built from the build's id.
            sequence_key = self.build.id.hex
            self.number = select([func.next_item_value(sequence_key)])
class Build(db.Model):
    """
    Represents a collection of jobs for a single target, as well as the sum
    of their results.

    Each Build contains many Jobs (usually linked to a JobPlan).
    """
    __tablename__ = 'build'
    __table_args__ = (
        Index('idx_buildfamily_project_id', 'project_id'),
        Index('idx_buildfamily_repository_sha',
              'repository_id', 'revision_sha'),
        Index('idx_buildfamily_author_id', 'author_id'),
        Index('idx_buildfamily_patch_id', 'patch_id'),
        Index('idx_buildfamily_source_id', 'source_id'),
        UniqueConstraint('project_id', 'number', name='unq_build_number'),
    )

    id = Column(GUID, primary_key=True, default=uuid.uuid4)
    number = Column(Integer)
    project_id = Column(
        GUID, ForeignKey('project.id', ondelete="CASCADE"), nullable=False)
    source_id = Column(GUID, ForeignKey('source.id', ondelete="CASCADE"))
    # TODO(dcramer): repo/sha/patch_id should be removed in favor of source
    revision_sha = Column(String(40))
    repository_id = Column(
        GUID, ForeignKey('repository.id', ondelete="CASCADE"), nullable=False)
    patch_id = Column(GUID, ForeignKey('patch.id', ondelete="CASCADE"))
    author_id = Column(GUID, ForeignKey('author.id', ondelete="CASCADE"))
    cause = Column(Enum(Cause), nullable=False, default=Cause.unknown)
    label = Column(String(128), nullable=False)
    target = Column(String(128))
    status = Column(Enum(Status), nullable=False, default=Status.unknown)
    result = Column(Enum(Result), nullable=False, default=Result.unknown)
    message = Column(Text)
    duration = Column(Integer)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)
    date_created = Column(DateTime, default=datetime.utcnow)
    date_modified = Column(DateTime, default=datetime.utcnow)
    data = Column(JSONEncodedDict)

    project = relationship('Project', innerjoin=True)
    repository = relationship('Repository')
    source = relationship('Source', innerjoin=True)
    patch = relationship('Patch')
    author = relationship('Author')

    __repr__ = model_repr('label', 'target')

    def __init__(self, **kwargs):
        """
        Build an instance from column keyword arguments and fill in whatever
        the caller left unset: ``id``, ``result``, ``status``, the
        created/modified dates, ``duration`` (milliseconds, when both the
        start and finish dates are known) and ``number`` (when a project is
        attached).
        """
        super(Build, self).__init__(**kwargs)

        if self.id is None:
            self.id = uuid.uuid4()

        if self.result is None:
            self.result = Result.unknown

        if self.status is None:
            self.status = Status.unknown

        if self.date_created is None:
            self.date_created = datetime.utcnow()

        # A build that was never modified shares its creation timestamp.
        if self.date_modified is None:
            self.date_modified = self.date_created

        has_both_dates = self.date_started and self.date_finished
        if has_both_dates and not self.duration:
            elapsed = self.date_finished - self.date_started
            self.duration = elapsed.total_seconds() * 1000

        if self.number is None and self.project:
            # Assigned as a SQL expression built from the project's id.
            sequence_key = self.project.id.hex
            self.number = select([func.next_item_value(sequence_key)])
class TestCase(db.Model):
    """
    A single run of a single test, together with any captured output,
    retry-count and its return value.

    Every test that gets run ever has a row in this table.

    At the time this was written, it seems to have 400-500M rows

    (how is this still surviving?)

    NOTE: DO NOT MODIFY THIS TABLE! Running migration on this table has
    caused unavailability in the past. If you need to add a new column,
    consider doing that on a new table and linking it back to tests via the
    ID.
    """
    __tablename__ = 'test'
    __table_args__ = (
        UniqueConstraint('job_id', 'label_sha', name='unq_test_name'),
        Index('idx_test_step_id', 'step_id'),
        Index('idx_test_project_key', 'project_id', 'label_sha'),
        Index('idx_task_date_created', 'date_created'),
        Index('idx_test_project_key_date',
              'project_id', 'label_sha', 'date_created'),
    )

    id = Column(GUID, nullable=False, primary_key=True, default=uuid.uuid4)
    job_id = Column(
        GUID, ForeignKey('job.id', ondelete="CASCADE"), nullable=False)
    project_id = Column(
        GUID, ForeignKey('project.id', ondelete="CASCADE"), nullable=False)
    step_id = Column(GUID, ForeignKey('jobstep.id', ondelete="CASCADE"))
    # Stored in the 'label_sha' database column.
    name_sha = Column('label_sha', String(40), nullable=False)
    name = Column(Text, nullable=False)
    # Stored in the 'package' column; read and written via ``package``.
    _package = Column('package', Text, nullable=True)
    result = Column(Enum(Result), default=Result.unknown, nullable=False)
    duration = Column(Integer, default=0)
    message = deferred(Column(Text))
    date_created = Column(DateTime, default=datetime.utcnow, nullable=False)
    reruns = Column(Integer)

    # owner should be considered an unstructured string field. It may contain
    # an email address ("Foo <foo@example.com>"), a username ("foo"), or
    # something else. This field is not used directly by Changes, so
    # providers + consumers on either side of Changes should be sure they
    # know what they're doing.
    owner = Column(Text)

    job = relationship('Job')
    step = relationship('JobStep')
    project = relationship('Project')

    __repr__ = model_repr('name', '_package', 'result')

    def __init__(self, **kwargs):
        """
        Build a test case from column keyword arguments, filling in ``id``,
        ``result`` and ``date_created`` when they were not given.
        """
        super(TestCase, self).__init__(**kwargs)
        if self.id is None:
            self.id = uuid.uuid4()
        if self.result is None:
            self.result = Result.unknown
        if self.date_created is None:
            self.date_created = datetime.utcnow()

    @classmethod
    def calculate_name_sha(cls, name):
        """
        Return the hexadecimal SHA-1 digest of a test name.

        Text (unicode) names are encoded as UTF-8 before hashing so that
        non-ASCII names hash instead of raising; byte strings are hashed
        unchanged.

        Args:
            name: The test name, as text or bytes.

        Returns:
            str - the 40-character hex digest.

        Raises:
            ValueError - When ``name`` is empty or None.
        """
        if not name:
            raise ValueError('name must be a non-empty string')
        # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
        if isinstance(name, type(u'')):
            name = name.encode('utf-8')
        return sha1(name).hexdigest()

    @property
    def sep(self):
        """
        Return the separator ('/' or '.') used to split this test's name.

        '/' is chosen when the package (or, failing that, the name) does not
        start with an ASCII letter or digit, or contains a '/'; otherwise
        '.' is used.
        """
        name = (self._package or self.name)
        # handle the case where it might begin with some special character
        if not re.match(r'^[a-zA-Z0-9]', name):
            return '/'
        if '/' in name:
            return '/'
        return '.'

    def _get_package(self):
        """
        Return the stored package, or derive it from ``name`` by splitting
        off the last ``sep``-delimited component. Returns None when the name
        contains no separator.
        """
        if self._package:
            return self._package
        try:
            package, _ = self.name.rsplit(self.sep, 1)
        except ValueError:
            # The name has no separator, so there is no package part.
            package = None
        return package

    def _set_package(self, value):
        """Store ``value`` as the explicit package."""
        self._package = value

    package = property(_get_package, _set_package)

    @property
    def short_name(self):
        """
        Return ``name`` with the leading package and the one character that
        follows it removed; return ``name`` unchanged when there is no
        package, the name does not start with it, or the two are equal.
        """
        name, package = self.name, self.package
        if package and name.startswith(package) and name != package:
            # +1 skips the single separator character after the package.
            return name[len(package) + 1:]
        return name