class Email(db.Model):
    """An email address seen on commits.

    The address string is the primary key. An email may be linked to one
    `Contributor` and is referenced by commits through two separate foreign
    keys: once as author address and once as committer address.
    """

    __tablename__ = 'email'

    email = db.Column(db.String(240), primary_key=True)

    # Contributor
    contributor_login = db.Column(
        db.String(240),
        ForeignKey('contributor.login'),
        index=True,
    )
    contributor = db.relationship("Contributor", back_populates="emails")

    # Commits relationships. `Commit` has two foreign keys pointing at this
    # table, so each relationship names the key it follows.
    author_commits = db.relationship(
        "Commit",
        back_populates="author_email",
        foreign_keys='Commit.author_email_address',
    )
    committer_commits = db.relationship(
        "Commit",
        back_populates="committer_email",
        foreign_keys='Commit.committer_email_address',
    )

    unknown = db.Column(db.Boolean(), default=False)

    def __init__(self, email, contributor=None):
        """Create an email, optionally attached to a contributor.

        Args:
            email: The address string (primary key).
            contributor: Optional `Contributor` owning this address.
        """
        self.email = email
        self.contributor = contributor

    @staticmethod
    def get_email(email_address: str, session, do_commit=True):
        """Return the `Email` for an address, creating it if it is missing.

        Several threads may try to insert the same address at once, so an
        `IntegrityError` is answered with a rollback and another lookup.

        Args:
            email_address: The address to look up or create.
            session: The database session to work on.
            do_commit: Commit right after adding a newly created email.

        Returns:
            The existing or newly created `Email`.

        Raises:
            IntegrityError: If every attempt (the first one plus `tries`
                retries) failed; the last error is re-raised.
        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                email = session.query(Email).get(email_address)
                if not email:
                    email = Email(email_address)
                    session.add(email)
                    if do_commit:
                        session.commit()
                return email
            except IntegrityError as e:
                # Report through the module logger, like
                # Contributor.get_contributor does.
                logger.error(
                    f'Got an Email IntegrityError, Try {_try} of {tries}'
                )
                # The session is unusable until the failed transaction
                # has been rolled back.
                session.rollback()
                exception = e
        raise exception
class Organization(db.Model):
    """An organization, keyed by its login, with its member contributors."""

    __tablename__ = 'organization'

    login = db.Column(db.String(240), primary_key=True)
    url = db.Column(db.String(240))

    contributors = db.relationship(
        "Contributor",
        secondary=contributor_organizations,
        back_populates="organizations",
    )

    def __init__(self, login, url):
        """Construct an `Organization`.

        Args:
            login: The organization login (primary key).
            url: The URL stored for the organization.
        """
        self.login = login
        self.url = url

    @staticmethod
    def get_organization(login: str, url: str, session):
        """Return the `Organization` for a login, creating it if missing.

        Several threads may try to insert the same organization at once, so
        an `IntegrityError` is answered with a rollback and another lookup.

        Args:
            login: The organization login to look up or create.
            url: The URL used when a new organization has to be created.
            session: The database session to work on.

        Returns:
            The existing or newly created `Organization`.

        Raises:
            IntegrityError: If every attempt (the first one plus `tries`
                retries) failed; the last error is re-raised.
        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                organization = session.query(Organization).get(login)
                if not organization:
                    organization = Organization(login, url)
                    session.add(organization)
                    session.commit()
                return organization
            except IntegrityError as e:
                logger.error(
                    f'Got an Organization IntegrityError, Try {_try} of {tries}'
                )
                # Without a rollback the session stays in its failed state
                # and the next query would raise instead of finding the row
                # that the competing thread inserted.
                session.rollback()
                exception = e
        raise exception
class AnalysisResult(db.Model):
    """Stored analysis results belonging to one contributor."""

    __tablename__ = 'analysis_result'

    # Primary key, generated client side through `uuid4`.
    id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid4)

    # Owning contributor. The column name (including its spelling) is part
    # of the schema, so it is kept as is.
    contributer_login = db.Column(
        db.String(240),
        ForeignKey(
            'contributor.login',
            ondelete='CASCADE',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    )

    # Timezone check
    commit_count = db.Column(db.Integer())
    timezone_switches = db.Column(db.Integer())
    different_timezones = db.Column(db.Integer())
    last_change = db.Column(db.DateTime())

    intermediate_results = db.Column(JSONB)

    # Relationships
    contributor = db.relationship(
        "Contributor",
        back_populates="analysis_result",
    )
class Commit(db.Model):
    """A git commit, keyed by its sha.

    A commit references two emails (author and committer) and can belong to
    several repositories. A check constraint requires `additions` and
    `deletions` to be either both NULL or both set.
    """

    __tablename__ = 'commit'
    __table_args__ = (
        CheckConstraint(
            "(additions is NULL and deletions is NULL) or "
            "(additions is not NULL and deletions is not NULL)",
        ),
    )

    sha = db.Column(db.String(40), primary_key=True)

    commit_time = db.Column(db.DateTime(timezone=True))
    commit_time_offset = db.Column(db.Interval())
    creation_time = db.Column(db.DateTime(timezone=True))
    creation_time_offset = db.Column(db.Interval())

    additions = db.Column(db.Integer())
    deletions = db.Column(db.Integer())

    # Email addresses
    author_email_address = db.Column(
        db.String(240),
        ForeignKey('email.email'),
        index=True,
        nullable=False,
    )
    committer_email_address = db.Column(
        db.String(240),
        ForeignKey('email.email'),
        index=True,
        nullable=False,
    )

    # Both relationships target `Email`, so each names its own foreign key.
    author_email = db.relationship(
        "Email",
        back_populates="author_commits",
        foreign_keys=[author_email_address],
    )
    committer_email = db.relationship(
        "Email",
        back_populates="committer_commits",
        foreign_keys=[committer_email_address],
    )

    repositories = db.relationship(
        "Repository",
        secondary=commit_repository,
        back_populates="commits",
    )

    def __init__(self, sha, repository, author_email, committer_email):
        """Create a commit and attach it to its first repository.

        Args:
            sha: The commit hash (primary key).
            repository: `Repository` the commit is appended to.
            author_email: `Email` of the author.
            committer_email: `Email` of the committer.
        """
        self.sha = sha
        self.author_email = author_email
        self.committer_email = committer_email
        self.repositories.append(repository)

    def local_time(self):
        """Return the commit time as a naive local datetime.

        The stored `commit_time` is shifted by its own UTC offset (when it
        has a non-zero one), its tzinfo is dropped, and `commit_time_offset`
        is added on top.
        """
        utc_offset = self.commit_time.utcoffset()
        if utc_offset:
            shifted = self.commit_time - utc_offset
        else:
            shifted = self.commit_time

        naive = shifted.replace(tzinfo=None)
        return naive + self.commit_time_offset
class Contributor(db.Model):
    """A contributor, keyed by login.

    Linked to emails, repositories, organizations and at most one
    `AnalysisResult`.
    """

    __tablename__ = 'contributor'

    login = db.Column(db.String(240), primary_key=True, nullable=False)
    emails = db.relationship("Email", back_populates="contributor")
    location = db.Column(db.String(240))

    # Relationships
    repositories = db.relationship(
        "Repository",
        secondary=contributor_repository,
        back_populates="contributors",
    )
    organizations = db.relationship(
        "Organization",
        secondary=contributor_organizations,
        back_populates="contributors",
    )
    analysis_result = db.relationship(
        "AnalysisResult",
        uselist=False,
        back_populates="contributor",
    )

    too_big = db.Column(
        db.Boolean,
        default=False,
        server_default='FALSE',
        nullable=False,
    )
    # Compared against the rescan threshold in `should_scan`.
    last_full_scan = db.Column(db.DateTime(timezone=True))

    def __init__(self, login: str):
        """Create a contributor with the given login."""
        self.login = login

    def __repr__(self):
        """Format a `Contributor` object."""
        return f'<Contributor {self.login}>'

    @staticmethod
    def get_contributor(login: str, session,
                        eager_repositories=False, do_commit=True):
        """Return the `Contributor` for a login, creating it if missing.

        Several threads may try to insert the same contributor at once, so
        an `IntegrityError` is answered with a rollback and another lookup.

        Args:
            login: The contributor login to look up or create.
            session: The database session to work on.
            eager_repositories: Load `repositories` with a joined load as
                part of the lookup.
            do_commit: Commit right after adding a newly created
                contributor.

        Returns:
            The existing or newly created `Contributor`.

        Raises:
            IntegrityError: If every attempt (the first one plus `tries`
                retries) failed; the last error is re-raised.
        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                query = session.query(Contributor)
                if eager_repositories:
                    # Query.options() returns a new query rather than
                    # changing the one it is called on, so the result has
                    # to be kept for the option to take effect.
                    query = query.options(
                        joinedload(Contributor.repositories))
                contributor = query.get(login)
                if not contributor:
                    contributor = Contributor(login)
                    session.add(contributor)
                    # Commit to prevent data loss in case we get an
                    # integrity error and need to rollback.
                    if do_commit:
                        session.commit()
                return contributor
            except IntegrityError as e:
                logger.error(
                    f'Got an Contributor IntegrityError, Try {_try} of {tries}'
                )
                session.rollback()
                exception = e
        raise exception

    def should_scan(self):
        """Tell whether this contributor is due for a scan.

        Returns:
            True when the contributor has no repositories, was never fully
            scanned, or the last full scan is older than the configured
            `contributor_rescan_interval` (in seconds); False otherwise.
        """
        if not self.repositories or self.last_full_scan is None:
            return True

        rescan_interval = int(
            config['aggregator']['contributor_rescan_interval'])
        rescan_threshold = (
            datetime.now(timezone.utc) - timedelta(seconds=rescan_interval)
        )
        # Due again once the last full scan lies before the threshold.
        return self.last_full_scan < rescan_threshold
class Repository(db.Model):
    """A git repository, keyed by its clone URL.

    A repository may point at a parent repository through `parent_url` and
    is linked to its commits and contributors.
    """

    __tablename__ = 'repository'

    clone_url = db.Column(db.String(240), primary_key=True)
    # Self-referencing foreign key to the parent repository.
    parent_url = db.Column(
        db.String(240),
        ForeignKey(
            'repository.clone_url',
            ondelete='SET NULL',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    )
    name = db.Column(db.String(240))
    full_name = db.Column(db.String(240), unique=True)
    created_at = db.Column(db.DateTime(timezone=True))

    # State flags; `should_scan` and `is_invalid` read them.
    fork = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    broken = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    too_big = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    completely_scanned = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)

    updated_at = db.Column(
        db.DateTime, server_default=func.now(), nullable=False)

    children = db.relationship(
        "Repository",
        backref=backref('parent', remote_side=[clone_url]),
    )

    commits = db.relationship(
        "Commit",
        secondary=commit_repository,
        back_populates="repositories",
    )
    # The same association as `commits`, exposed as a mapping keyed by sha.
    commits_by_hash = db.relationship(
        "Commit",
        collection_class=attribute_mapped_collection('sha'),
        secondary=commit_repository,
    )

    contributors = db.relationship(
        "Contributor",
        secondary=contributor_repository,
        back_populates="repositories",
    )

    def __init__(self, clone_url, name=None, full_name=None):
        """Create a repository.

        Args:
            clone_url: The clone URL (primary key).
            name: Optional repository name.
            full_name: Optional full name; the column is unique.
        """
        self.clone_url = clone_url
        self.name = name
        self.full_name = full_name

    @staticmethod
    def get_or_create(session, clone_url: str, name=None, full_name=None):
        """Fetch the repository with this clone URL or create it.

        A newly created repository is added to the session and committed
        right away.
        """
        existing = session.query(Repository).get(clone_url)
        if existing:
            return existing

        created = Repository(clone_url, name, full_name)
        session.add(created)
        session.commit()
        return created

    def should_scan(self):
        """Tell whether this repository should be scanned.

        Returns:
            False for forks, for broken or too big repositories, and for
            repositories that are completely scanned and were updated within
            the configured `repository_rescan_interval` (in seconds); True
            otherwise.
        """
        interval_seconds = int(
            config['aggregator']['repository_rescan_interval'])
        threshold = datetime.utcnow() - timedelta(seconds=interval_seconds)
        recently_scanned = (
            self.completely_scanned and self.updated_at >= threshold
        )

        skip = self.fork or self.broken or self.too_big or recently_scanned
        return not skip

    def is_invalid(self):
        """Check whether this repository is flagged as broken or too big."""
        return self.broken or self.too_big