Esempio n. 1
0
class Email(db.Model):
    """An email address seen in commits, optionally tied to a contributor.

    The same address can appear on a commit as author and/or as committer,
    so both directions are exposed as separate relationships.
    """

    __tablename__ = 'email'

    # The address itself is the primary key.
    email = db.Column(db.String(240), primary_key=True)

    # Owning contributor (may be unset, see `__init__`).
    contributor_login = db.Column(
        db.String(240), ForeignKey('contributor.login'), index=True)
    contributor = db.relationship("Contributor", back_populates="emails")

    # Commits that reference this address, split by role.
    author_commits = db.relationship(
        "Commit",
        foreign_keys='Commit.author_email_address',
        back_populates="author_email",
    )
    committer_commits = db.relationship(
        "Commit",
        foreign_keys='Commit.committer_email_address',
        back_populates="committer_email",
    )
    unknown = db.Column(db.Boolean(), default=False)

    def __init__(self, email, contributor=None):
        """Create an email entry, optionally attached to `contributor`."""
        self.email = email
        self.contributor = contributor

    @staticmethod
    def get_email(email_address: str, session, do_commit=True):
        """Return the `Email` for `email_address`, creating it if missing.

        A concurrent insert of the same address surfaces as an
        `IntegrityError`; in that case the session is rolled back and the
        lookup is repeated. The last `IntegrityError` is re-raised once all
        attempts (the initial one plus `tries` retries) have failed.

        The new row is only committed when `do_commit` is true.
        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                email = session.query(Email).get(email_address)
                if email:
                    return email

                email = Email(email_address)
                session.add(email)
                if do_commit:
                    session.commit()
                return email
            except IntegrityError as e:
                print(f'Got an Email IntegrityError, Try {_try} of {tries}')
                session.rollback()
                exception = e

        raise exception
Esempio n. 2
0
class Organization(db.Model):
    """Organization model."""

    __tablename__ = 'organization'

    login = db.Column(db.String(240), primary_key=True)
    url = db.Column(db.String(240))

    # Many-to-many link to contributors via `contributor_organizations`.
    contributors = db.relationship("Contributor",
                                   secondary=contributor_organizations,
                                   back_populates="organizations")

    def __init__(self, login, url):
        """Construct an `Organization`."""
        self.login = login
        self.url = url

    @staticmethod
    def get_organization(login: str, url: str, session):
        """Return the organization with this `login`, creating it if missing.

        Try multiple times, as concurrent workers may insert the same
        organization, which surfaces as an `IntegrityError` on commit.
        The last `IntegrityError` is re-raised once all attempts (the
        initial one plus `tries` retries) have failed.
        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                organization = session.query(Organization).get(login)
                if not organization:
                    organization = Organization(login, url)
                session.add(organization)
                session.commit()
                return organization
            except IntegrityError as e:
                print(
                    f'Got an Organization IntegrityError, Try {_try} of {tries}'
                )
                # Without a rollback the session stays in a failed state and
                # the query in the next attempt cannot run, which would
                # defeat the retry loop.
                session.rollback()
                exception = e

        raise exception
Esempio n. 3
0
class TimezoneInterval(db.Model):
    """A time span during which a timezone had a given UTC offset.

    Rows are identified by the combination of timezone name, start and end.
    A check constraint enforces that `start` lies strictly before `end`.
    """

    __tablename__ = 'timezone_interval'
    __table_args__ = (db.CheckConstraint('"start" < "end"'), )

    timezone = db.Column(db.String(120), nullable=False, primary_key=True)
    utcoffset = db.Column(db.Interval(), index=True)
    start = db.Column(
        db.DateTime(timezone=True),
        index=True,
        primary_key=True,
    )
    end = db.Column(
        db.DateTime(timezone=True),
        index=True,
        primary_key=True,
    )

    def __init__(self, timezone, offset, start, end):
        """Create an interval for `timezone` with UTC offset `offset`."""
        self.timezone = timezone
        self.utcoffset = offset
        self.start = start
        self.end = end

    def __repr__(self):
        """Return a short debug representation of the interval."""
        return (
            f'<TimezoneInterval {self.timezone}, {self.utcoffset}, '
            f'{self.start}-{self.end}>'
        )
Esempio n. 4
0
class AnalysisResult(db.Model):
    """Stored analysis results for a contributor."""

    __tablename__ = 'analysis_result'

    # Surrogate key, generated client-side via `uuid4`.
    id = db.Column(UUID(as_uuid=True), primary_key=True, default=uuid4)

    # Owning contributor. The attribute name (including its spelling) is the
    # column name, so it must stay as is.
    contributer_login = db.Column(
        db.String(240),
        ForeignKey(
            'contributor.login',
            ondelete='CASCADE',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    )

    # Timezone check
    commit_count = db.Column(db.Integer())
    timezone_switches = db.Column(db.Integer())
    different_timezones = db.Column(db.Integer())
    last_change = db.Column(db.DateTime())
    intermediate_results = db.Column(JSONB)

    # Relationships
    contributor = db.relationship(
        "Contributor",
        back_populates="analysis_result",
    )
Esempio n. 5
0
class Commit(db.Model):
    """Commit model."""

    __tablename__ = 'commit'
    # Additions and deletions are either both known or both unknown.
    __table_args__ = (CheckConstraint(
        "(additions is NULL and deletions is NULL) or "
        "(additions is not NULL and deletions is not NULL)", ), )

    sha = db.Column(db.String(40), primary_key=True)
    commit_time = db.Column(db.DateTime(timezone=True))
    commit_time_offset = db.Column(db.Interval())
    creation_time = db.Column(db.DateTime(timezone=True))
    creation_time_offset = db.Column(db.Interval())
    additions = db.Column(db.Integer())
    deletions = db.Column(db.Integer())

    # Email addresses
    author_email_address = db.Column(
        db.String(240),
        ForeignKey('email.email'),
        index=True,
        nullable=False,
    )
    committer_email_address = db.Column(
        db.String(240),
        ForeignKey('email.email'),
        index=True,
        nullable=False,
    )
    # Both relationships target `Email`, so the foreign key has to be named
    # explicitly for each of them.
    author_email = db.relationship(
        "Email",
        back_populates="author_commits",
        foreign_keys=[author_email_address],
    )
    committer_email = db.relationship(
        "Email",
        back_populates="committer_commits",
        foreign_keys=[committer_email_address],
    )

    repositories = db.relationship(
        "Repository",
        secondary=commit_repository,
        back_populates="commits",
    )

    def __init__(self, sha, repository, author_email, committer_email):
        """Create a commit and attach it to `repository`."""
        self.sha = sha
        self.author_email = author_email
        self.committer_email = committer_email
        self.repositories.append(repository)

    def local_time(self):
        """Get the local commit time for this commit.

        The stored `commit_time` is normalized to a naive UTC timestamp and
        then shifted by `commit_time_offset`.

        Returns:
            A naive `datetime` in the committer's local time. If
            `commit_time` is naive it is returned unchanged, and if
            `commit_time_offset` is not set the naive UTC time is returned.

        """
        utc_offset = self.commit_time.utcoffset()
        if utc_offset is None:
            # Naive timestamp: there is no offset to strip.
            return self.commit_time

        # Test against None rather than truthiness: a timestamp that is
        # already in UTC has an offset of `timedelta(0)`, which is falsy and
        # previously skipped the conversion to local time entirely.
        utc_time = (self.commit_time - utc_offset).replace(tzinfo=None)
        if self.commit_time_offset is None:
            return utc_time

        return utc_time + self.commit_time_offset
Esempio n. 6
0
"""Representation of a git commit."""
from sqlalchemy import ForeignKey, CheckConstraint

from gitalizer.extensions import db

# Association table for the many-to-many link between commits and
# repositories. Each (repository, commit) pair may only occur once.
commit_repository = db.Table(
    'commit_repository',
    db.Column(
        'commit_sha',
        db.String(40),
        ForeignKey(
            'commit.sha',
            ondelete='CASCADE',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    ),
    db.Column(
        'repository_clone_url',
        db.String(240),
        ForeignKey(
            'repository.clone_url',
            ondelete='CASCADE',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    ),
    db.UniqueConstraint('repository_clone_url', 'commit_sha'),
)


class Commit(db.Model):
    """Commit model."""

    __tablename__ = 'commit'
    __table_args__ = (CheckConstraint(
        "(additions is NULL and deletions is NULL) or "
Esempio n. 7
0
class Contributor(db.Model):
    """Contributor model."""

    __tablename__ = 'contributor'

    login = db.Column(db.String(240), primary_key=True, nullable=False)
    emails = db.relationship("Email", back_populates="contributor")
    location = db.Column(db.String(240))

    # Relationships
    repositories = db.relationship(
        "Repository",
        secondary=contributor_repository,
        back_populates="contributors")
    organizations = db.relationship(
        "Organization",
        secondary=contributor_organizations,
        back_populates="contributors")
    analysis_result = db.relationship(
        "AnalysisResult",
        uselist=False,
        back_populates="contributor")

    too_big = db.Column(db.Boolean, default=False, server_default='FALSE', nullable=False)
    last_full_scan = db.Column(db.DateTime(timezone=True))

    def __init__(self, login: str):
        """Constructor."""
        self.login = login

    def __repr__(self):
        """Format a `Contributor` object."""
        return f'<Contributor {self.login}>'

    @staticmethod
    def get_contributor(login: str, session, eager_repositories=False, do_commit=True):
        """Return the contributor with this `login`, creating it if missing.

        Try multiple times, as concurrent workers may insert the same
        contributor, which surfaces as an `IntegrityError`. The session is
        rolled back before each retry and the last `IntegrityError` is
        re-raised once all attempts (the initial one plus `tries` retries)
        have failed.

        Args:
            login: Primary key of the contributor.
            session: Database session used for the lookup and insert.
            eager_repositories: Load `repositories` together with the
                contributor via a joined load.
            do_commit: Commit the session after adding the contributor.

        """
        tries = 3
        exception = None
        for _try in range(tries + 1):
            try:
                query = session.query(Contributor)
                if eager_repositories:
                    # `Query.options` returns a new query instead of
                    # modifying in place, so the result must be kept.
                    query = query.options(joinedload(Contributor.repositories))
                contributor = query.get(login)
                if not contributor:
                    contributor = Contributor(login)
                session.add(contributor)
                if do_commit:
                    session.commit()
                return contributor
            except IntegrityError as e:
                logger.error(f'Got an Contributor IntegrityError, Try {_try} of {tries}')
                session.rollback()
                exception = e

        raise exception

    def should_scan(self):
        """Check whether this contributor needs to be scanned.

        A scan is needed if the contributor has no repositories, has never
        been fully scanned, or if the last full scan is older than the
        configured `contributor_rescan_interval` (in seconds).
        """
        if not self.repositories or self.last_full_scan is None:
            return True

        rescan_interval = int(config['aggregator']['contributor_rescan_interval'])
        rescan_threshold = datetime.now(timezone.utc) - timedelta(seconds=rescan_interval)

        # `last_full_scan` is known to be set at this point.
        return not self.last_full_scan >= rescan_threshold
Esempio n. 8
0
class Repository(db.Model):
    """A git repository, identified by its clone URL."""

    __tablename__ = 'repository'

    clone_url = db.Column(db.String(240), primary_key=True)
    # Self-reference to the repository this one was forked from.
    parent_url = db.Column(
        db.String(240),
        ForeignKey(
            'repository.clone_url',
            ondelete='SET NULL',
            onupdate='CASCADE',
            deferrable=True,
        ),
        index=True,
    )
    name = db.Column(db.String(240))
    full_name = db.Column(db.String(240), unique=True)
    created_at = db.Column(db.DateTime(timezone=True))

    # Status flags, all defaulting to false on both client and server side.
    fork = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    broken = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    too_big = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    completely_scanned = db.Column(
        db.Boolean(), default=False, server_default='FALSE', nullable=False)
    updated_at = db.Column(
        db.DateTime, server_default=func.now(), nullable=False)

    # Forks of this repository; each child gets a `parent` backref.
    children = db.relationship(
        "Repository",
        backref=backref('parent', remote_side=[clone_url]),
    )

    commits = db.relationship(
        "Commit",
        secondary=commit_repository,
        back_populates="repositories",
    )
    # Same association as `commits`, exposed as a dict keyed by commit sha.
    commits_by_hash = db.relationship(
        "Commit",
        collection_class=attribute_mapped_collection('sha'),
        secondary=commit_repository,
    )

    contributors = db.relationship(
        "Contributor",
        secondary=contributor_repository,
        back_populates="repositories",
    )

    def __init__(self, clone_url, name=None, full_name=None):
        """Create a repository for `clone_url`."""
        self.clone_url = clone_url
        self.name = name
        self.full_name = full_name

    @staticmethod
    def get_or_create(session, clone_url: str, name=None, full_name=None):
        """Fetch the repository for `clone_url`, inserting it if unknown.

        A newly created repository is committed immediately.
        """
        existing = session.query(Repository).get(clone_url)
        if existing:
            return existing

        created = Repository(clone_url, name, full_name)
        session.add(created)
        session.commit()

        return created

    def should_scan(self):
        """Check whether this repository needs to be scanned.

        Forks, broken or too big repositories are never scanned. Completely
        scanned repositories are skipped as long as `updated_at` lies within
        the configured `repository_rescan_interval` (in seconds).
        """
        interval_seconds = int(
            config['aggregator']['repository_rescan_interval'])
        threshold = datetime.utcnow() - timedelta(seconds=interval_seconds)
        is_fresh = self.completely_scanned and self.updated_at >= threshold

        return not (self.fork or self.broken or self.too_big or is_fresh)

    def is_invalid(self):
        """Check if we should skip this repository for checking."""
        return self.broken or self.too_big