class DiffIssueOccurrence(BaseDocument):

    #calculated as hash(diff.hash,issue_occurrence.hash,key)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    diff = ForeignKeyField('Diff', backref='issue_occurrences')
    issue_occurrence = ForeignKeyField('IssueOccurrence', backref='diff_issue_occurrences')
    key = EnumField(enums=('added', 'fixed'))
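#Illustrative sketch (not part of the models above): one way the `hash` field of a
#DiffIssueOccurrence could be derived, following the comment in the class. Using
#hashlib/SHA-256 here is an assumption; the project may use its own Hasher helper instead.
import hashlib

def compute_diff_issue_occurrence_hash(diff_hash, issue_occurrence_hash, key):
    hasher = hashlib.sha256()
    for value in (diff_hash, issue_occurrence_hash, key):
        hasher.update(value.encode('utf-8'))
    return hasher.hexdigest()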
class DiffFileRevision(BaseDocument):

    #calculated as hash(diff.hash,file_revision.hash,key)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    diff = ForeignKeyField('Diff', backref='file_revisions')
    file_revision = ForeignKeyField('FileRevision', backref='diffs')
    key = EnumField(enums=('added', 'deleted', 'modified'))
class ProjectIssueClass(BaseDocument):

    project = ForeignKeyField('Project', backref='project_issue_classes')
    issue_class = ForeignKeyField('IssueClass', backref='project_issue_classes')
    enabled = BooleanField(default=True)

    class Meta(BaseDocument.Meta):
        unique_together = (('project', 'issue_class'),)
class Movie(Document):

    title = CharField(nullable=True, indexed=True)
    director = ForeignKeyField(related='Director', nullable=True, backref='movies')
    cast = ManyToManyField(related='Actor')
    year = IntegerField(indexed=True)
    best_actor = ForeignKeyField('Actor', backref='best_movies')

    class Meta(Document.Meta):
        dbref_includes = ['title', 'year']
class GitBranch(BaseDocument):

    project = ForeignKeyField('Project', backref='git_branches')
    name = CharField(indexed=True, length=100)
    hash = CharField(indexed=True, length=64)
    remote = CharField(indexed=True, length=100)
    last_analyzed_snapshot = ForeignKeyField('GitSnapshot')
    head_snapshot = ForeignKeyField('GitSnapshot')

    class Meta(BaseDocument.Meta):
        unique_together = [('project', 'name')]
class Director(Document):
    """
    Warning: There is a circular foreign key relationship between Director and Movie,
    hence trying to save a pair of those objects that point to each other will yield
    an exception for e.g. the Postgres backend.
    """

    name = CharField(indexed=True)
    favorite_actor = ForeignKeyField('Actor')
    best_movie = ForeignKeyField('Movie', unique=True, backref='best_of_director')
class IssueOccurrence(BaseDocument):

    #can be uniquely identified by its file_revision.pk, issue.pk and from_row, to_row, from_column, to_column, sequence
    #calculated as hash(file_revision.hash,issue.hash,from_row,to_row,from_column,to_column,sequence)
    hash = CharField(indexed=True, length=64)
    file_revision = ForeignKeyField('FileRevision', backref='issue_occurrences')
    issue = ForeignKeyField('Issue', backref='issue_occurrences')
    from_row = IntegerField()
    to_row = IntegerField()
    from_column = IntegerField()
    to_column = IntegerField()
    sequence = IntegerField(default=0)
class Movie(Document):

    class Meta(Document.Meta):
        autoregister = False

    director = ForeignKeyField("Director", backref="movies")
    actors = ManyToManyField("Actor", backref="movies")
    name = CharField(indexed=True)
class DirectorAward(Document):

    class Meta(Document.Meta):
        autoregister = False

    name = CharField(indexed=True)
    director = ForeignKeyField('Director', backref='awards')
class Issue(BaseDocument):
    """
    An `Issue` object represents an issue or problem with the code. It can be associated
    with one or multiple file revisions, code objects etc. An issue fingerprint should be
    a unique identifier for a given issue, hence if two issues have the same fingerprint
    they should be judged "identical".
    """

    class IgnoreReason:
        not_specified = 0
        not_relevant = 1
        false_positive = 2

    #calculated as hash(analyzer,code,fingerprint)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project', backref='issues', nullable=False)
    analyzer = CharField(indexed=True, length=100, nullable=False)
    code = CharField(indexed=True, length=100, nullable=False)
    fingerprint = CharField(indexed=True, length=255, nullable=False)
    #determines if this issue should be ignored
    ignore = BooleanField(indexed=True, default=False, nullable=False, server_default=False)
    #gives a reason for the issue to be ignored (e.g. false_positive)
    ignore_reason = IntegerField(indexed=True, nullable=True)
    #an optional comment for the ignore reason
    ignore_comment = CharField(indexed=False, length=255, nullable=True)

    class Meta(Document.Meta):
        unique_together = [('project', 'fingerprint', 'analyzer', 'code')]
        dbref_includes = ['code', 'analyzer']
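#Illustrative sketch (assumed usage, not from the original source): flagging an existing
#Issue as a false positive via the IgnoreReason constants defined above. Persisting the
#change is left to whatever backend the caller uses.
def ignore_as_false_positive(issue, comment=None):
    issue.ignore = True
    issue.ignore_reason = Issue.IgnoreReason.false_positive
    issue.ignore_comment = comment
    return issue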
class GitSnapshot(BaseDocument):

    project = ForeignKeyField('Project', unique=False, backref='git_snapshots')
    snapshot = ForeignKeyField('Snapshot', unique=True, backref='git_snapshot')
    sha = CharField(indexed=True, length=40)
    hash = CharField(indexed=True, length=64)
    committer_date = DateTimeField(indexed=True)
    author_date = DateTimeField(indexed=True)
    author_name = CharField(length=100)
    committer_date_ts = IntegerField(indexed=True)
    author_date_ts = IntegerField(indexed=True)
    tree_sha = CharField(indexed=True, length=40)
    log = TextField(indexed=False)

    class Meta(BaseDocument.Meta):
        unique_together = [('project', 'sha')]
class Example(BaseDocument):
    """
    An example model that has a relationship to a given user.
    """

    export_map = ("test", )

    user = ForeignKeyField("User", backref="example", unique=True, ondelete="CASCADE")
    test = TextField()
class Task(BaseDocument):
    """
    This class is used to store backend tasks that need to be performed,
    e.g. analyzing a project.
    """

    class Type:
        analysis = "analysis"
        delete = "delete"
        reset = "reset"

    project = ForeignKeyField('Project')
    type = CharField(indexed=True, length=50)
    status = CharField(indexed=True, length=50)
    last_ping = DateTimeField(indexed=True)
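#Illustrative sketch (assumed usage, not from the original source): queueing an analysis
#task for a project. The 'pending' status value and the dict-style constructor are
#assumptions, the latter based on how other documents are instantiated elsewhere in the code.
import datetime

def queue_analysis_task(project):
    return Task({
        'project': project,
        'type': Task.Type.analysis,
        'status': 'pending',
        'last_ping': datetime.datetime.utcnow(),
    })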
class FileRevision(BaseDocument):

    #calculated as hash(path,sha)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project')
    path = CharField(indexed=True, length=2000)
    language = CharField(indexed=True, length=50)
    sha = CharField(indexed=True, length=64)
    dependencies = ManyToManyField('FileRevision', backref='dependent_file_revisions')

    class Meta(Document.Meta):
        collection = "filerevision"

    def get_file_content(self):
        #the file content is attached lazily (e.g. by GitRepository.get_file_revisions),
        #either as a plain value or as a callable that fetches it on demand
        if hasattr(self, '_file_content'):
            if callable(self._file_content):
                return self._file_content()
            return self._file_content
        raise NotImplementedError
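#Illustrative sketch (assumed usage, not from the original source): attaching a lazy content
#loader to a FileRevision read from disk, so get_file_content() only opens the file when it
#is actually needed. The helper name and the on-disk layout are assumptions.
import os

def attach_disk_content(file_revision, base_path):
    full_path = os.path.join(base_path, file_revision.path)
    #stored as a callable, so the read is deferred until get_file_content() is called
    file_revision._file_content = lambda: open(full_path, 'rb').read()
    return file_revision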
class Issue(BaseDocument):
    """
    An `Issue` object represents an issue or problem with the code. It can be associated
    with one or multiple file revisions, code objects etc. An issue fingerprint should be
    a unique identifier for a given issue, hence if two issues have the same fingerprint
    they should be judged "identical".
    """

    #calculated as hash(analyzer,code,fingerprint)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project', backref='issues', nullable=False)
    analyzer = CharField(indexed=True, length=100, nullable=False)
    code = CharField(indexed=True, length=100, nullable=False)
    fingerprint = CharField(indexed=True, length=255, nullable=False)

    class Meta(Document.Meta):
        unique_together = [('project', 'fingerprint', 'analyzer', 'code')]
        dbref_includes = ['code', 'analyzer']
class Snapshot(BaseDocument):

    #calculated by the creating object
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project')
    file_revisions = ManyToManyField('FileRevision', backref='snapshots')
    analyzed = BooleanField(indexed=True)

    class Meta(Document.Meta):
        pass

    def load(self, data):
        """
        Imports a snapshot from a data structure
        """
        pass

    def export(self):
        """
        Exports a snapshot to a data structure
        """

    def summarize_issues(self, include_filename=False, ignore=False):
        if isinstance(self.backend, SqlBackend):
            return self._summarize_issues_sql(
                include_filename=include_filename, ignore=ignore)
        raise NotImplementedError

    def _summarize_issues_sql(self, include_filename=False, ignore=False):

        snapshot_file_revisions_table = self.backend.get_table(
            self.fields['file_revisions'].relationship_class)
        fr_table = self.backend.get_table(FileRevision)
        issue_table = self.backend.get_table(Issue)
        issue_occurrence_table = self.backend.get_table(IssueOccurrence)
        project_issue_class_table = self.backend.get_table(ProjectIssueClass)
        issue_class_table = self.backend.get_table(self.project.IssueClass)

        project_pk_type = self.backend.get_field_type(self.project.fields['pk'])
        snapshot_pk_type = self.backend.get_field_type(self.fields['pk'])

        #we group by file revision path, issue code and analyzer
        group_columns = [
            fr_table.c.language,
            fr_table.c.path,
            issue_table.c.code,
            issue_table.c.analyzer
        ]

        #we perform a JOIN of the file revision table to the issue tables
        table = fr_table\
            .join(issue_occurrence_table, fr_table.c.pk == issue_occurrence_table.c.file_revision)\
            .join(issue_table, and_(issue_table.c.pk == issue_occurrence_table.c.issue,
                                    issue_table.c.ignore == ignore))

        #here we make sure that the given issue class is enabled for the project
        subselect = select([issue_class_table.c.pk])\
            .select_from(issue_class_table.join(project_issue_class_table))\
            .where(and_(
                issue_table.c.analyzer == issue_class_table.c.analyzer,
                issue_table.c.code == issue_class_table.c.code,
                issue_table.c.ignore == ignore,
                project_issue_class_table.c.project == expression.cast(self.project.pk, project_pk_type),
                project_issue_class_table.c.enabled == True))

        file_revisions_select = select([snapshot_file_revisions_table.c.filerevision])\
            .where(snapshot_file_revisions_table.c.snapshot == expression.cast(self.pk, snapshot_pk_type))

        #we select the aggregated issues for all file revisions in this snapshot
        s = select(group_columns + [func.count().label('count')])\
            .select_from(table)\
            .where(and_(exists(subselect), fr_table.c.pk.in_(file_revisions_select)))\
            .group_by(*group_columns)\
            .order_by(fr_table.c.path)

        #we fetch the result
        with self.backend.transaction():
            result = self.backend.connection.execute(s).fetchall()

        #we aggregate the issues by path fragments
        aggregator = lambda f: directory_splitter(
            f['path'], include_filename=include_filename)

        #we perform a map/reduce on the result
        #the resulting items will contain the number of files and the number of issues in the file
        map_reducer = IssuesMapReducer(aggregators=[aggregator])
        return map_reducer.mapreduce(result)
class AccessToken(BaseDocument):

    token = CharField(indexed=True, length=64)
    user = ForeignKeyField('User', backref='access_tokens')
class Director(Document):

    name = CharField(indexed=True)
    favorite_actor = ForeignKeyField('Actor')
    best_movie = ForeignKeyField('Movie', unique=True, backref='best_of_director')
class Role(Document):

    role = CharField(indexed=True)
    actor = ForeignKeyField("Actor", nullable=False)
    movie = ForeignKeyField("Movie", nullable=False)
class Stripe(Document):

    user = ForeignKeyField(User, unique=True, backref="stripe")
class Subscription(Document):

    user = ForeignKeyField(User, unique=True, backref="subscription")
class Diff(BaseDocument):

    #calculated as hash(snapshot_a.hash,snapshot_b.hash)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project', backref='diffs')
    snapshot_a = ForeignKeyField('Snapshot', backref='diffs_a')
    snapshot_b = ForeignKeyField('Snapshot', backref='diffs_b')

    def get_issues_count(self, by_severity=False):
        if isinstance(self.backend, SqlBackend):
            return self._get_issues_count_sql(by_severity=by_severity)
        raise NotImplementedError

    def _get_issues_count_sql(self, by_severity=False):

        diff_issue_occurrence_table = self.backend.get_table(DiffIssueOccurrence)
        issue_class_table = self.backend.get_table(self.project.IssueClass)
        project_issue_class_table = self.backend.get_table(ProjectIssueClass)
        issue_occurrence_table = self.backend.get_table(IssueOccurrence)
        issue_table = self.backend.get_table(Issue)

        s = select([diff_issue_occurrence_table.c.key,
                    issue_class_table.c.severity,
                    func.count().label('count')])\
            .select_from(diff_issue_occurrence_table\
                .join(issue_occurrence_table, diff_issue_occurrence_table.c.issue_occurrence == issue_occurrence_table.c.pk)\
                .join(issue_table)\
                .join(issue_class_table, and_(issue_table.c.analyzer == issue_class_table.c.analyzer,
                                              issue_table.c.code == issue_class_table.c.code))\
                .join(project_issue_class_table, and_(
                    project_issue_class_table.c.issue_class == issue_class_table.c.pk,
                    project_issue_class_table.c.enabled == True,
                    project_issue_class_table.c.project == self.project.pk)))\
            .where(diff_issue_occurrence_table.c.diff == self.pk)\
            .group_by(diff_issue_occurrence_table.c.key, issue_class_table.c.severity)

        with self.backend.transaction():
            result = self.backend.connection.execute(s).fetchall()

        if by_severity:
            counts = {'added': {}, 'fixed': {}}
            for row in result:
                if not row['severity'] in counts[row['key']]:
                    counts[row['key']][row['severity']] = 0
                counts[row['key']][row['severity']] += row['count']
        else:
            counts = {'added': 0, 'fixed': 0}
            for row in result:
                counts[row['key']] += row['count']

        return counts

    def _summarize_issues_sql(self, include_filename=False, ignore=False):

        diff_issue_occurrence_table = self.backend.get_table(DiffIssueOccurrence)
        issue_occurrence_table = self.backend.get_table(IssueOccurrence)
        issue_table = self.backend.get_table(Issue)
        file_revision_table = self.backend.get_table(FileRevision)
        project_issue_class_table = self.backend.get_table(ProjectIssueClass)
        issue_class_table = self.backend.get_table(self.project.IssueClass)

        #we group by file revision path, issue code and analyzer
        group_columns = [
            file_revision_table.c.language,
            file_revision_table.c.path,
            diff_issue_occurrence_table.c['key'],
            #we should not group by pk
            # diff_issue_occurrence_table.c['pk'],
            issue_table.c.code,
            issue_table.c.analyzer
        ]

        project_pk_type = self.backend.get_field_type(self.project.fields['pk'])

        #here we make sure that the given issue class is enabled for the project
        subselect = select([issue_class_table.c.pk])\
            .select_from(issue_class_table.join(project_issue_class_table))\
            .where(and_(
                issue_table.c.analyzer == issue_class_table.c.analyzer,
                issue_table.c.code == issue_class_table.c.code,
                issue_table.c.ignore == ignore,
                project_issue_class_table.c.project == expression.cast(self.project.pk, project_pk_type),
                project_issue_class_table.c.enabled == True))

        #we perform a JOIN of the file revision table to the issue tables
        table = diff_issue_occurrence_table\
            .join(issue_occurrence_table,
                  issue_occurrence_table.c.pk == diff_issue_occurrence_table.c.issue_occurrence)\
            .join(issue_table, and_(issue_occurrence_table.c.issue == issue_table.c.pk,
                                    issue_table.c.ignore == ignore))\
            .join(file_revision_table)

        #we select the aggregated issues for all file revisions in this diff
        s = select(group_columns + [func.count().label('count')])\
            .select_from(table)\
            .where(and_(exists(subselect), diff_issue_occurrence_table.c.diff == self.pk))\
            .group_by(*group_columns)\
            .order_by(file_revision_table.c.path)

        #we fetch the result
        with self.backend.transaction():
            result = self.backend.connection.execute(s).fetchall()

        #we aggregate the issues by path fragments
        aggregator = lambda f: directory_splitter(
            f['path'], include_filename=include_filename)

        added_issues = []
        fixed_issues = []

        for row in result:
            if row['key'] == 'added':
                added_issues.append(row)
            else:
                fixed_issues.append(row)

        #we perform a map/reduce on the result
        map_reducer = IssuesMapReducer(aggregators=[aggregator],
                                       group_by=['language', 'analyzer', 'code'])

        return {
            'added': map_reducer.mapreduce(added_issues),
            'fixed': map_reducer.mapreduce(fixed_issues)
        }

    def summarize_issues(self, include_filename=False, ignore=False):
        if isinstance(self.backend, SqlBackend):
            return self._summarize_issues_sql(
                include_filename=include_filename, ignore=ignore)
        raise NotImplementedError
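#Illustrative sketch (assumed usage, not from the original source): reporting what a diff
#between two snapshots added and fixed. The 'added'/'fixed' keys mirror the counts
#dictionaries built in get_issues_count() above; the print format is illustrative only.
def report_diff(diff):
    counts = diff.get_issues_count(by_severity=True)
    print("added:", counts['added'])
    print("fixed:", counts['fixed'])
    return diff.summarize_issues(include_filename=True)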
class UserRole(BaseDocument):

    user = ForeignKeyField('User', backref='user_roles')
    project = ForeignKeyField('Project', backref='user_roles')
    role = CharField(indexed=True, length=30)
class GitRepository(BaseDocument):

    path_ = CharField(indexed=True)
    project = ForeignKeyField('Project', backref='git', unique=True)
    default_branch = CharField(indexed=True)

    @property
    def path(self):
        return self.path_

    @path.setter
    def path(self, path):
        self.path_ = path

    def get_settings(self):
        default_branch = self.get_default_branch()
        if default_branch is None:
            return
        branches = self.repository.get_branches()
        if default_branch in branches:
            latest_commit = self.repository.get_commits(default_branch, limit=1)[0]
            try:
                checkmate_file_content = self.repository\
                    .get_file_content(latest_commit['sha'], '.checkmate.yml')
                try:
                    checkmate_settings = yaml.load(checkmate_file_content)
                    return checkmate_settings
                except:
                    raise ValueError("Cannot parse checkmate YML file!")
            except:
                logger.warning("No .checkmate.yml file found!")
                return

    @property
    def repository(self):
        if not hasattr(self, '_repository'):
            self._repository = Repository(self.path)
        return self._repository

    def get_snapshots(self, **kwargs):
        """
        Returns a list of snapshots in a given repository.
        """
        commits = self.repository.get_commits(**kwargs)
        snapshots = []
        for commit in commits:
            for key in ('committer_date', 'author_date'):
                commit[key] = datetime.datetime.fromtimestamp(commit[key + '_ts'])
            snapshot = GitSnapshot(commit)
            hasher = Hasher()
            hasher.add(snapshot.sha)
            snapshot.hash = hasher.digest.hexdigest()
            snapshot.project = self.project
            snapshot.pk = uuid.uuid4().hex
            snapshots.append(snapshot)
        return snapshots

    def get_file_revisions(self, commit_sha, filters=None):
        files = self.repository.get_files_in_commit(commit_sha)
        if filters:
            for filter_func in filters:
                files = [
                    f for f in files
                    if f['path'] in filter_func([ff['path'] for ff in files])
                ]
        file_revisions = []
        for file_obj in files:
            hasher = Hasher()
            file_revision = FileRevision(file_obj)
            hasher.add(file_revision.path)
            hasher.add(file_revision.sha)
            file_revision.project = self.project
            file_revision.hash = hasher.digest.hexdigest()
            file_revision.pk = uuid.uuid4().hex
            #the file content is attached as a callable so it is fetched lazily
            #when FileRevision.get_file_content() is called
            file_revision._file_content = \
                lambda commit_sha=commit_sha, file_revision=file_revision: \
                    self.repository.get_file_content(commit_sha, file_revision.path)
            file_revisions.append(file_revision)
        return file_revisions

    def get_default_branch(self):
        branches = self.repository.get_branches()
        if self.default_branch in branches:
            return self.default_branch
        elif 'origin/master' in branches:
            return 'origin/master'
        elif branches:
            return branches[0]
        else:
            return
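#Illustrative sketch (assumed usage, not from the original source): collecting the file
#revisions of the most recent commit of a project's repository. The keyword arguments are
#forwarded to Repository.get_commits(), whose exact signature is assumed here.
def newest_commit_file_revisions(git_repository):
    snapshots = git_repository.get_snapshots(limit=1)
    if not snapshots:
        return []
    return git_repository.get_file_revisions(snapshots[0].sha)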
class DiskSnapshot(BaseDocument):

    snapshot = ForeignKeyField('Snapshot', backref='disk_snapshot', unique=True)