class DiffIssueOccurrence(BaseDocument):
    """Join document between a ``Diff`` and an ``IssueOccurrence``.

    ``key`` is restricted to ``'added'`` or ``'fixed'``.
    """

    # calculated as hash(diff.hash, issue_occurrence.hash, key)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)

    diff = ForeignKeyField('Diff', backref='issue_occurrences')
    issue_occurrence = ForeignKeyField(
        'IssueOccurrence',
        backref='diff_issue_occurrences',
    )

    key = EnumField(enums=('added', 'fixed'))
class DiffFileRevision(BaseDocument):
    """Join document between a ``Diff`` and a ``FileRevision``.

    ``key`` is restricted to ``'added'``, ``'deleted'`` or ``'modified'``.
    """

    # calculated as hash(diff.hash, file_revision.hash, key)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)

    diff = ForeignKeyField('Diff', backref='file_revisions')
    file_revision = ForeignKeyField('FileRevision', backref='diffs')

    key = EnumField(enums=('added', 'deleted', 'modified'))
class Actor(Document):
    """Actor document with indexed scalar fields and many-to-many relations."""

    name = CharField(indexed=True)
    gross_income_m = FloatField(indexed=True)

    # The two salary fields are stored under the dotted keys given by ``key``.
    salary_amount = FloatField(indexed=True, key="salary.amount")
    salary_currency = CharField(indexed=True, key="salary.currency")

    appearances = IntegerField(indexed=True)
    birth_year = IntegerField(indexed=True)
    favorite_food = ManyToManyField("Food")
    is_funny = BooleanField(indexed=True)
    movies = ManyToManyField("Movie", backref="actors")
class Actor(Document):
    """Actor document with indexed scalar fields and many-to-many relations."""

    name = CharField(indexed=True)
    gross_income_m = FloatField(indexed=True)

    # The two salary fields are stored under the dotted keys given by ``key``.
    salary_amount = FloatField(indexed=True, key='salary.amount')
    salary_currency = CharField(indexed=True, key='salary.currency')

    appearances = IntegerField(indexed=True)
    birth_year = IntegerField(indexed=True)
    favorite_food = ManyToManyField('Food')
    is_funny = BooleanField(indexed=True)
    movies = ManyToManyField('Movie', backref='actors')
class GitBranch(BaseDocument):
    """Git branch belonging to a project.

    Holds the branch name, remote and hash, plus references to its head
    snapshot and its last analyzed snapshot. A ``(project, name)`` pair is
    declared unique.
    """

    project = ForeignKeyField('Project', backref='git_branches')

    name = CharField(indexed=True, length=100)
    hash = CharField(indexed=True, length=64)
    remote = CharField(indexed=True, length=100)

    last_analyzed_snapshot = ForeignKeyField('GitSnapshot')
    head_snapshot = ForeignKeyField('GitSnapshot')

    class Meta(BaseDocument.Meta):
        unique_together = [('project', 'name')]
class Task(BaseDocument):
    """
    This class is used to store backend tasks that need to be performed,
    e.g. analyzing a project.
    """

    class Type:
        """String constants naming the known task types."""

        analysis = "analysis"
        delete = "delete"
        reset = "reset"

    project = ForeignKeyField('Project')

    # NOTE(review): ``type`` presumably holds one of the ``Type`` constants;
    # nothing in this class enforces that.
    type = CharField(indexed=True, length=50)
    status = CharField(indexed=True, length=50)
    last_ping = DateTimeField(indexed=True)
class Movie(Document):
    """Movie document linked to one director and many actors."""

    class Meta(Document.Meta):
        # Automatic registration is switched off for this document class.
        autoregister = False

    director = ForeignKeyField('Director', backref='movies')
    actors = ManyToManyField('Actor', backref='movies')
    name = CharField(indexed=True)
class DirectorAward(Document):
    """Named award that references a single director."""

    class Meta(Document.Meta):
        # Automatic registration is switched off for this document class.
        autoregister = False

    name = CharField(indexed=True)
    director = ForeignKeyField('Director', backref='awards')
class Movie(Document):
    """Movie document linked to one director and many actors."""

    class Meta(Document.Meta):
        # Automatic registration is switched off for this document class.
        autoregister = False

    director = ForeignKeyField("Director", backref="movies")
    actors = ManyToManyField("Actor", backref="movies")
    name = CharField(indexed=True)
class Meta:
    """Document metadata: primary-key field type, primary-key name, indexes."""

    # Field used for the primary key: a 32-character, indexed,
    # non-nullable string.
    PkType = CharField(
        length=32,
        primary_key=True,
        indexed=True,
        nullable=False,
    )

    # Name of the attribute that holds the primary key.
    primary_key = "pk"

    # No indexes are declared here.
    indexes = {}
class GitSnapshot(BaseDocument):
    """Git commit metadata attached one-to-one to a ``Snapshot``.

    Stores the commit and tree SHAs, author/committer dates (both as
    datetimes and as integer timestamps), the author name and the log text.
    A ``(project, sha)`` pair is declared unique.
    """

    project = ForeignKeyField('Project', unique=False, backref='git_snapshots')
    snapshot = ForeignKeyField('Snapshot', unique=True, backref='git_snapshot')

    sha = CharField(indexed=True, length=40)
    hash = CharField(indexed=True, length=64)

    committer_date = DateTimeField(indexed=True)
    author_date = DateTimeField(indexed=True)
    author_name = CharField(length=100)
    committer_date_ts = IntegerField(indexed=True)
    author_date_ts = IntegerField(indexed=True)

    tree_sha = CharField(indexed=True, length=40)
    log = TextField(indexed=False)

    class Meta(BaseDocument.Meta):
        unique_together = [('project', 'sha')]
class FileRevision(BaseDocument):
    """One revision of a file in a project, identified by path and SHA."""

    # calculated as hash(path, sha)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project')
    path = CharField(indexed=True, length=2000)
    language = CharField(indexed=True, length=50)
    sha = CharField(indexed=True, length=64)
    dependencies = ManyToManyField(
        'FileRevision',
        backref='dependent_file_revisions',
    )

    class Meta(Document.Meta):
        collection = "filerevision"

    def get_file_content(self):
        """Return the file content attached to this revision.

        The content is read from the ``_file_content`` attribute. If that
        attribute is callable it is called and its result returned;
        otherwise the attribute value itself is returned.

        :raises NotImplementedError: if no ``_file_content`` attribute is set.
        """
        if not hasattr(self, '_file_content'):
            raise NotImplementedError
        content = self._file_content
        if callable(content):
            return content()
        return content
class Movie(Document):
    """Movie document with title, year, director, cast and best actor."""

    title = CharField(nullable=True, indexed=True)
    director = ForeignKeyField(
        related='Director',
        nullable=True,
        backref='movies',
    )
    cast = ManyToManyField(related='Actor')
    year = IntegerField(indexed=True)
    best_actor = ForeignKeyField('Actor', backref='best_movies')

    class Meta(Document.Meta):
        # Fields listed for inclusion in database references to a movie.
        dbref_includes = ['title', 'year']
class Issue(BaseDocument):
    """
    An `Issue` object represents an issue or problem with the code.
    It can be associated with one or multiple file revisions, code objects etc.

    An issue fingerprint should be a unique identifier for a given issue,
    hence if two issues have the same fingerprint they should be judged
    "identical".
    """

    # calculated as hash(analyzer, code, fingerprint)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)

    project = ForeignKeyField('Project', backref='issues', nullable=False)

    analyzer = CharField(indexed=True, length=100, nullable=False)
    code = CharField(indexed=True, length=100, nullable=False)
    fingerprint = CharField(indexed=True, length=255, nullable=False)

    class Meta(Document.Meta):
        unique_together = [('project', 'fingerprint', 'analyzer', 'code')]
        dbref_includes = ['code', 'analyzer']
class Director(Document):
    """
    Warning: There is a circular foreign key relationship between
    Director and Movie, hence trying to save a pair of those objects
    that point to each other will yield an exception for e.g.
    the Postgres backend.
    """

    name = CharField(indexed=True)
    favorite_actor = ForeignKeyField('Actor')

    # Unique foreign key: a movie can be the best movie of one director only.
    best_movie = ForeignKeyField(
        'Movie',
        unique=True,
        backref='best_of_director',
    )
class User(BaseDocument):
    """User account with e-mail validation, password and terms bookkeeping."""

    name = CharField(indexed=True, unique=True, length=50)

    email = CharField(indexed=True, unique=True, length=255)
    new_email = CharField(indexed=True, unique=False, length=255)
    email_change_requested_at = DateTimeField()
    email_validated = BooleanField(indexed=True, default=False)
    email_validation_code = CharField(indexed=True, length=64)

    password = CharField(indexed=False, length=128)
    password_reset_code = CharField(indexed=True, length=64)
    password_reset_requested_at = DateTimeField()

    terms_accepted = BooleanField(default=False)
    terms_accepted_at = DateTimeField()

    superuser = BooleanField(default=False)

    # NOTE(review): a field named ``delete`` may shadow a method of the same
    # name inherited from the base document class -- confirm.
    delete = BooleanField(default=False, indexed=True)

    def set_password(self, password):
        """Store the PBKDF2-SHA256 hash of `password` and flag it as set."""
        hashed = pbkdf2_sha256.hash(password)
        self.password = hashed
        self.password_set = True

    def check_password(self, password):
        """Return whether `password` verifies against the stored hash."""
        stored_hash = self.password
        return pbkdf2_sha256.verify(password, stored_hash)

    def get_access_token(self):
        """Build a new ``AccessToken`` for this user with a random hex token.

        The token object is only constructed here, not saved.
        """
        return AccessToken({'user': self, 'token': uuid.uuid4().hex})

    def is_superuser(self):
        """Return True only if a truthy ``superuser`` value is present."""
        return bool('superuser' in self and self.superuser)
class Issue(BaseDocument):
    """
    An `Issue` object represents an issue or problem with the code.
    It can be associated with one or multiple file revisions, code objects etc.

    An issue fingerprint should be a unique identifier for a given issue,
    hence if two issues have the same fingerprint they should be judged
    "identical".
    """

    class IgnoreReason:
        """Integer codes for the reason an issue is ignored."""

        not_specified = 0
        not_relevant = 1
        false_positive = 2

    # calculated as hash(analyzer, code, fingerprint)
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)

    project = ForeignKeyField('Project', backref='issues', nullable=False)

    analyzer = CharField(indexed=True, length=100, nullable=False)
    code = CharField(indexed=True, length=100, nullable=False)
    fingerprint = CharField(indexed=True, length=255, nullable=False)

    # determines if this issue should be ignored
    ignore = BooleanField(
        indexed=True,
        default=False,
        nullable=False,
        server_default=False,
    )

    # gives a reason for the issue to be ignored (e.g. false_positive, )
    ignore_reason = IntegerField(indexed=True, nullable=True)

    # an optional comment for the ignore reason
    ignore_comment = CharField(indexed=False, length=255, nullable=True)

    class Meta(Document.Meta):
        unique_together = [('project', 'fingerprint', 'analyzer', 'code')]
        dbref_includes = ['code', 'analyzer']
class IssueOccurrence(BaseDocument):
    """A single occurrence of an issue at a location in a file revision."""

    # can be uniquely identified by its filerevision.pk, issue.pk and
    # from_row, to_row, from_column, to_column, sequence

    # calculated as hash(file_revision.hash, issue.hash, from_row, to_row,
    # from_column, to_column, sequence)
    hash = CharField(indexed=True, length=64)

    file_revision = ForeignKeyField(
        'FileRevision',
        backref='issue_occurrences',
    )
    issue = ForeignKeyField('Issue', backref='issue_occurrences')

    from_row = IntegerField()
    to_row = IntegerField()
    from_column = IntegerField()
    to_column = IntegerField()

    sequence = IntegerField(default=0)
class Movie(Document):
    """Movie document with title, year, director, cast and best actor."""

    title = CharField(nullable=True, indexed=True)
    director = ForeignKeyField(
        related="Director",
        nullable=True,
        backref="movies",
    )
    cast = ManyToManyField(related="Actor")
    year = IntegerField(indexed=True)
    best_actor = ForeignKeyField("Actor", backref="best_movies")

    class Meta(Document.Meta):
        # Fields listed for inclusion in database references to a movie.
        dbref_includes = ["title", "year"]
class GitRepository(BaseGitRepository):
    """Git repository document with a key pair, a URL and a settings-based path."""

    public_key = TextField()
    private_key = TextField()
    url = CharField(indexed=True)

    @property
    def path(self):
        """Return the on-disk path of this repository.

        The path is built by joining the ``project_path`` and
        ``backend.paths.git_repositories`` settings with this document's
        own ``pk``.

        :raises AttributeError: if the related project has no primary key.
        """
        # we import this here to avoid cyclic dependency problems
        # (shouldn't be the case though)
        from quantifiedcode.settings import settings

        # NOTE(review): the guard checks the *project's* pk while the path
        # below is built from this repository's own pk -- confirm intended.
        project_pk = self.eager.project.eager.pk
        if not project_pk:
            raise AttributeError('You must define a primary key for the project in order to get the repository path!')

        base_directory = settings.get('project_path')
        repositories_directory = settings.get('backend.paths.git_repositories')
        return os.path.join(base_directory, repositories_directory, self.pk)
class IssueClass(BaseDocument):
    """Issue class definition keyed by ``code`` and ``analyzer``.

    Carries title, language, description, severity and categories.
    A ``(code, analyzer)`` pair is declared unique.
    """

    hash = CharField(indexed=True, length=64)
    title = CharField(indexed=True, length=100)
    analyzer = CharField(indexed=True, length=50)
    language = CharField(indexed=True, length=50)
    code = CharField(indexed=True, length=50)

    description = TextField(indexed=False)
    occurrence_description = CharField(indexed=True, length=2000)

    severity = IntegerField(indexed=True)
    categories = ManyToManyField('IssueCategory')

    class Meta(BaseDocument.Meta):
        unique_together = (('code', 'analyzer'),)
class IssueClass(BaseDocument):
    """Issue class definition keyed by ``code`` and ``analyzer``.

    Carries title, language, description, severity and categories.
    A ``(code, analyzer)`` pair is declared unique.
    """

    class Severity:
        """Integer severity levels, from ``critical`` (1) to ``recommendation`` (4)."""

        critical = 1
        potential_bug = 2
        minor = 3
        recommendation = 4

    hash = CharField(indexed=True, length=64)
    title = CharField(indexed=True, length=100)
    analyzer = CharField(indexed=True, length=50)
    language = CharField(indexed=True, length=50)
    code = CharField(indexed=True, length=50)

    description = TextField(indexed=False)

    # obsolete
    occurrence_description = CharField(indexed=True, length=2000)

    # NOTE(review): presumably one of the ``Severity`` constants; nothing in
    # this class enforces that.
    severity = IntegerField(indexed=True)
    categories = ManyToManyField('IssueCategory')

    class Meta(BaseDocument.Meta):
        unique_together = (('code', 'analyzer'), )
class Director(Document):
    """Director document with a single indexed ``name`` field."""

    class Meta(Document.Meta):
        # Automatic registration is switched off for this document class.
        autoregister = False

    name = CharField(indexed=True)
class Role(Document):
    """Role document tying a mandatory actor to a mandatory movie."""

    role = CharField(indexed=True)

    # Both references are declared non-nullable.
    actor = ForeignKeyField("Actor", nullable=False)
    movie = ForeignKeyField("Movie", nullable=False)
class Food(Document):
    """Food document with a single indexed ``name`` field."""

    name = CharField(indexed=True)
class Snapshot(BaseDocument):
    """Snapshot of a project: a set of file revisions plus an ``analyzed`` flag."""

    # calculated as by the creating object
    hash = CharField(indexed=True, length=64)
    configuration = CharField(indexed=True, length=64)
    project = ForeignKeyField('Project')
    file_revisions = ManyToManyField('FileRevision', backref='snapshots')
    analyzed = BooleanField(indexed=True)

    class Meta(Document.Meta):
        pass

    def load(self, data):
        """
        Imports a snapshot from a data structure
        """
        pass

    def export(self):
        """
        Exports a snapshot to a data structure
        """

    def summarize_issues(self, include_filename=False, ignore=False):
        """Summarize the issues of this snapshot, grouped by path fragments.

        Only SQL backends are supported.

        :param include_filename: forwarded to ``directory_splitter``.
        :param ignore: value the issues' ``ignore`` column must equal.
        :raises NotImplementedError: if the backend is not a ``SqlBackend``.
        """
        if not isinstance(self.backend, SqlBackend):
            raise NotImplementedError
        return self._summarize_issues_sql(
            include_filename=include_filename,
            ignore=ignore,
        )

    def _summarize_issues_sql(self, include_filename=False, ignore=False):
        """SQL implementation of :meth:`summarize_issues`.

        Counts issue occurrences per (language, path, code, analyzer) for the
        file revisions of this snapshot, restricted to issue classes enabled
        for the project, and feeds the rows to an ``IssuesMapReducer``.
        """
        get_table = self.backend.get_table

        snapshot_file_revisions_table = get_table(
            self.fields['file_revisions'].relationship_class)
        fr_table = get_table(FileRevision)
        issue_table = get_table(Issue)
        issue_occurrence_table = get_table(IssueOccurrence)
        project_issue_class_table = get_table(ProjectIssueClass)
        issue_class_table = get_table(self.project.IssueClass)

        project_pk_type = self.backend.get_field_type(
            self.project.fields['pk'])
        snapshot_pk_type = self.backend.get_field_type(self.fields['pk'])

        # we group by file revision path, issue code and analyzer
        group_columns = [
            fr_table.c.language,
            fr_table.c.path,
            issue_table.c.code,
            issue_table.c.analyzer,
        ]

        # we perform a JOIN of the file revision table to the issue tables
        table = (
            fr_table
            .join(
                issue_occurrence_table,
                fr_table.c.pk == issue_occurrence_table.c.file_revision,
            )
            .join(
                issue_table,
                and_(
                    issue_table.c.pk == issue_occurrence_table.c.issue,
                    issue_table.c.ignore == ignore,
                ),
            )
        )

        # here we make sure that the given issue class is enabled for the project
        # (``== True`` builds a SQL comparison; it must not become ``is True``)
        subselect = (
            select([issue_class_table.c.pk])
            .select_from(issue_class_table.join(project_issue_class_table))
            .where(and_(
                issue_table.c.analyzer == issue_class_table.c.analyzer,
                issue_table.c.code == issue_class_table.c.code,
                issue_table.c.ignore == ignore,
                project_issue_class_table.c.project == expression.cast(self.project.pk, project_pk_type),
                project_issue_class_table.c.enabled == True,
            ))
        )

        file_revisions_select = (
            select([snapshot_file_revisions_table.c.filerevision])
            .where(
                snapshot_file_revisions_table.c.snapshot
                == expression.cast(self.pk, snapshot_pk_type)
            )
        )

        # we select the aggregated issues for all file revisions in this snapshot
        s = (
            select(group_columns + [func.count().label('count')])
            .select_from(table)
            .where(and_(
                exists(subselect),
                fr_table.c.pk.in_(file_revisions_select),
            ))
            .group_by(*group_columns)
            .order_by(fr_table.c.path)
        )

        # we fetch the result
        with self.backend.transaction():
            result = self.backend.connection.execute(s).fetchall()

        # we aggregate the issues by path fragments
        def aggregator(f):
            return directory_splitter(
                f['path'], include_filename=include_filename)

        # we perform a map/reduce on the result
        # the resulting items will contain the number of files and the number
        # of issues in the file
        map_reducer = IssuesMapReducer(aggregators=[aggregator])
        return map_reducer.mapreduce(result)
class Project(BaseDocument):
    """Project document with helpers to query and group its issue classes."""

    IssueClass = IssueClass

    # contains a hash of the project configuration that will be used to mark
    # snapshots, diffs, file revisions etc.
    configuration = CharField(indexed=True, length=64)

    class Meta(Document.Meta):
        collection = "project"

    @property
    def settings(self):
        """Return the stored ``settings`` value, or an empty dict if unset."""
        return self.get('settings', {})

    def get_issue_classes(self, backend=None, enabled=True, sort=None, **kwargs):
        """
        Retrieves the issue classes for a given backend

        :param backend: A backend to use. If None, the default backend will be used
        :param enabled: Whether to retrieve enabled or disabled issue classes.
                        Passing `None` will retrieve all issue classes.
        :param sort: Optional sort specification applied to the result.
        :param kwargs: Extra keyword arguments forwarded to ``backend.filter``.
        :returns: The (optionally sorted) result of ``backend.filter``.
        """
        if backend is None:
            backend = self.backend

        query = {'project_issue_classes.project': self}
        if enabled is not None:
            query['project_issue_classes.enabled'] = enabled

        issue_classes = backend.filter(self.IssueClass, query, **kwargs)
        if sort is not None:
            issue_classes = issue_classes.sort(sort)
        return issue_classes

    def get_issues_data(self, backend=None, extra_fields=None):
        """Return the enabled issue classes grouped by language, analyzer and code.

        The result has the shape::

            {language: {'title': language,
                        'analyzers': {analyzer: {'title': analyzer,
                                                 'codes': {code: {...}}}}}}

        Issue classes with an empty language, analyzer or code are skipped.

        :param backend: A backend to use. If None, the default backend will be used
        :param extra_fields: Additional issue class fields to fetch and copy
                             into each code entry.
        """
        if backend is None:
            backend = self.backend
        # Accept any sequence of field names, not only a list.
        extra_fields = [] if extra_fields is None else list(extra_fields)

        # The resolved backend must be forwarded: previously it was computed
        # but dropped, so an explicitly passed backend was silently ignored.
        issue_classes = self.get_issue_classes(
            backend=backend,
            include=(('categories', 'name'),),
            sort=[('categories.name', 1)],
            only=extra_fields + [
                'title', 'analyzer', 'language', 'severity',
                'description', 'code', 'pk',
            ],
            raw=True)

        grouped_issue_data = {}

        for issue_class in issue_classes:
            if (not issue_class['language']
                    or not issue_class['analyzer']
                    or not issue_class['code']):
                continue

            language = issue_class['language']
            analyzer = issue_class['analyzer']
            code = issue_class['code']

            language_entry = grouped_issue_data.setdefault(
                language, {'title': language, 'analyzers': {}})
            analyzer_entry = language_entry['analyzers'].setdefault(
                analyzer, {'title': analyzer, 'codes': {}})

            code_entry = {
                'severity': issue_class['severity'],
                'title': issue_class['title'],
                'categories': [category['name']
                               for category in issue_class['categories']],
                'description': issue_class['description'],
                'code': code,
                'pk': issue_class['pk'],
            }
            for field_name in extra_fields:
                code_entry[field_name] = issue_class[field_name]

            analyzer_entry['codes'][code] = code_entry

        return grouped_issue_data
class IssueCategory(BaseDocument):
    """Issue category identified by a unique, indexed ``name``."""

    name = CharField(
        indexed=True,
        unique=True,
        length=50,
    )
class Tag(BaseDocument):
    """Tag identified by a unique, indexed ``name``."""

    name = CharField(
        indexed=True,
        unique=True,
        length=50,
    )
class GitRepository(BaseDocument):
    """Git repository of a project, with helpers built on a ``Repository``."""

    path_ = CharField(indexed=True)
    project = ForeignKeyField('Project', backref='git', unique=True)
    default_branch = CharField(indexed=True)

    @property
    def path(self):
        """Return the repository path (stored in the ``path_`` field)."""
        return self.path_

    @path.setter
    def path(self, path):
        self.path_ = path

    def get_settings(self):
        """Load the settings from ``.checkmate.yml`` on the default branch.

        :returns: The parsed YAML content, or None if there is no default
                  branch, the file cannot be read, or it cannot be parsed.
                  The last two cases are logged as warnings.
        """
        default_branch = self.get_default_branch()
        if default_branch is None:
            return

        branches = self.repository.get_branches()
        if default_branch not in branches:
            return

        latest_commit = self.repository.get_commits(default_branch, limit=1)[0]

        # Reading and parsing are handled separately. Previously the parse
        # error raised inside the outer ``try`` was swallowed by a bare
        # ``except`` and misreported as a missing file.
        try:
            checkmate_file_content = self.repository.get_file_content(
                latest_commit['sha'], '.checkmate.yml')
        except Exception:
            logger.warning("No .checkmate.yml file found!")
            return

        try:
            # NOTE(review): ``yaml.load`` without an explicit safe loader can
            # construct arbitrary objects from repository-controlled content;
            # consider ``yaml.safe_load`` after confirming no caller relies
            # on custom YAML tags.
            return yaml.load(checkmate_file_content)
        except Exception:
            # Kept non-raising, as before, so existing callers keep
            # receiving None for an unusable settings file.
            logger.warning("Cannot parse checkmate YML file!")
            return

    @property
    def repository(self):
        """Return the ``Repository`` for ``self.path``, created on first use."""
        if not hasattr(self, '_repository'):
            self._repository = Repository(self.path)
        return self._repository

    def get_snapshots(self, **kwargs):
        """
        Returns a list of snapshots in a given repository.

        :param kwargs: Forwarded to ``Repository.get_commits``.
        :returns: A list of unsaved ``GitSnapshot`` objects, one per commit,
                  each with a fresh random ``pk``.
        """
        commits = self.repository.get_commits(**kwargs)
        snapshots = []
        for commit in commits:
            # Derive the datetime fields from the integer ``*_ts`` timestamps.
            for key in ('committer_date', 'author_date'):
                commit[key] = datetime.datetime.fromtimestamp(commit[key + '_ts'])
            snapshot = GitSnapshot(commit)
            hasher = Hasher()
            hasher.add(snapshot.sha)
            snapshot.hash = hasher.digest.hexdigest()
            snapshot.project = self.project
            snapshot.pk = uuid.uuid4().hex
            snapshots.append(snapshot)
        return snapshots

    def get_file_revisions(self, commit_sha, filters=None):
        """Return ``FileRevision`` objects for the files in a commit.

        :param commit_sha: SHA of the commit whose files are listed.
        :param filters: Optional iterable of callables. Each receives the list
                        of remaining paths and returns the paths to keep.
        :returns: A list of unsaved ``FileRevision`` objects whose content is
                  loaded lazily through ``_file_content``.
        """
        files = self.repository.get_files_in_commit(commit_sha)

        if filters:
            for filter_func in filters:
                # Run each filter once on the full path list. It used to be
                # re-run for every file, which was quadratic.
                # Assumes paths are hashable (strings).
                allowed_paths = set(filter_func([f['path'] for f in files]))
                files = [f for f in files if f['path'] in allowed_paths]

        file_revisions = []
        for file_obj in files:
            hasher = Hasher()
            file_revision = FileRevision(file_obj)
            hasher.add(file_revision.path)
            hasher.add(file_revision.sha)
            file_revision.project = self.project
            file_revision.hash = hasher.digest.hexdigest()
            file_revision.pk = uuid.uuid4().hex
            # Bind the loop values as defaults so that each lambda reads its
            # own file revision, not the last one of the loop.
            file_revision._file_content = lambda commit_sha=commit_sha, file_revision=file_revision: self.repository.get_file_content(
                commit_sha, file_revision.path)
            file_revisions.append(file_revision)
        return file_revisions

    def get_default_branch(self):
        """Return the branch to use as default, or None if there are no branches.

        Preference order: the configured ``default_branch`` if it exists,
        then ``origin/master``, then the first branch listed.
        """
        branches = self.repository.get_branches()
        if self.default_branch in branches:
            return self.default_branch
        if 'origin/master' in branches:
            return 'origin/master'
        if branches:
            return branches[0]
        return