def test_get_all_commits():
    """The git-1 fixture repository exposes 13 change sets in a known order."""
    expected_count = 13
    expected_first_id = 'e7d13b0511f8a176284ce4f92ed8c6e8d09c77f2'
    expected_last_id = '866e997a9e44cb4ddd9e00efe49361420aff2559'

    repo = GitRepository('test-repos/git-1/')
    found = repo.get_change_sets()

    assert expected_count == len(found)
    # Index 0 and index 12 are the two ends of the 13-element list.
    assert expected_first_id == found[0].id
    assert expected_last_id == found[12].id
def test_get_change_sets():
    """The test1 fixture repository yields 5 change sets, 4 of them pinned here."""
    repo = GitRepository('test-repos/test1/')
    assert repo is not None
    found = repo.get_change_sets()

    # The pinned commits were authored under two different UTC offsets.
    plus_one = timezone(timedelta(hours=1))
    plus_two = timezone(timedelta(hours=2))

    expected = [
        ChangeSet('a88c84ddf42066611e76e6cb690144e5357d132c',
                  datetime(2018, 3, 22, 10, 41, 11, tzinfo=plus_one)),
        ChangeSet('6411e3096dd2070438a17b225f44475136e54e3a',
                  datetime(2018, 3, 22, 10, 41, 47, tzinfo=plus_one)),
        ChangeSet('09f6182cef737db02a085e1d018963c7a29bde5a',
                  datetime(2018, 3, 22, 10, 42, 3, tzinfo=plus_one)),
        ChangeSet('1f99848edadfffa903b8ba1286a935f1b92b2845',
                  datetime(2018, 3, 27, 17, 10, 52, tzinfo=plus_two)),
    ]

    for change_set in expected:
        assert change_set in found

    assert 5 == len(found)
class RepositoryMining:
    """
    Iterate over the commits of a Git repository, restricted by optional
    filters on date range, commit/tag boundaries, branches, file types and
    merges.
    """

    def __init__(self, path_to_repo: str,
                 single: str = None,
                 since: datetime = None, to: datetime = None,
                 from_commit: str = None, to_commit: str = None,
                 from_tag: str = None, to_tag: str = None,
                 reversed_order: bool = False,
                 only_in_main_branch: bool = False,
                 only_in_branches: List[str] = None,
                 only_modifications_with_file_types: List[str] = None,
                 only_no_merge: bool = False,
                 num_threads: int = 1):
        """
        Init a repository mining.

        :param str path_to_repo: absolute path to the repository you have
            to analyze
        :param str single: hash of a single commit to analyze
        :param datetime since: starting date
        :param datetime to: ending date
        :param str from_commit: starting commit (only if `since` is None)
        :param str to_commit: ending commit (only if `to` is None)
        :param str from_tag: starting the analysis from specified tag
            (only if `since` and `from_commit` are None)
        :param str to_tag: ending the analysis from specified tag
            (only if `to` and `to_commit` are None)
        :param bool reversed_order: whether the commits should be analyzed
            in reversed order
        :param bool only_in_main_branch: whether only commits in main
            branch should be analyzed
        :param List[str] only_in_branches: only commits in these branches
            will be analyzed
        :param List[str] only_modifications_with_file_types: only
            modifications with that file types will be analyzed
        :param bool only_no_merge: if True, merges will not be analyzed
        :param int num_threads: stored on the instance as `num_threads`;
            no code in this class reads it
        :raises Exception: if mutually exclusive filters are combined
            (see `_check_filters`)
        """
        self.git_repo = GitRepository(path_to_repo)
        self.single = single
        self.since = since
        self.to = to
        self.reversed_order = reversed_order
        self.only_in_main_branch = only_in_main_branch
        self.only_in_branches = only_in_branches
        self.only_modifications_with_file_types = \
            only_modifications_with_file_types
        self.only_no_merge = only_no_merge
        self.num_threads = num_threads

        # Commit/tag boundaries are translated into dates first, then any
        # naive date is made timezone-aware.
        self._check_filters(from_commit, from_tag, since, single, to,
                            to_commit, to_tag)
        self._check_timezones()

    def _check_filters(self, from_commit, from_tag, since, single, to,
                       to_commit, to_tag):
        """
        Validate filter combinations and turn commit/tag boundaries into
        `self.since` / `self.to` dates (the author date of that commit).

        :raises Exception: when `single` is combined with any other
            boundary, or when two lower (or two upper) boundaries are given
        """
        if single is not None:
            others = (since, to, from_commit, to_commit, from_tag, to_tag)
            if any(other is not None for other in others):
                raise Exception(
                    'You can not specify a single commit with other filters')

        if from_commit is not None:
            if since is not None:
                raise Exception(
                    'You can not specify both <since date> and <from commit>')
            self.since = self.git_repo.get_commit(from_commit).author_date

        if to_commit is not None:
            if to is not None:
                raise Exception(
                    'You can not specify both <to date> and <to commit>')
            self.to = self.git_repo.get_commit(to_commit).author_date

        if from_tag is not None:
            if since is not None or from_commit is not None:
                raise Exception(
                    'You can not specify <since date> or <from commit> when using <from tag>'
                )
            self.since = self.git_repo.get_commit_from_tag(
                from_tag).author_date

        if to_tag is not None:
            if to is not None or to_commit is not None:
                raise Exception(
                    'You can not specify <to date> or <to commit> when using <to tag>'
                )
            self.to = self.git_repo.get_commit_from_tag(to_tag).author_date

    def traverse_commits(self) -> Generator[Commit, None, None]:
        """
        Analyze all the specified commits (all of them by default),
        returning a generator of commits.

        The change sets coming from the repository are reversed before
        iteration unless `reversed_order` is set.
        """
        logger.info('Git repository in {}'.format(self.git_repo.path))
        all_cs = self._apply_filters_on_changesets(
            self.git_repo.get_change_sets())

        # Reverse into a new list: when no filter is set, `all_cs` is the
        # very list returned by the repository, which must not be mutated.
        if not self.reversed_order:
            all_cs = list(reversed(all_cs))

        for cs in all_cs:
            commit = self.git_repo.get_commit(cs.id)
            logger.info(
                'Commit #{} in {} from {} with {} modifications'.format(
                    commit.hash, commit.author_date, commit.author.name,
                    len(commit.modifications)))

            if self._is_commit_filtered(commit):
                logger.info('Commit #{} filtered'.format(commit.hash))
                continue

            yield commit

    def _is_commit_filtered(self, commit: Commit):
        """
        Return True when `commit` must be skipped according to the
        per-commit filters (main branch, branches, file types, merges).

        A debug message is logged only for the filter that actually
        rejects the commit.
        """
        if self.only_in_main_branch is True and commit.in_main_branch is False:
            logger.debug('Commit filtered for main branch')
            return True

        if (self.only_in_branches is not None
                and not self._commit_branch_in_branches(commit)):
            logger.debug('Commit filtered for only in branches')
            return True

        if (self.only_modifications_with_file_types is not None
                and not self._has_modification_with_file_type(commit)):
            logger.debug('Commit filtered for modification types')
            return True

        if self.only_no_merge is True and commit.merge is True:
            logger.debug('Commit filtered for no merge')
            return True

        return False

    def _commit_branch_in_branches(self, commit: Commit):
        """Return True if any branch of `commit` is in `only_in_branches`."""
        return any(branch in self.only_in_branches
                   for branch in commit.branches)

    def _has_modification_with_file_type(self, commit):
        """
        Return True if at least one modification of `commit` has a filename
        ending with one of `only_modifications_with_file_types`.
        """
        # str.endswith accepts a tuple of suffixes; build it once.
        suffixes = tuple(self.only_modifications_with_file_types or ())
        return any(mod.filename.endswith(suffixes)
                   for mod in commit.modifications)

    def _apply_filters_on_changesets(
            self, all_cs: List[ChangeSet]) -> List[ChangeSet]:
        """
        Restrict `all_cs` according to `single`, `since` and `to`.

        :param List[ChangeSet] all_cs: change sets to filter
        :return: `all_cs` itself when no filter is set; a one-element list
            with the matching change set when `single` is set (empty list
            if no change set has that id); otherwise the change sets whose
            date lies within the inclusive [`since`, `to`] range
        """
        if self._all_filters_are_none():
            return all_cs

        if self.single is not None:
            # An unknown hash selects nothing rather than everything.
            for cs in all_cs:
                if cs.id == self.single:
                    return [cs]
            return []

        return [
            cs for cs in all_cs
            if (self.since is None or self.since <= cs.date)
            and (self.to is None or cs.date <= self.to)
        ]

    def _all_filters_are_none(self):
        """Return True when neither `single`, `since` nor `to` is set."""
        return self.single is None and self.since is None and self.to is None

    @staticmethod
    def _as_aware(moment):
        """
        Return `moment` unchanged if it is timezone-aware, otherwise a copy
        tagged with UTC.
        """
        if moment.tzinfo is None or moment.tzinfo.utcoffset(moment) is None:
            return moment.replace(tzinfo=pytz.utc)
        return moment

    def _check_timezones(self):
        """
        Make `since` and `to` timezone-aware (naive values are tagged as
        UTC) so they can be compared with the change set dates.
        """
        if self.since is not None:
            self.since = self._as_aware(self.since)
        if self.to is not None:
            self.to = self._as_aware(self.to)