def log(self, branch, head_commit_id):
    """Yield one dict per non-merge commit reported by ``git log``.

    The process working directory is switched to ``self.folder`` and
    ``self._checkout(branch)`` is called first.  Nothing is yielded when
    that call returns a falsy value or when ``git log`` exits with
    ``sh.ErrorReturnCode`` (the error is logged and swallowed).

    :param branch: passed to ``self._checkout``; also stored as the only
        member of each record's ``branches`` set
    :param head_commit_id: when truthy, only the range
        ``head_commit_id..HEAD`` is read, otherwise ``HEAD``

    Each yielded dict holds the fields named by ``GIT_LOG_PARAMS`` plus
    ``files_changed``, ``lines_added``, ``lines_deleted``, ``date`` (int),
    ``module``, ``branches`` and ``release``, and one list per entry of
    ``MESSAGE_PATTERNS`` that matched the commit message.
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])

    os.chdir(self.folder)
    if not self._checkout(branch):
        return

    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    else:
        commit_range = 'HEAD'

    git_args = ('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                '-M', '--no-merges', commit_range)
    try:
        output = sh.git(*git_args, _tty_out=False,
                        _decode_errors='ignore', _encoding='utf8')
    except sh.ErrorReturnCode as e:
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'])
        LOG.exception(e)
        return

    for entry in re.finditer(GIT_LOG_PATTERN, six.text_type(output)):
        # Positional groups 1..N of the log pattern map onto GIT_LOG_PARAMS.
        commit = {}
        for group_no, param in enumerate(GIT_LOG_PARAMS, 1):
            commit[param[0]] = entry.group(group_no)

        # Commits without an author email are skipped entirely.
        if not commit['author_email']:
            continue
        commit['author_email'] = utils.keep_safe_chars(
            commit['author_email'])

        stat = re.search(DIFF_STAT_PATTERN, entry.group('diff_stat'))
        if not stat:
            files_changed = added = removed = 0
        else:
            files_changed = int(stat.group(1))
            added, change_kind, removed = stat.group(3, 4, 5)
            # A single count whose label starts with 'd' is a deletion
            # count, so move it from "added" to "removed".
            if stat.group(2) and not removed and change_kind[0] == 'd':
                added, removed = 0, added

        commit['files_changed'] = files_changed
        commit['lines_added'] = int(added or 0)
        commit['lines_deleted'] = int(removed or 0)

        # Collect the distinct 'id' captures of every message pattern.
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            found = set(hit.group('id')
                        for hit in re.finditer(pattern, commit['message']))
            if found:
                commit[pattern_name] = list(found)

        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = {branch}

        commit_id = commit['commit_id']
        commit['release'] = (self.release_index[commit_id]
                             if commit_id in self.release_index else None)
        if commit['release'] == 'ignored':
            # Commits mapped to the 'ignored' release are not reported.
            continue

        if 'blueprint_id' in commit:
            # Qualify every blueprint name with the module it belongs to.
            commit['blueprint_id'] = [
                (commit['module'] + ':' + bp_name)
                for bp_name in commit['blueprint_id']
            ]

        if 'coauthor' in commit:
            verified = []
            for candidate in commit['coauthor']:
                parsed = re.match(CO_AUTHOR_PATTERN, candidate)
                if not parsed:
                    continue
                if utils.check_email_validity(parsed.group("author_email")):
                    verified.append(parsed.groupdict())

            if not verified:
                del commit['coauthor']  # none of the co-authors validated
            else:
                commit['coauthor'] = verified

        yield commit
def test_keep_safe_chars(self):
    """Check ``utils.keep_safe_chars`` against known input/output pairs.

    The expected values drop the spaces from both inputs and the
    ``\\u0423`` character from the unicode one.
    """
    cases = (
        ('some more text', 'somemoretext'),
        (u'(unicode \u0423) ', u'(unicode)'),
    )
    for raw, expected in cases:
        self.assertEqual(expected, utils.keep_safe_chars(raw))
def log(self, branch, head_commit_id):
    """Yield one dict per non-merge commit reported by ``git log``.

    The process working directory is switched to ``self.folder`` and
    ``self._checkout(branch)`` is called first.  Nothing is yielded when
    that call returns a falsy value or when ``git log`` exits with
    ``sh.ErrorReturnCode`` (the error is logged and swallowed).

    :param branch: passed to ``self._checkout``; also stored as the only
        member of each record's ``branches`` set
    :param head_commit_id: when truthy, only the range
        ``head_commit_id..HEAD`` is read, otherwise ``HEAD``

    Each yielded dict holds the fields named by ``GIT_LOG_PARAMS`` plus
    ``files_changed``, ``lines_added``, ``lines_deleted``, ``date`` (int),
    ``module``, ``branches`` and ``release``, and one list per entry of
    ``MESSAGE_PATTERNS`` that matched the commit message.
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])

    os.chdir(self.folder)
    if not self._checkout(branch):
        return

    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    else:
        commit_range = 'HEAD'

    git_args = ('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                '-M', '--no-merges', commit_range)
    try:
        output = sh.git(*git_args, _tty_out=False,
                        _decode_errors='ignore', _encoding='utf8')
    except sh.ErrorReturnCode as e:
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'])
        LOG.exception(e)
        return

    for entry in re.finditer(GIT_LOG_PATTERN, six.text_type(output)):
        # Positional groups 1..N of the log pattern map onto GIT_LOG_PARAMS.
        commit = {}
        for group_no, param in enumerate(GIT_LOG_PARAMS, 1):
            commit[param[0]] = entry.group(group_no)

        # Commits without an author email are skipped entirely.
        if not commit['author_email']:
            continue
        commit['author_email'] = utils.keep_safe_chars(
            commit['author_email'])

        stat = re.search(DIFF_STAT_PATTERN, entry.group('diff_stat'))
        if not stat:
            files_changed = added = removed = 0
        else:
            files_changed = int(stat.group(1))
            added, change_kind, removed = stat.group(3, 4, 5)
            # A single count whose label starts with 'd' is a deletion
            # count, so move it from "added" to "removed".
            if stat.group(2) and not removed and change_kind[0] == 'd':
                added, removed = 0, added

        commit['files_changed'] = files_changed
        commit['lines_added'] = int(added or 0)
        commit['lines_deleted'] = int(removed or 0)

        # Collect the distinct 'id' captures of every message pattern.
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            found = set(hit.group('id')
                        for hit in re.finditer(pattern, commit['message']))
            if found:
                commit[pattern_name] = list(found)

        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = {branch}

        commit_id = commit['commit_id']
        commit['release'] = (self.release_index[commit_id]
                             if commit_id in self.release_index else None)
        if commit['release'] == 'ignored':
            # Commits mapped to the 'ignored' release are not reported.
            continue

        if 'blueprint_id' in commit:
            # Qualify every blueprint name with the module it belongs to.
            commit['blueprint_id'] = [
                (commit['module'] + ':' + bp_name)
                for bp_name in commit['blueprint_id']
            ]

        if 'coauthor' in commit:
            verified = []
            for candidate in commit['coauthor']:
                parsed = re.match(CO_AUTHOR_PATTERN, candidate)
                if not parsed:
                    continue
                if utils.check_email_validity(parsed.group("author_email")):
                    verified.append(parsed.groupdict())

            if not verified:
                del commit['coauthor']  # none of the co-authors validated
            else:
                commit['coauthor'] = verified

        yield commit
def test_keep_safe_chars(self):
    """Check ``utils.keep_safe_chars`` against known input/output pairs.

    The expected values drop the spaces from both inputs and the
    ``\\u0423`` character from the unicode one.
    """
    cases = (
        ('some more text', 'somemoretext'),
        (u'(unicode \u0423) ', u'(unicode)'),
    )
    for raw, expected in cases:
        self.assertEqual(expected, utils.keep_safe_chars(raw))