def _retrieve_mails(uri):
    """Yield parsed e-mail dicts from the gzipped mail archive at *uri*."""
    LOG.debug('Retrieving mail archive from uri: %s', uri)
    raw = utils.read_uri(uri)
    if not raw:
        LOG.error('Error reading mail archive from uri: %s', uri)
        return
    text = utils.gzip_decompress(raw)
    LOG.debug('Mail archive is loaded, start processing')
    # Extra trailing record so the mailbox regex also matches the last
    # message of the archive.
    text += TRAILING_RECORD
    for match in re.finditer(MAIL_BOX_PATTERN, text):
        mail = match.groupdict()
        # Restore the first obfuscated ' at ' back into '@'.
        mail['author_email'] = mail['author_email'].replace(' at ', '@', 1)
        if not utils.check_email_validity(mail['author_email']):
            continue
        # RFC 2822 date header -> unix timestamp.
        parsed_date = email_utils.parsedate_tz(mail['date'])
        mail['date'] = int(email_utils.mktime_tz(parsed_date))
        for key, regex in six.iteritems(MESSAGE_PATTERNS):
            found = set()
            for hit in re.finditer(regex, mail['body']):
                captured = hit.groupdict()
                ident = captured['id']
                if 'module' in captured:
                    ident = captured['module'] + ':' + ident
                    mail['module'] = captured['module']
                found.add(ident)
            mail[key] = list(found)
        yield mail
def _retrieve_mails(uri):
    """Retrieve a gzipped mail archive from *uri* and yield message dicts.

    Yields one dict per message with keys from MAIL_BOX_PATTERN group
    names plus a unix-timestamp "date" and per-pattern id collections.
    """
    LOG.debug("Retrieving mail archive from uri: %s", uri)
    content = utils.read_uri(uri)
    if not content:
        LOG.error("Error reading mail archive from uri: %s", uri)
        return
    # Fix: close the GzipFile instead of leaking the file object.
    gzip_fd = gzip.GzipFile(fileobj=StringIO.StringIO(content))
    try:
        content = gzip_fd.read()
    finally:
        gzip_fd.close()
    LOG.debug("Mail archive is loaded, start processing")
    # Trailing record makes the regex match the last message as well.
    content += TRAILING_RECORD
    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        email = rec.groupdict()
        # Restore the first obfuscated " at " back into "@".
        email["author_email"] = email["author_email"].replace(" at ", "@", 1)
        if not utils.check_email_validity(email["author_email"]):
            continue
        # RFC 2822 date header -> unix timestamp.
        email["date"] = int(
            email_utils.mktime_tz(email_utils.parsedate_tz(email["date"])))
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, email["body"]):
                groups = item.groupdict()
                item_id = groups["id"]
                if "module" in groups:
                    # Qualify the id with the module and remember the module.
                    item_id = groups["module"] + ":" + item_id
                    email["module"] = groups["module"]
                collection.add(item_id)
            email[pattern_name] = list(collection)
        yield email
def _retrieve_mails(uri):
    """Read a gzipped mail archive from *uri* and yield parsed messages.

    Each yielded dict carries the MAIL_BOX_PATTERN group names, a unix
    timestamp under 'date', and a list of matched ids per message pattern.
    """
    LOG.debug('Retrieving mail archive from uri: %s', uri)
    content = utils.read_uri(uri)
    if not content:
        LOG.error('Error reading mail archive from uri: %s', uri)
        return
    content = utils.gzip_decompress(content)
    LOG.debug('Mail archive is loaded, start processing')
    # Appended so the regex also matches the final message of the archive
    # (presumably the pattern needs a following record separator — confirm).
    content += TRAILING_RECORD
    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        email = rec.groupdict()
        # Restore the first obfuscated ' at ' back into '@'.
        email['author_email'] = email['author_email'].replace(' at ', '@', 1)
        if not utils.check_email_validity(email['author_email']):
            continue
        # RFC 2822 date header -> unix timestamp.
        email['date'] = int(
            email_utils.mktime_tz(email_utils.parsedate_tz(email['date'])))
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, email['body']):
                groups = item.groupdict()
                item_id = groups['id']
                if 'module' in groups:
                    # Qualify the id with the module and record the module.
                    item_id = groups['module'] + ':' + item_id
                    email['module'] = groups['module']
                collection.add(item_id)
            email[pattern_name] = list(collection)
        yield email
def _get_lp_info(self, email):
    """Resolve *email* to (launchpad id, display name), or (None, None)."""
    profile = None
    if utils.check_email_validity(email):
        profile = launchpad_utils.lp_profile_by_email(email)
    else:
        LOG.debug('User email is not valid %s', email)
    if not profile:
        LOG.debug('User with email %s not found', email)
        return None, None
    LOG.debug('Email is mapped to launchpad user: %s', profile['name'])
    return profile['name'], profile['display_name']
def _get_lp_info(self, email):
    """Resolve *email* to (launchpad id, display name), or (None, None)."""
    if utils.check_email_validity(email):
        profile = launchpad_utils.lp_profile_by_email(email)
    else:
        LOG.debug("User email is not valid %s", email)
        profile = None
    if not profile:
        LOG.debug("User with email %s not found", email)
        return None, None
    LOG.debug("Email %(email)s is mapped to launchpad user %(lp)s",
              {"email": email, "lp": profile["name"]})
    return profile["name"], profile["display_name"]
def query_lp_info(email):
    """Query Launchpad ID and user name by email.

    :param email: user email
    :return: tuple (launchpad id, name); (None, None) when the email is
        invalid or no Launchpad profile exists for it
    """
    profile = None
    if utils.check_email_validity(email):
        profile = _lp_profile_by_email(email)
    else:
        LOG.debug('User email is not valid %s', email)
    if not profile:
        LOG.debug('User with email %s not found', email)
        return None, None
    LOG.debug('Email %(email)s is mapped to launchpad user %(lp)s',
              {'email': email, 'lp': profile['name']})
    return profile['name'], profile['display_name']
def _retrieve_mails(uri):
    """Retrieve a mail archive from *uri* and yield parsed message dicts.

    The archive is gunzipped only when the uri matches ``.txt.gz``;
    plain-text archives are processed as-is.
    """
    LOG.debug('Retrieving mail archive from uri: %s', uri)
    content = utils.read_uri(uri)
    if not content:
        LOG.error('Error reading mail archive from uri: %s', uri)
        return
    # Only gunzip if the uri has a .txt.gz suffix.  Fix: raw string, since
    # '\.txt\.gz' contains invalid escape sequences; no need to pre-compile
    # a pattern used once.
    if re.search(r'\.txt\.gz', uri):
        LOG.debug('%s is a gzipped file', uri)
        # Fix: close the GzipFile instead of leaking the file object.
        gzip_fd = gzip.GzipFile(fileobj=StringIO.StringIO(content))
        try:
            content = gzip_fd.read()
        finally:
            gzip_fd.close()
    else:
        LOG.debug('%s is not a gzipped file', uri)
    LOG.debug('Mail archive is loaded, start processing')
    # Trailing record makes the regex match the last message as well.
    content += TRAILING_RECORD
    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        email = rec.groupdict()
        # Restore the first obfuscated ' at ' back into '@'.
        email['author_email'] = email['author_email'].replace(' at ', '@', 1)
        if not utils.check_email_validity(email['author_email']):
            continue
        # RFC 2822 date header -> unix timestamp.
        email['date'] = int(email_utils.mktime_tz(
            email_utils.parsedate_tz(email['date'])))
        for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
            collection = set()
            for item in re.finditer(pattern, email['body']):
                groups = item.groupdict()
                item_id = groups['id']
                if 'module' in groups:
                    # Qualify the id with the module and record the module.
                    item_id = groups['module'] + ':' + item_id
                    email['module'] = groups['module']
                collection.add(item_id)
            email[pattern_name] = list(collection)
        yield email
def test_email_invalid(self):
    """Malformed addresses must be rejected by check_email_validity()."""
    for candidate in ('pupkin@localhost', '222@some.(trash)'):
        self.assertFalse(utils.check_email_validity(candidate))
def test_email_valid(self):
    """Well-formed addresses must be accepted by check_email_validity()."""
    for candidate in ('*****@*****.**', '*****@*****.**'):
        self.assertTrue(utils.check_email_validity(candidate))
def log(self, branch, head_commit_id):
    """Parse ``git log`` of *branch* and yield one dict per commit.

    :param branch: branch name to check out before reading the log
    :param head_commit_id: if set, only commits after this id are read
        (range ``<id>..HEAD``); otherwise the full ``HEAD`` history
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])
    os.chdir(self.folder)
    if not self._checkout(branch):
        return
    commit_range = 'HEAD'
    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    try:
        output = sh.git('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                        '-M', '--no-merges', commit_range, _tty_out=False,
                        _decode_errors='ignore')
    except sh.ErrorReturnCode as e:
        # A repo that cannot be read is skipped rather than aborting the run.
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'])
        LOG.exception(e)
        return
    for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
        # Regex groups 1..N follow GIT_LOG_PARAMS order, then the
        # shortstat groups; `i` walks them positionally.
        i = 1
        commit = {}
        for param in GIT_LOG_PARAMS:
            commit[param[0]] = six.text_type(rec.group(i), 'utf8')
            i += 1
        if not utils.check_email_validity(commit['author_email']):
            continue
        commit['files_changed'] = int(rec.group(i))
        i += 1
        lines_changed_group = rec.group(i)
        i += 1
        lines_changed = rec.group(i)
        i += 1
        deleted_or_inserted = rec.group(i)
        i += 1
        lines_deleted = rec.group(i)
        i += 1
        if lines_changed_group:  # there inserted or deleted lines
            # Shortstat prints a single count when only one of
            # insertions/deletions occurred; attribute it correctly.
            if not lines_deleted:
                if deleted_or_inserted[0] == 'd':  # deleted
                    lines_deleted = lines_changed
                    lines_changed = 0
        commit['lines_added'] = int(lines_changed or 0)
        commit['lines_deleted'] = int(lines_deleted or 0)
        # Collect ids (bug numbers, blueprints, ...) from the message body.
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, commit['message']):
                collection.add(item.group('id'))
            if collection:
                commit[pattern_name] = list(collection)
        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = set([branch])
        if commit['commit_id'] in self.release_index:
            commit['release'] = self.release_index[commit['commit_id']]
        else:
            commit['release'] = None
        if 'blueprint_id' in commit:
            # Blueprint ids are qualified with the module name.
            commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                      for bp_name in commit['blueprint_id']]
        if 'coauthor' in commit:
            # Keep only co-authors with a parseable, valid email address.
            verified_coauthors = []
            for coauthor in commit['coauthor']:
                m = re.match(CO_AUTHOR_PATTERN, coauthor)
                if m and utils.check_email_validity(
                        m.group("author_email")):
                    verified_coauthors.append(m.groupdict())
            if verified_coauthors:
                commit['coauthor'] = verified_coauthors
            else:
                del commit['coauthor']  # no valid authors
        yield commit
def log(self, branch, head_commit_id):
    """Parse ``git log`` of *branch* and yield one dict per commit.

    :param branch: branch name to check out before reading the log
    :param head_commit_id: if set, only commits after this id are read
        (range ``<id>..HEAD``); otherwise the full ``HEAD`` history
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])
    os.chdir(self.folder)
    if not self._checkout(branch):
        return
    commit_range = 'HEAD'
    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    try:
        output = sh.git('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                        '-M', '--no-merges', commit_range, _tty_out=False,
                        _decode_errors='ignore', _encoding='utf8')
    except sh.ErrorReturnCode as e:
        # A repo that cannot be read is skipped rather than aborting the run.
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'])
        LOG.exception(e)
        return
    for rec in re.finditer(GIT_LOG_PATTERN, six.text_type(output)):
        # Regex groups 1..N follow GIT_LOG_PARAMS order.
        i = 1
        commit = {}
        for param in GIT_LOG_PARAMS:
            commit[param[0]] = rec.group(i)
            i += 1
        if not commit['author_email']:
            # ignore commits with empty email (there are some < Essex)
            continue
        commit['author_email'] = utils.keep_safe_chars(
            commit['author_email'])
        # Parse the shortstat block separately from the named group.
        diff_stat_str = rec.group('diff_stat')
        diff_rec = re.search(DIFF_STAT_PATTERN, diff_stat_str)
        if diff_rec:
            files_changed = int(diff_rec.group(1))
            lines_changed_group = diff_rec.group(2)
            lines_changed = diff_rec.group(3)
            deleted_or_inserted = diff_rec.group(4)
            lines_deleted = diff_rec.group(5)
            if lines_changed_group:  # there inserted or deleted lines
                # Shortstat prints a single count when only one of
                # insertions/deletions occurred; attribute it correctly.
                if not lines_deleted:
                    if deleted_or_inserted[0] == 'd':  # deleted
                        lines_deleted = lines_changed
                        lines_changed = 0
        else:
            files_changed = 0
            lines_changed = 0
            lines_deleted = 0
        commit['files_changed'] = files_changed
        commit['lines_added'] = int(lines_changed or 0)
        commit['lines_deleted'] = int(lines_deleted or 0)
        # Collect ids (bug numbers, blueprints, ...) from the message body.
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, commit['message']):
                collection.add(item.group('id'))
            if collection:
                commit[pattern_name] = list(collection)
        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = set([branch])
        if commit['commit_id'] in self.release_index:
            commit['release'] = self.release_index[commit['commit_id']]
        else:
            commit['release'] = None
        if commit['release'] == 'ignored':
            # drop commits that are marked by 'ignored' release
            continue
        if 'blueprint_id' in commit:
            # Blueprint ids are qualified with the module name.
            commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                      for bp_name in commit['blueprint_id']]
        if 'coauthor' in commit:
            # Keep only co-authors with a parseable, valid email address.
            verified_coauthors = []
            for coauthor in commit['coauthor']:
                m = re.match(CO_AUTHOR_PATTERN, coauthor)
                if m and utils.check_email_validity(
                        m.group("author_email")):
                    verified_coauthors.append(m.groupdict())
            if verified_coauthors:
                commit['coauthor'] = verified_coauthors
            else:
                del commit['coauthor']  # no valid authors
        yield commit
def log(self, branch, head_commit_id):
    """Parse ``git log`` of *branch* and yield one dict per commit.

    :param branch: branch name to check out before reading the log
    :param head_commit_id: if set, only commits after this id are read
        (range ``<id>..HEAD``); otherwise the full ``HEAD`` history
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])
    os.chdir(self.folder)
    if not self._checkout(branch):
        return
    commit_range = 'HEAD'
    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    # Fix: a broken repo used to raise sh.ErrorReturnCode and abort the
    # whole run; skip it with a logged error instead (matches the other
    # vcs readers).
    try:
        output = sh.git('log', '--pretty=%s' % GIT_LOG_FORMAT, '--shortstat',
                        '-M', '--no-merges', commit_range, _tty_out=False,
                        _decode_errors='ignore')
    except sh.ErrorReturnCode as e:
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'])
        LOG.exception(e)
        return
    for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
        # Regex groups 1..N follow GIT_LOG_PARAMS order, then the
        # shortstat groups; `i` walks them positionally.
        i = 1
        commit = {}
        for param in GIT_LOG_PARAMS:
            commit[param[0]] = unicode(rec.group(i), 'utf8')
            i += 1
        if not utils.check_email_validity(commit['author_email']):
            continue
        commit['files_changed'] = int(rec.group(i))
        i += 1
        lines_changed_group = rec.group(i)
        i += 1
        lines_changed = rec.group(i)
        i += 1
        deleted_or_inserted = rec.group(i)
        i += 1
        lines_deleted = rec.group(i)
        i += 1
        if lines_changed_group:  # there inserted or deleted lines
            # Shortstat prints a single count when only one of
            # insertions/deletions occurred; attribute it correctly.
            if not lines_deleted:
                if deleted_or_inserted[0] == 'd':  # deleted
                    lines_deleted = lines_changed
                    lines_changed = 0
        commit['lines_added'] = int(lines_changed or 0)
        commit['lines_deleted'] = int(lines_deleted or 0)
        # Collect ids (bug numbers, blueprints, ...) from the message body.
        for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
            collection = set()
            for item in re.finditer(pattern, commit['message']):
                collection.add(item.group('id'))
            commit[pattern_name] = list(collection)
        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = set([branch])
        if commit['commit_id'] in self.release_index:
            commit['release'] = self.release_index[commit['commit_id']]
        else:
            commit['release'] = None
        if 'blueprint_id' in commit:
            # Blueprint ids are qualified with the module name.
            commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                      for bp_name in commit['blueprint_id']]
        yield commit
def log(self, branch, head_commit_id):
    """Parse ``git log`` of *branch* and yield one dict per commit.

    :param branch: branch name to check out before reading the log
    :param head_commit_id: if set, only commits after this id are read
        (range ``<id>..HEAD``); otherwise the full ``HEAD`` history
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])
    os.chdir(self.folder)
    if not self._checkout(branch):
        return
    commit_range = 'HEAD'
    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    try:
        output = sh.git('log', '--pretty=' + GIT_LOG_FORMAT, '--shortstat',
                        '-M', '--no-merges', commit_range, _tty_out=False,
                        _decode_errors='ignore', _encoding='utf8')
    except sh.ErrorReturnCode:
        # A repo that cannot be read is skipped rather than aborting the run.
        LOG.error('Unable to get log of git repo %s. Ignore it',
                  self.repo['uri'], exc_info=True)
        return
    for rec in re.finditer(GIT_LOG_PATTERN, six.text_type(output)):
        # Regex groups 1..N follow GIT_LOG_PARAMS order.
        i = 1
        commit = {}
        for param in GIT_LOG_PARAMS:
            commit[param[0]] = rec.group(i)
            i += 1
        # ignore machine/script produced submodule auto updates
        if commit['subject'] == u'Update git submodules':
            continue
        if not commit['author_email']:
            # ignore commits with empty email (there are some < Essex)
            continue
        commit['author_email'] = utils.keep_safe_chars(
            commit['author_email'])
        # Parse the shortstat block separately from the named group.
        diff_stat_str = rec.group('diff_stat')
        diff_rec = re.search(DIFF_STAT_PATTERN, diff_stat_str)
        if diff_rec:
            files_changed = int(diff_rec.group(1))
            lines_changed_group = diff_rec.group(2)
            lines_changed = diff_rec.group(3)
            deleted_or_inserted = diff_rec.group(4)
            lines_deleted = diff_rec.group(5)
            if lines_changed_group:  # there inserted or deleted lines
                # Shortstat prints a single count when only one of
                # insertions/deletions occurred; attribute it correctly.
                if not lines_deleted:
                    if deleted_or_inserted[0] == 'd':  # deleted
                        lines_deleted = lines_changed
                        lines_changed = 0
        else:
            files_changed = 0
            lines_changed = 0
            lines_deleted = 0
        commit['files_changed'] = files_changed
        commit['lines_added'] = int(lines_changed or 0)
        commit['lines_deleted'] = int(lines_deleted or 0)
        # Collect ids (bug numbers, blueprints, ...) from the message body.
        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, commit['message']):
                collection.add(item.group('id'))
            if collection:
                commit[pattern_name] = list(collection)
        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = set([branch])
        if commit['commit_id'] in self.release_index:
            commit['release'] = self.release_index[commit['commit_id']]
        else:
            commit['release'] = None
        if commit['release'] == 'ignored':
            # drop commits that are marked by 'ignored' release
            continue
        if 'blueprint_id' in commit:
            # Blueprint ids are qualified with the module name.
            commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                      for bp_name in commit['blueprint_id']]
        if 'coauthor' in commit:
            # Keep only co-authors with a parseable, valid email address.
            verified_coauthors = []
            for coauthor in commit['coauthor']:
                m = re.match(CO_AUTHOR_PATTERN, coauthor)
                if m and utils.check_email_validity(
                        m.group("author_email")):
                    verified_coauthors.append(m.groupdict())
            if verified_coauthors:
                commit['coauthor'] = verified_coauthors
            else:
                del commit['coauthor']  # no valid authors
        yield commit
def log(self, branch, head_commit_id):
    """Parse ``git log`` of *branch* and yield one dict per commit.

    :param branch: branch name to check out before reading the log
    :param head_commit_id: if set, only commits after this id are read
        (range ``<id>..HEAD``); otherwise the full ``HEAD`` history
    """
    LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])
    os.chdir(self.folder)
    if not self._checkout(branch):
        return
    commit_range = 'HEAD'
    if head_commit_id:
        commit_range = head_commit_id + '..HEAD'
    # NOTE(review): no error handling here — a failing `git log` raises
    # sh.ErrorReturnCode out of this generator.
    output = sh.git('log', '--pretty=%s' % GIT_LOG_FORMAT, '--shortstat',
                    '-M', '--no-merges', commit_range, _tty_out=False,
                    _decode_errors='ignore')
    for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
        # Regex groups 1..N follow GIT_LOG_PARAMS order, then the
        # shortstat groups; `i` walks them positionally.
        i = 1
        commit = {}
        for param in GIT_LOG_PARAMS:
            commit[param[0]] = unicode(rec.group(i), 'utf8')
            i += 1
        if not utils.check_email_validity(commit['author_email']):
            continue
        commit['files_changed'] = int(rec.group(i))
        i += 1
        lines_changed_group = rec.group(i)
        i += 1
        lines_changed = rec.group(i)
        i += 1
        deleted_or_inserted = rec.group(i)
        i += 1
        lines_deleted = rec.group(i)
        i += 1
        if lines_changed_group:  # there inserted or deleted lines
            # Shortstat prints a single count when only one of
            # insertions/deletions occurred; attribute it correctly.
            if not lines_deleted:
                if deleted_or_inserted[0] == 'd':  # deleted
                    lines_deleted = lines_changed
                    lines_changed = 0
        commit['lines_added'] = int(lines_changed or 0)
        commit['lines_deleted'] = int(lines_deleted or 0)
        # Collect ids (bug numbers, blueprints, ...) from the message body.
        for pattern_name, pattern in MESSAGE_PATTERNS.iteritems():
            collection = set()
            for item in re.finditer(pattern, commit['message']):
                collection.add(item.group('id'))
            commit[pattern_name] = list(collection)
        commit['date'] = int(commit['date'])
        commit['module'] = self.repo['module']
        commit['branches'] = set([branch])
        if commit['commit_id'] in self.release_index:
            commit['release'] = self.release_index[commit['commit_id']]
        else:
            commit['release'] = None
        if 'blueprint_id' in commit:
            # Blueprint ids are qualified with the module name.
            commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                      for bp_name in commit['blueprint_id']]
        yield commit