Beispiel #1
0
def _retrieve_mails(uri):
    """Yield one dict per message found in the mail archive at ``uri``.

    The archive is fetched with ``utils.read_uri``. When nothing is read an
    error is logged and the generator yields nothing. Content is gunzipped
    only when the uri contains ``.txt.gz``.

    Each yielded dict starts as the named groups of ``MAIL_BOX_PATTERN`` and
    is then adjusted:

    * ``author_email`` has its first ``' at '`` replaced by ``'@'``; records
      whose address fails ``utils.check_email_validity`` are skipped.
    * ``date`` is converted to an integer timestamp; records whose date
      header cannot be parsed are skipped with a warning.
    * for every entry of ``MESSAGE_PATTERNS`` a list of the unique ids found
      in ``body`` is stored under the pattern name.

    :param uri: location of the mail archive.
    :returns: generator of message dicts.
    """
    LOG.debug('Retrieving mail archive from uri: %s', uri)
    content = utils.read_uri(uri)
    if not content:
        LOG.error('Error reading mail archive from uri: %s', uri)
        return

    # Only gunzip when the uri contains ".txt.gz". The pattern is searched
    # anywhere in the uri, not anchored to its end.
    if re.search(r'\.txt\.gz', uri):
        LOG.debug('%s is a gzipped file', uri)
        with gzip.GzipFile(fileobj=StringIO.StringIO(content)) as gzip_fd:
            content = gzip_fd.read()
    else:
        LOG.debug('%s is not a gzipped file', uri)

    LOG.debug('Mail archive is loaded, start processing')

    # NOTE(review): presumably TRAILING_RECORD terminates the last message so
    # that MAIL_BOX_PATTERN can match it -- confirm against the pattern.
    content += TRAILING_RECORD

    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        email = rec.groupdict()
        email['author_email'] = email['author_email'].replace(' at ', '@', 1)
        if not utils.check_email_validity(email['author_email']):
            continue

        # parsedate_tz() returns None for a header it cannot parse, and
        # mktime_tz(None) would raise and abort the whole archive.
        parsed_date = email_utils.parsedate_tz(email['date'])
        if parsed_date is None:
            LOG.warning('Skipping mail with unparseable date: %s',
                        email['date'])
            continue
        email['date'] = int(email_utils.mktime_tz(parsed_date))

        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, email['body']):
                groups = item.groupdict()
                item_id = groups['id']
                # Patterns that define a "module" group produce ids of the
                # form "<module>:<id>"; the last such module seen also
                # becomes the module of the mail itself.
                if 'module' in groups:
                    item_id = groups['module'] + ':' + item_id
                    email['module'] = groups['module']
                collection.add(item_id)
            email[pattern_name] = list(collection)

        yield email
Beispiel #2
0
def _retrieve_mails(uri):
    """Yield one dict per message found in the mail archive at ``uri``.

    The archive is fetched with ``utils.read_uri``. When nothing is read an
    error is logged and the generator yields nothing. Content is gunzipped
    only when the uri contains ``.txt.gz``.

    Each yielded dict starts as the named groups of ``MAIL_BOX_PATTERN`` and
    is then adjusted:

    * ``author_email`` has its first ``" at "`` replaced by ``"@"``; records
      whose address fails ``utils.check_email_validity`` are skipped.
    * ``date`` is converted to an integer timestamp; records whose date
      header cannot be parsed are skipped with a warning.
    * for every entry of ``MESSAGE_PATTERNS`` a list of the unique ids found
      in ``body`` is stored under the pattern name.

    :param uri: location of the mail archive.
    :returns: generator of message dicts.
    """
    LOG.debug("Retrieving mail archive from uri: %s", uri)
    content = utils.read_uri(uri)
    if not content:
        LOG.error("Error reading mail archive from uri: %s", uri)
        return

    # Only gunzip when the uri contains ".txt.gz". The pattern is searched
    # anywhere in the uri, not anchored to its end.
    if re.search(r"\.txt\.gz", uri):
        LOG.debug("%s is a gzipped file", uri)
        with gzip.GzipFile(fileobj=StringIO.StringIO(content)) as gzip_fd:
            content = gzip_fd.read()
    else:
        LOG.debug("%s is not a gzipped file", uri)

    LOG.debug("Mail archive is loaded, start processing")

    # NOTE(review): presumably TRAILING_RECORD terminates the last message so
    # that MAIL_BOX_PATTERN can match it -- confirm against the pattern.
    content += TRAILING_RECORD

    for rec in re.finditer(MAIL_BOX_PATTERN, content):
        email = rec.groupdict()
        email["author_email"] = email["author_email"].replace(" at ", "@", 1)
        if not utils.check_email_validity(email["author_email"]):
            continue

        # parsedate_tz() returns None for a header it cannot parse, and
        # mktime_tz(None) would raise and abort the whole archive.
        parsed_date = email_utils.parsedate_tz(email["date"])
        if parsed_date is None:
            LOG.warning("Skipping mail with unparseable date: %s", email["date"])
            continue
        email["date"] = int(email_utils.mktime_tz(parsed_date))

        for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
            collection = set()
            for item in re.finditer(pattern, email["body"]):
                groups = item.groupdict()
                item_id = groups["id"]
                # Patterns that define a "module" group produce ids of the
                # form "<module>:<id>"; the last such module seen also
                # becomes the module of the mail itself.
                if "module" in groups:
                    item_id = groups["module"] + ":" + item_id
                    email["module"] = groups["module"]
                collection.add(item_id)
            email[pattern_name] = list(collection)

        yield email
Beispiel #3
0
 def test_email_invalid(self):
     """check_email_validity must return a falsy value for bad addresses."""
     rejected_samples = ('pupkin@localhost', '222@some.(trash)')
     for address in rejected_samples:
         self.assertFalse(utils.check_email_validity(address))
Beispiel #4
0
 def test_email_valid(self):
     """check_email_validity must return a truthy value for good addresses."""
     # NOTE(review): both samples are the same literal and look masked;
     # confirm the intended fixture addresses.
     accepted_samples = ('*****@*****.**', '*****@*****.**')
     for address in accepted_samples:
         self.assertTrue(utils.check_email_validity(address))
Beispiel #5
0
    def log(self, branch, head_commit_id):
        """Yield one dict per non-merge commit listed by ``git log``.

        Changes the process working directory to ``self.folder``, checks out
        ``branch`` through ``self._checkout`` and parses the ``git log``
        output with ``GIT_LOG_PATTERN``. Commits whose author e-mail fails
        ``utils.check_email_validity`` are skipped.

        :param branch: branch to check out; also stored in the ``branches``
            set of every yielded commit.
        :param head_commit_id: when truthy only ``<head_commit_id>..HEAD`` is
            listed, otherwise everything reachable from ``HEAD``.
        :returns: generator of commit dicts; yields nothing when the checkout
            fails.
        """
        LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])

        # NOTE: affects the working directory of the whole process.
        os.chdir(self.folder)
        if not self._checkout(branch):
            return

        commit_range = 'HEAD'
        if head_commit_id:
            commit_range = head_commit_id + '..HEAD'
        output = sh.git('log', '--pretty=%s' % GIT_LOG_FORMAT, '--shortstat',
                        '-M', '--no-merges', commit_range, _tty_out=False,
                        _decode_errors='ignore')

        # Regex groups 1..len(GIT_LOG_PARAMS) carry the textual fields named
        # by GIT_LOG_PARAMS; the five groups after them carry the
        # --shortstat numbers.
        stat_base = len(GIT_LOG_PARAMS) + 1

        for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
            commit = {}
            for group_no, param in enumerate(GIT_LOG_PARAMS, 1):
                commit[param[0]] = six.text_type(rec.group(group_no), 'utf8')

            if not utils.check_email_validity(commit['author_email']):
                continue

            (files_changed, lines_changed_group, lines_changed,
             deleted_or_inserted, lines_deleted) = rec.group(
                *range(stat_base, stat_base + 5))
            commit['files_changed'] = int(files_changed)

            # When only one number was reported and its marker starts with
            # "d", that number counts deletions rather than insertions. The
            # marker may be empty or missing, so it is not indexed directly.
            if (lines_changed_group and not lines_deleted
                    and (deleted_or_inserted or '').startswith('d')):
                lines_deleted = lines_changed
                lines_changed = 0

            commit['lines_added'] = int(lines_changed or 0)
            commit['lines_deleted'] = int(lines_deleted or 0)

            # Store the unique ids found in the message for each pattern;
            # patterns without a match leave no key in the commit.
            for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
                collection = {item.group('id') for item
                              in re.finditer(pattern, commit['message'])}
                if collection:
                    commit[pattern_name] = list(collection)

            commit['date'] = int(commit['date'])
            commit['module'] = self.repo['module']
            commit['branches'] = {branch}
            if commit['commit_id'] in self.release_index:
                commit['release'] = self.release_index[commit['commit_id']]
            else:
                commit['release'] = None

            if 'blueprint_id' in commit:
                # Qualify blueprint names with the module they belong to.
                commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                          for bp_name
                                          in commit['blueprint_id']]

            if 'coauthor' in commit:
                # Keep only co-authors that match CO_AUTHOR_PATTERN and have
                # a valid e-mail address.
                verified_coauthors = []
                for coauthor in commit['coauthor']:
                    m = re.match(CO_AUTHOR_PATTERN, coauthor)
                    if m and utils.check_email_validity(
                            m.group("author_email")):
                        verified_coauthors.append(m.groupdict())

                if verified_coauthors:
                    commit['coauthor'] = verified_coauthors
                else:
                    del commit['coauthor']  # no valid authors

            yield commit
Beispiel #6
0
 def test_email_invalid(self):
     """check_email_validity must return a falsy value for bad addresses."""
     rejected_samples = ('pupkin@localhost', '222@some.(trash)')
     for address in rejected_samples:
         self.assertFalse(utils.check_email_validity(address))
Beispiel #7
0
 def test_email_valid(self):
     """check_email_validity must return a truthy value for good addresses."""
     # NOTE(review): both samples are the same literal and look masked;
     # confirm the intended fixture addresses.
     accepted_samples = ('*****@*****.**', '*****@*****.**')
     for address in accepted_samples:
         self.assertTrue(utils.check_email_validity(address))
Beispiel #8
0
    def log(self, branch, head_commit_id):
        """Yield one dict per non-merge commit listed by ``git log``.

        Changes the process working directory to ``self.folder``, checks out
        ``branch`` through ``self._checkout`` and parses the ``git log``
        output with ``GIT_LOG_PATTERN``. Commits whose author e-mail fails
        ``utils.check_email_validity`` are skipped.

        :param branch: branch to check out; also stored in the ``branches``
            set of every yielded commit.
        :param head_commit_id: when truthy only ``<head_commit_id>..HEAD`` is
            listed, otherwise everything reachable from ``HEAD``.
        :returns: generator of commit dicts; yields nothing when the checkout
            fails.
        """
        LOG.debug('Parsing git log for repo uri %s', self.repo['uri'])

        # NOTE: affects the working directory of the whole process.
        os.chdir(self.folder)
        if not self._checkout(branch):
            return

        commit_range = 'HEAD'
        if head_commit_id:
            commit_range = head_commit_id + '..HEAD'
        output = sh.git('log',
                        '--pretty=%s' % GIT_LOG_FORMAT,
                        '--shortstat',
                        '-M',
                        '--no-merges',
                        commit_range,
                        _tty_out=False,
                        _decode_errors='ignore')

        # Regex groups 1..len(GIT_LOG_PARAMS) carry the textual fields named
        # by GIT_LOG_PARAMS; the five groups after them carry the
        # --shortstat numbers.
        stat_base = len(GIT_LOG_PARAMS) + 1

        for rec in re.finditer(GIT_LOG_PATTERN, str(output)):
            commit = {}
            for group_no, param in enumerate(GIT_LOG_PARAMS, 1):
                commit[param[0]] = six.text_type(rec.group(group_no), 'utf8')

            if not utils.check_email_validity(commit['author_email']):
                continue

            (files_changed, lines_changed_group, lines_changed,
             deleted_or_inserted, lines_deleted) = rec.group(
                *range(stat_base, stat_base + 5))
            commit['files_changed'] = int(files_changed)

            # When only one number was reported and its marker starts with
            # "d", that number counts deletions rather than insertions. The
            # marker may be empty or missing, so it is not indexed directly.
            if (lines_changed_group and not lines_deleted
                    and (deleted_or_inserted or '').startswith('d')):
                lines_deleted = lines_changed
                lines_changed = 0

            commit['lines_added'] = int(lines_changed or 0)
            commit['lines_deleted'] = int(lines_deleted or 0)

            # Store the unique ids found in the message for each pattern;
            # patterns without a match leave no key in the commit.
            for pattern_name, pattern in six.iteritems(MESSAGE_PATTERNS):
                collection = {item.group('id') for item
                              in re.finditer(pattern, commit['message'])}
                if collection:
                    commit[pattern_name] = list(collection)

            commit['date'] = int(commit['date'])
            commit['module'] = self.repo['module']
            commit['branches'] = {branch}
            if commit['commit_id'] in self.release_index:
                commit['release'] = self.release_index[commit['commit_id']]
            else:
                commit['release'] = None

            if 'blueprint_id' in commit:
                # Qualify blueprint names with the module they belong to.
                commit['blueprint_id'] = [(commit['module'] + ':' + bp_name)
                                          for bp_name in commit['blueprint_id']
                                          ]

            if 'coauthor' in commit:
                # Keep only co-authors that match CO_AUTHOR_PATTERN and have
                # a valid e-mail address.
                verified_coauthors = []
                for coauthor in commit['coauthor']:
                    m = re.match(CO_AUTHOR_PATTERN, coauthor)
                    if m and utils.check_email_validity(
                            m.group("author_email")):
                        verified_coauthors.append(m.groupdict())

                if verified_coauthors:
                    commit['coauthor'] = verified_coauthors
                else:
                    del commit['coauthor']  # no valid authors

            yield commit