def bug_handler(self, msg):
        """
        Mirror a created or modified Bugzilla bug from a message into Neo4j.

        The bug payload is read from ``msg['body']['msg']['bug']``. User nodes for the
        assignee, QA contact and reporter are created or updated first, then the bug node,
        and finally the bug is conditionally connected to each of those users.

        :param dict msg: a message to be processed
        """
        payload = msg['body']['msg']['bug']

        bug_params = dict(
            id_=str(payload['id']),
            creation_time=timestamp_to_datetime(payload['creation_time']),
            modified_time=timestamp_to_datetime(payload['last_change_time']),
            priority=payload['priority'],
            product_name=payload['product']['name'],
            product_version=payload['version']['name'],
            resolution=payload['resolution'],
            severity=payload['severity'],
            short_description=payload['summary'],
            status=payload['status']['name'],
            target_milestone=payload['target_milestone']['name'],
        )

        def upsert_user(role):
            # The login is stored as the email; the part before the first "@" is the username
            login = payload[role]['login']
            return User.create_or_update({
                'username': login.split('@')[0],
                'email': login,
            })[0]

        # (key in the bug payload, relationship attribute on the bug node), in processing order
        roles = (
            ('assigned_to', 'assignee'),
            ('qa_contact', 'qa_contact'),
            ('reporter', 'reporter'),
        )
        # The user nodes are written before the bug node
        users = [(relationship, upsert_user(role)) for role, relationship in roles]

        bug = BugzillaBug.create_or_update(bug_params)[0]

        for relationship, user in users:
            bug.conditional_connect(getattr(bug, relationship), user)
Example #2
0
    def commit_handler(self, msg):
        """
        Mirror a dist-git commit message into Neo4j.

        Creates or fetches the repository, author and commit nodes, links the commit to
        every Bugzilla bug returned by ``self.parse_bugzilla_bugs`` for the commit message,
        then connects the commit to its author and to the repository.

        :param dict msg: a message to be processed
        """
        headers = msg['headers']
        repo = DistGitRepo.get_or_create(
            dict(namespace=headers['namespace'], name=headers['repo']))[0]

        # Red Hat addresses use the part before the domain as the username; any other
        # address is used as the username unchanged
        email = headers['email'].lower()
        username = (email.split('@redhat.com')[0]
                    if email.endswith('@redhat.com') else email)
        author = User.create_or_update(dict(username=username, email=email))[0]

        details = msg['body']['msg']
        commit_message = details['message']
        commit = DistGitCommit.create_or_update({
            'hash_': headers['rev'],
            'log_message': commit_message,
            'author_date': timestamp_to_datetime(details['author_date']),
            'commit_date': timestamp_to_datetime(details['commit_date']),
        })[0]

        bug_rel_mapping = self.parse_bugzilla_bugs(commit_message)

        # (key in the parsed mapping, relationship attribute on the commit), in processing order
        relationships = (
            ('resolves', 'resolved_bugs'),
            ('related', 'related_bugs'),
            ('reverted', 'reverted_bugs'),
        )
        for mapping_key, relationship in relationships:
            for bug_id in bug_rel_mapping[mapping_key]:
                linked_bug = BugzillaBug.get_or_create({'id_': bug_id})[0]
                getattr(commit, relationship).connect(linked_bug)

        commit.conditional_connect(commit.author, author)

        repo.commits.connect(commit)
Example #3
0
    def create_user_node(self, email):
        """
        Create or update a User node in Neo4j.

        If the part of the address after the first ``@`` is exactly ``redhat.com``, the part
        before it is used as the username. Any other value, including one with no ``@`` at
        all, is stored as the username unchanged.

        :param str email: the user's email
        :return: User object
        """
        # partition() never raises on a value without "@", unlike indexing the result of
        # split('@'); the domain is simply empty in that case
        local_part, _, remainder = email.partition('@')
        # Only the text between the first and second "@" is compared, which matches the
        # previous split('@')[1] check for addresses containing more than one "@"
        domain = remainder.split('@')[0]
        username = local_part if domain == 'redhat.com' else email

        user = User.create_or_update(dict(username=username, email=email))[0]
        return user
    def advisory_handler(self, msg):
        """
        Mirror an Errata Tool advisory change into Neo4j.

        The advisory is always fetched from the Errata Tool API. When the message synopsis is
        ``REDACTED`` the advisory is treated as embargoed and only its ID and a placeholder
        name are stored. Otherwise the product, reporter and assignee are fetched as well, and
        the advisory is connected to those users and to its attached Bugzilla bugs.

        :param dict msg: a message to be processed
        """
        headers = msg['body']['headers']
        advisory_id = headers['errata_id']
        errata_url = self.config['estuary_updater.errata_url'].rstrip('/')

        def fetch_json(url):
            # Every Errata Tool request uses Kerberos authentication and a 10 second timeout
            reply = requests.get(url,
                                 auth=requests_kerberos.HTTPKerberosAuth(),
                                 timeout=10)
            return reply.json()

        advisory_json = fetch_json(
            '{0}/api/v1/erratum/{1}'.format(errata_url, advisory_id))

        # The erratum details are nested under the lower-cased advisory type
        advisory_info = advisory_json['errata'][headers['type'].lower()]

        synopsis = headers['synopsis']
        embargoed = synopsis == 'REDACTED'
        if embargoed:
            # We can't store information on embargoed advisories other than the ID
            advisory_params = {
                'id_': advisory_id,
                # Set this to REDACTED and it'll be updated when it becomes public
                'advisory_name': 'REDACTED'
            }
        else:
            product_json = fetch_json('{0}/products/{1}.json'.format(
                errata_url, advisory_info['product_id']))

            reporter_json = fetch_json('{0}/api/v1/user/{1}'.format(
                errata_url, advisory_info['reporter_id']))
            reporter = User.create_or_update({
                'username': reporter_json['login_name'].split('@')[0],
                'email': reporter_json['email_address'],
            })[0]

            assigned_to_json = fetch_json('{0}/api/v1/user/{1}'.format(
                errata_url, advisory_info['assigned_to_id']))
            assigned_to = User.create_or_update({
                'username': assigned_to_json['login_name'].split('@')[0],
                'email': assigned_to_json['email_address'],
            })[0]

            advisory_params = {
                'advisory_name': advisory_info['fulladvisory'],
                'id_': advisory_id,
                'product_name': product_json['product']['name'],
                'security_impact': advisory_info['security_impact'],
                'state': advisory_info['status'],
                'synopsis': synopsis,
            }
            # Errata Tool keys that are stored under a different property name
            renamed_keys = {'status_updated_at': 'status_time'}
            timestamp_keys = ('actual_ship_date', 'created_at', 'issue_date',
                              'release_date', 'security_sla',
                              'status_updated_at', 'update_date')
            for errata_key in timestamp_keys:
                raw_value = advisory_info[errata_key]
                # Empty timestamps are left out of the node properties
                if raw_value:
                    estuary_key = renamed_keys.get(errata_key, errata_key)
                    advisory_params[estuary_key] = timestamp_to_datetime(
                        raw_value)

        if 'docker' not in advisory_info['content_types']:
            # Check to see if a ContainerAdvisory using this id already exists, and if so remove
            # its label because it should not be a ContainerAdvisory if docker isn't a content
            # type.
            stale_container = ContainerAdvisory.nodes.get_or_none(
                id_=advisory_id)
            if stale_container:
                stale_container.remove_label(ContainerAdvisory.__label__)
            advisory = Advisory.create_or_update(advisory_params)[0]
        else:
            try:
                advisory = ContainerAdvisory.create_or_update(
                    advisory_params)[0]
            except neomodel.exceptions.ConstraintValidationFailed:
                # This must have errantly been created as an Advisory instead of a
                # ContainerAdvisory, so let's fix that.
                advisory = Advisory.nodes.get_or_none(id_=advisory_id)
                if not advisory:
                    # If there was a constraint validation failure and the advisory isn't just
                    # the wrong label, then we can't recover.
                    raise
                advisory.add_label(ContainerAdvisory.__label__)
                advisory = ContainerAdvisory.create_or_update(
                    advisory_params)[0]

        if embargoed:
            # No relationships are stored for embargoed advisories
            return

        advisory.conditional_connect(advisory.reporter, reporter)
        advisory.conditional_connect(advisory.assigned_to, assigned_to)

        for entry in advisory_json['bugs']['bugs']:
            attached_bug = BugzillaBug.get_or_create(
                {'id_': entry['bug']['id']})[0]
            advisory.attached_bugs.connect(attached_bug)
Example #5
0
    def get_or_create_build(self,
                            identifier,
                            original_nvr=None,
                            force_container_label=False):
        """
        Get a Koji build from Neo4j, or create it if it does not exist in Neo4j.

        The build is stored as a ContainerKojiBuild, ModuleKojiBuild or plain KojiBuild
        depending on ``force_container_label`` and the results of ``self.is_container_build``
        and ``self.is_module_build``. The build owner is stored as a User node and connected
        to the build.

        :param str/int identifier: an NVR (str) or build ID (int), or a dict of info from Koji API
        :kwarg str original_nvr: original_nvr property for the ContainerKojiBuild
        :kwarg bool force_container_label: when true, this skips the check to see if the build is a
            container and just creates the build with the ContainerKojiBuild label
        :rtype: KojiBuild
        :return: the Koji Build retrieved or created from Neo4j
        :raises Exception: whatever the Koji lookup raised, re-raised after being logged
        :raises neomodel.exceptions.ConstraintValidationFailed: if a module build violates a
            constraint and no KojiBuild with the same ID exists to relabel
        """
        # isinstance() also accepts dict subclasses, which would otherwise be passed to Koji
        # as if they were an NVR or build ID
        if isinstance(identifier, dict):
            build_info = identifier
        else:
            try:
                build_info = self.koji_session.getBuild(identifier,
                                                        strict=True)
            except Exception:
                log.error(
                    'Failed to get brew build using the identifier {0}'.format(
                        identifier))
                raise

        build_params = {
            'epoch': build_info['epoch'],
            'id_': str(build_info['id']),
            'name': build_info['package_name'],
            'release': build_info['release'],
            'state': build_info['state'],
            'version': build_info['version']
        }

        # The extra metadata is stored as a JSON string
        if build_info.get('extra'):
            build_params['extra'] = json.dumps(build_info['extra'])

        # To handle the case when a message has a null timestamp
        for time_key in ('completion_time', 'creation_time', 'start_time'):
            # Certain Koji API endpoints omit the *_ts values but have the *_time values, so that's
            # why the *_time values are used
            raw_time = build_info[time_key]
            if raw_time:
                ts_format = r'%Y-%m-%d %H:%M:%S'
                if '.' in raw_time:
                    # If there are microseconds, go ahead and parse that too
                    ts_format += r'.%f'
                build_params[time_key] = datetime.strptime(raw_time, ts_format)

        # The owner's email is derived from the Koji owner name
        owner = User.create_or_update({
            'username':
            build_info['owner_name'],
            'email':
            '{0}@redhat.com'.format(build_info['owner_name'])
        })[0]

        if force_container_label or self.is_container_build(build_info):
            if original_nvr:
                build_params['original_nvr'] = original_nvr
            build = ContainerKojiBuild.create_or_update(build_params)[0]
        elif self.is_module_build(build_info):
            # NOTE(review): assumes is_module_build() only returns true when
            # extra['typeinfo']['module'] is present - confirm against that helper
            module_extra_info = build_info['extra'].get('typeinfo',
                                                        {}).get('module')
            build_params['context'] = module_extra_info.get('context')
            build_params['mbs_id'] = module_extra_info.get(
                'module_build_service_id')
            build_params['module_name'] = module_extra_info.get('name')
            build_params['module_stream'] = module_extra_info.get('stream')
            build_params['module_version'] = module_extra_info.get('version')
            try:
                build = ModuleKojiBuild.create_or_update(build_params)[0]
            except neomodel.exceptions.ConstraintValidationFailed:
                # This must have errantly been created as a KojiBuild instead of a
                # ModuleKojiBuild, so let's fix that.
                build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
                if not build:
                    # If there was a constraint validation failure and the build isn't just the
                    # wrong label, then we can't recover.
                    raise
                build.add_label(ModuleKojiBuild.__label__)
                build = ModuleKojiBuild.create_or_update(build_params)[0]
        else:
            build = KojiBuild.create_or_update(build_params)[0]

        build.conditional_connect(build.owner, owner)

        return build
Example #6
0
    def _update_neo4j(neo4j_url, total_results, counter_and_results):
        """
        Update Neo4j results via mapping with multiprocessing.

        Results are processed in batches of 200: at the start of each batch the repository
        information for its unique commits is fetched in parallel with a thread pool, then
        the repo, commit, bug and author nodes and their relationships are written for every
        row. Rows whose repository information has no namespace are skipped.

        The thread pool and the Neo4j driver connection are both released when the function
        exits, whether or not processing succeeded.

        :param str neo4j_url: database url for Neo4j
        :param int total_results: the total number of results that will be processed. This is used
        for a logging statement about progress.
        :param tuple counter_and_results: a tuple where the first index is the current counter and
        the second index is a list of dictionaries representing results from Teiid
        """
        pool = None
        try:
            previous_total = counter_and_results[0]
            results = counter_and_results[1]
            # Since _update_neo4j will be run in a separate process, we must configure the database
            # URL every time the method is run.
            neomodel_config.DATABASE_URL = neo4j_url
            # Create a thread pool with 4 threads to speed up queries to cgit
            pool = ThreadPool(4)
            counter = 0
            for result in results:
                if counter % 200 == 0:
                    until = min(counter + 200, len(results))
                    # Because of the joins in the SQL query, we end up with several rows with the
                    # same commit hash and we only want to query cgit once per commit
                    unique_commits = {(c['module'], c['sha'])
                                      for c in results[counter:until]}
                    log.debug(
                        'Getting the author email addresses from cgit in parallel '
                        'for results {0} to {1}'.format(counter, until))
                    # Index the repository information by commit hash for the lookups below
                    repos_info = {
                        r['commit']: r
                        for r in pool.map(DistGitScraper._get_repo_info,
                                          unique_commits)
                    }
                    # This is no longer needed so it can be cleared to save RAM
                    del unique_commits
                counter += 1
                log.info('Processing commit entry {0}/{1}'.format(
                    previous_total + counter, total_results))
                repo_info = repos_info[result['sha']]
                if not repo_info.get('namespace'):
                    log.info(
                        'Skipping nodes creation with commit ID {0}'.format(
                            result['commit_id']))
                    continue

                log.debug(
                    'Creating nodes associated with commit ID {0}'.format(
                        result['commit_id']))
                repo = DistGitRepo.get_or_create({
                    'namespace':
                    repo_info['namespace'],
                    'name':
                    result['module']
                })[0]
                commit = DistGitCommit.create_or_update({
                    'author_date':
                    result['author_date'],
                    'commit_date':
                    result['commit_date'],
                    'hash_':
                    result['sha'],
                    # Round-trip the message through UTF-8 (originally noted as a guard for
                    # unicode characters in Python 2)
                    'log_message':
                    bytes(result['log_message'], 'utf-8').decode()
                })[0]
                bug = BugzillaBug.get_or_create({'id_':
                                                 result['bugzilla_id']})[0]

                log.debug(
                    'Creating the user nodes associated with commit ID {0}'.
                    format(result['commit_id']))
                author = User.create_or_update({
                    'username':
                    repo_info['author_username'],
                    'email':
                    repo_info['author_email']
                })[0]

                log.debug(
                    'Creating the relationships associated with commit ID {0}'.
                    format(result['commit_id']))
                repo.commits.connect(commit)

                commit.conditional_connect(commit.author, author)

                if result['bugzilla_type'] == 'related':
                    commit.related_bugs.connect(bug)
                elif result['bugzilla_type'] == 'resolves':
                    commit.resolved_bugs.connect(bug)
                elif result['bugzilla_type'] == 'reverted':
                    commit.reverted_bugs.connect(bug)
                # This is no longer needed so it can be cleared to save RAM
                del repo_info
        finally:
            try:
                if pool is not None:
                    # Release the worker threads; without this they outlive every call
                    pool.close()
                    pool.join()
            finally:
                # Close the DB connection after this is done processing
                db.driver.close()
Example #7
0
    def update_neo4j(self, results):
        """
        Update Neo4j with the dist-git commit and push information from Teiid.

        Results are processed in batches of 200: at the start of each batch the repository
        information for its unique commits is fetched in parallel with a process pool (each
        worker result is a JSON string), then the repo, branch, commit, push, bug and user
        nodes and their relationships are written for every row. Rows whose repository
        information has no namespace are skipped.

        The process pool is closed and joined when the method exits, whether or not
        processing succeeded.

        :param list results: a list of dictionaries
        """
        pool = Pool(processes=8)
        try:
            counter = 0
            for result in results:
                if counter % 200 == 0:
                    until = min(counter + 200, len(results))
                    # Because of the joins in the SQL query, we end up with several rows with
                    # the same commit hash and we only want to query cgit once per commit
                    unique_commits = {(c['module'], c['sha'])
                                      for c in results[counter:until]}
                    log.debug(
                        'Getting the author and committer email addresses from cgit in parallel '
                        'for results {0} to {1}'.format(counter, until))
                    # Index the repository information by commit hash for the lookups below
                    repos_info = {}
                    for _r in pool.map(DistGitScraper._get_repo_info,
                                       unique_commits):
                        r = json.loads(_r)
                        repos_info[r['commit']] = r
                    # This is no longer needed so it can be cleared to save RAM
                    del unique_commits
                    # A lot of RAM was allocated or used up, so let's call gc.collect() to
                    # ensure it is removed
                    gc.collect()
                counter += 1
                log.info('Processing commit and push entry {0}/{1}'.format(
                    str(counter), str(len(results))))
                repo_info = repos_info[result['sha']]
                if not repo_info.get('namespace'):
                    log.info(
                        'Skipping nodes creation with commit ID {0} and push ID {1}'
                        .format(result['commit_id'], result['push_id']))
                    continue

                log.debug(
                    'Creating nodes associated with commit ID {0} and push ID {1}'.
                    format(result['commit_id'], result['push_id']))
                repo = DistGitRepo.get_or_create({
                    'namespace':
                    repo_info['namespace'],
                    'name':
                    result['module']
                })[0]
                # The branch name is the last path component of the ref
                branch_name = result['ref'].rsplit('/', 1)[1]
                branch = DistGitBranch.get_or_create({
                    'name':
                    branch_name,
                    'repo_namespace':
                    repo_info['namespace'],
                    'repo_name':
                    result['module']
                })[0]
                commit = DistGitCommit.create_or_update({
                    'author_date':
                    result['author_date'],
                    'commit_date':
                    result['commit_date'],
                    'hash_':
                    result['sha'],
                    # Round-trip the message through UTF-8 (originally noted as a guard for
                    # unicode characters in Python 2)
                    'log_message':
                    bytes(result['log_message'], 'utf-8').decode()
                })[0]
                push = DistGitPush.get_or_create({
                    'id_': result['push_id'],
                    'push_date': result['push_date'],
                    'push_ip': result['push_ip']
                })[0]
                bug = BugzillaBug.get_or_create(
                    {'id_': result['bugzilla_id']})[0]

                log.debug(
                    'Creating the user nodes associated with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                author = User.create_or_update({
                    'username':
                    repo_info['author_username'],
                    'email':
                    repo_info['author_email']
                })[0]
                committer = User.create_or_update({
                    'username':
                    repo_info['committer_username'],
                    'email':
                    repo_info['committer_email']
                })[0]
                pusher = User.get_or_create({'username': result['pusher']})[0]

                log.debug(
                    'Creating the relationships associated with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                repo.contributors.connect(author)
                repo.contributors.connect(committer)
                repo.contributors.connect(pusher)
                repo.commits.connect(commit)
                repo.pushes.connect(push)
                repo.branches.connect(branch)

                branch.contributors.connect(author)
                branch.contributors.connect(committer)
                branch.contributors.connect(pusher)
                branch.commits.connect(commit)
                branch.pushes.connect(push)

                push.conditional_connect(push.pusher, pusher)
                push.commits.connect(commit)

                commit.conditional_connect(commit.author, author)
                commit.conditional_connect(commit.committer, committer)

                # Only link a parent commit when the repository information names one
                if repo_info['parent']:
                    parent_commit = DistGitCommit.get_or_create(
                        {'hash_': repo_info['parent']})[0]
                    commit.conditional_connect(commit.parent, parent_commit)

                if result['bugzilla_type'] == 'related':
                    commit.related_bugs.connect(bug)
                elif result['bugzilla_type'] == 'resolves':
                    commit.resolved_bugs.connect(bug)
                elif result['bugzilla_type'] == 'reverted':
                    commit.reverted_bugs.connect(bug)
                # This is no longer needed so it can be cleared to save RAM
                del repo_info
        finally:
            # Release the worker processes; without this they outlive every call
            pool.close()
            pool.join()