def bug_handler(self, msg):
    """
    Handle a modified or created Bugzilla bug and update Neo4j if necessary.

    :param dict msg: a message to be processed
    """
    bug_data = msg['body']['msg']['bug']

    def _user_from_login(login):
        # Bugzilla identifies users by their login email; the username we
        # store is the local part of that address.
        return User.create_or_update({
            'username': login.split('@')[0],
            'email': login
        })[0]

    # Create/update the user nodes first, then the bug itself, and finally
    # wire up the relationships.
    assignee = _user_from_login(bug_data['assigned_to']['login'])
    qa_contact = _user_from_login(bug_data['qa_contact']['login'])
    reporter = _user_from_login(bug_data['reporter']['login'])

    bug = BugzillaBug.create_or_update({
        'id_': str(bug_data['id']),
        'creation_time': timestamp_to_datetime(bug_data['creation_time']),
        'modified_time': timestamp_to_datetime(bug_data['last_change_time']),
        'priority': bug_data['priority'],
        'product_name': bug_data['product']['name'],
        'product_version': bug_data['version']['name'],
        'resolution': bug_data['resolution'],
        'severity': bug_data['severity'],
        'short_description': bug_data['summary'],
        'status': bug_data['status']['name'],
        'target_milestone': bug_data['target_milestone']['name'],
    })[0]

    bug.conditional_connect(bug.assignee, assignee)
    bug.conditional_connect(bug.qa_contact, qa_contact)
    bug.conditional_connect(bug.reporter, reporter)
def commit_handler(self, msg):
    """
    Handle a dist-git commit message and update Neo4j if necessary.

    :param dict msg: a message to be processed
    """
    headers = msg['headers']
    body_msg = msg['body']['msg']

    repo = DistGitRepo.get_or_create({
        'namespace': headers['namespace'],
        'name': headers['repo']
    })[0]

    # Red Hat addresses are reduced to the bare username; anything else is
    # stored verbatim as the username.
    email = headers['email'].lower()
    username = email.split('@redhat.com')[0] if email.endswith('@redhat.com') else email
    author = User.create_or_update({
        'username': username,
        'email': email
    })[0]

    commit_message = body_msg['message']
    commit = DistGitCommit.create_or_update({
        'hash_': headers['rev'],
        'log_message': commit_message,
        'author_date': timestamp_to_datetime(body_msg['author_date']),
        'commit_date': timestamp_to_datetime(body_msg['commit_date'])
    })[0]

    # Connect the bugs mentioned in the commit message, grouped by how the
    # message references them (resolves/related/reverted).
    bug_rel_mapping = self.parse_bugzilla_bugs(commit_message)
    for key, relation in (('resolves', commit.resolved_bugs),
                          ('related', commit.related_bugs),
                          ('reverted', commit.reverted_bugs)):
        for bug_id in bug_rel_mapping[key]:
            relation.connect(BugzillaBug.get_or_create({'id_': bug_id})[0])

    commit.conditional_connect(commit.author, author)
    repo.commits.connect(commit)
def create_user_node(self, email):
    """
    Create a User node in Neo4j.

    If the email is a Red Hat email address, the username is the local part
    (domain-name prefix) of the address; otherwise the email itself is
    stored as the username.

    :param str email: the user's email
    :return: User object
    """
    # str.partition never raises, unlike the previous ``email.split('@')[1]``
    # which crashed with an IndexError when the string contained no '@'.
    local_part, _, domain = email.partition('@')
    if domain == 'redhat.com':
        username = local_part
    else:
        username = email
    user = User.create_or_update(dict(username=username, email=email))[0]
    return user
def advisory_handler(self, msg):
    """
    Handle an Errata tool advisory changes and update Neo4j if necessary.

    :param dict msg: a message to be processed
    """
    advisory_id = msg['body']['headers']['errata_id']
    # Fetch the full advisory details from the Errata Tool REST API
    erratum_url = '{0}/api/v1/erratum/{1}'.format(
        self.config['estuary_updater.errata_url'].rstrip('/'), advisory_id)
    response = requests.get(erratum_url, auth=requests_kerberos.HTTPKerberosAuth(), timeout=10)
    advisory_json = response.json()
    # The errata payload is keyed by advisory type (e.g. rhba/rhsa/rhea)
    advisory_type = msg['body']['headers']['type'].lower()
    advisory_info = advisory_json['errata'][advisory_type]
    # An embargoed advisory is signalled by a literally redacted synopsis
    embargoed = msg['body']['headers']['synopsis'] == 'REDACTED'
    # We can't store information on embargoed advisories other than the ID
    if not embargoed:
        # Look up the product name
        product_url = '{0}/products/{1}.json'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'),
            advisory_info['product_id'])
        response = requests.get(product_url, auth=requests_kerberos.HTTPKerberosAuth(),
                                timeout=10)
        product_json = response.json()
        # Look up the reporter and create/update the corresponding User node
        reporter_url = '{0}/api/v1/user/{1}'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'),
            advisory_info['reporter_id'])
        response = requests.get(reporter_url, auth=requests_kerberos.HTTPKerberosAuth(),
                                timeout=10)
        reporter_json = response.json()
        reporter = User.create_or_update({
            'username': reporter_json['login_name'].split('@')[0],
            'email': reporter_json['email_address']
        })[0]
        # Look up the assignee and create/update the corresponding User node
        assigned_to_url = '{0}/api/v1/user/{1}'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'),
            advisory_info['assigned_to_id'])
        response = requests.get(assigned_to_url, auth=requests_kerberos.HTTPKerberosAuth(),
                                timeout=10)
        assigned_to_json = response.json()
        assigned_to = User.create_or_update({
            'username': assigned_to_json['login_name'].split('@')[0],
            'email': assigned_to_json['email_address']
        })[0]
        advisory_params = {
            'advisory_name': advisory_info['fulladvisory'],
            'id_': advisory_id,
            'product_name': product_json['product']['name'],
            'security_impact': advisory_info['security_impact'],
            'state': advisory_info['status'],
            'synopsis': msg['body']['headers']['synopsis']
        }
        # Copy over the timestamp fields, skipping nulls
        for dt in ('actual_ship_date', 'created_at', 'issue_date', 'release_date',
                   'security_sla', 'status_updated_at', 'update_date'):
            if advisory_info[dt]:
                # Errata Tool's key name differs from the Estuary property here
                if dt == 'status_updated_at':
                    estuary_key = 'status_time'
                else:
                    estuary_key = dt
                advisory_params[estuary_key] = timestamp_to_datetime(advisory_info[dt])
    else:
        advisory_params = {
            'id_': advisory_id,
            # Set this to REDACTED and it'll be updated when it becomes public
            'advisory_name': 'REDACTED'
        }
    if 'docker' in advisory_info['content_types']:
        try:
            advisory = ContainerAdvisory.create_or_update(advisory_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as an Advisory instead of a
            # ContainerAdvisory, so let's fix that.
            advisory = Advisory.nodes.get_or_none(id_=advisory_id)
            if not advisory:
                # If there was a constraint validation failure and the advisory isn't just
                # the wrong label, then we can't recover.
                raise
            advisory.add_label(ContainerAdvisory.__label__)
            advisory = ContainerAdvisory.create_or_update(advisory_params)[0]
    else:
        # Check to see if a ContainerAdvisory using this id already exists, and if so remove its
        # label because it should not be a ContainerAdvisory if docker isn't a content type.
        container_adv = ContainerAdvisory.nodes.get_or_none(id_=advisory_id)
        if container_adv:
            container_adv.remove_label(ContainerAdvisory.__label__)
        advisory = Advisory.create_or_update(advisory_params)[0]
    # reporter/assigned_to only exist when the advisory is not embargoed
    if not embargoed:
        advisory.conditional_connect(advisory.reporter, reporter)
        advisory.conditional_connect(advisory.assigned_to, assigned_to)
    # Attach every bug referenced by the advisory
    bugs = advisory_json['bugs']['bugs']
    for bug in bugs:
        bug = BugzillaBug.get_or_create({'id_': bug['bug']['id']})[0]
        advisory.attached_bugs.connect(bug)
def get_or_create_build(self, identifier, original_nvr=None, force_container_label=False):
    """
    Get a Koji build from Neo4j, or create it if it does not exist in Neo4j.

    :param str/int identifier: an NVR (str) or build ID (int), or a dict of info from Koji API
    :kwarg str original_nvr: original_nvr property for the ContainerKojiBuild
    :kwarg bool force_container_label: when true, this skips the check to see if the build
        is a container and just creates the build with the ContainerKojiBuild label
    :rtype: KojiBuild
    :return: the Koji Build retrieved or created from Neo4j
    """
    # isinstance is the idiomatic type check (was ``type(identifier) is dict``)
    if isinstance(identifier, dict):
        # The caller already fetched the build info from Koji
        build_info = identifier
    else:
        try:
            build_info = self.koji_session.getBuild(identifier, strict=True)
        except Exception:
            log.error(
                'Failed to get brew build using the identifier {0}'.format(identifier))
            raise

    build_params = {
        'epoch': build_info['epoch'],
        'id_': str(build_info['id']),
        'name': build_info['package_name'],
        'release': build_info['release'],
        'state': build_info['state'],
        'version': build_info['version']
    }
    if build_info.get('extra'):
        build_params['extra'] = json.dumps(build_info['extra'])

    # To handle the case when a message has a null timestamp
    for time_key in ('completion_time', 'creation_time', 'start_time'):
        # Certain Koji API endpoints omit the *_ts values but have the *_time values, so
        # that's why the *_time values are used
        if build_info[time_key]:
            ts_format = r'%Y-%m-%d %H:%M:%S'
            # A '.' means the timestamp carries microseconds; parse those too.
            # (Membership test replaces the old ``len(rsplit('.', 1)) == 2``.)
            if '.' in build_info[time_key]:
                ts_format += r'.%f'
            build_params[time_key] = datetime.strptime(build_info[time_key], ts_format)

    owner = User.create_or_update({
        'username': build_info['owner_name'],
        'email': '{0}@redhat.com'.format(build_info['owner_name'])
    })[0]

    if force_container_label or self.is_container_build(build_info):
        if original_nvr:
            build_params['original_nvr'] = original_nvr
        build = ContainerKojiBuild.create_or_update(build_params)[0]
    elif self.is_module_build(build_info):
        module_extra_info = build_info['extra'].get('typeinfo', {}).get('module')
        build_params['context'] = module_extra_info.get('context')
        build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
        build_params['module_name'] = module_extra_info.get('name')
        build_params['module_stream'] = module_extra_info.get('stream')
        build_params['module_version'] = module_extra_info.get('version')
        try:
            build = ModuleKojiBuild.create_or_update(build_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as a KojiBuild instead of a
            # ModuleKojiBuild, so let's fix that.
            build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
            if not build:
                # If there was a constraint validation failure and the build isn't just the
                # wrong label, then we can't recover.
                raise
            build.add_label(ModuleKojiBuild.__label__)
            build = ModuleKojiBuild.create_or_update(build_params)[0]
    else:
        build = KojiBuild.create_or_update(build_params)[0]

    build.conditional_connect(build.owner, owner)
    return build
def _update_neo4j(neo4j_url, total_results, counter_and_results):
    """
    Update Neo4j results via mapping with multiprocessing.

    :param str neo4j_url: database url for Neo4j
    :param int total_results: the total number of results that will be processed. This is
        used for a logging statement about progress.
    :param tuple counter_and_results: a tuple where the first index is the current counter
        and the second index is a list of dictionaries representing results from Teiid
    """
    pool = None
    try:
        previous_total = counter_and_results[0]
        results = counter_and_results[1]
        # Since _update_neo4j will be run in a separate process, we must configure the
        # database URL every time the method is run.
        neomodel_config.DATABASE_URL = neo4j_url
        # Create a thread pool with 4 threads to speed up queries to cgit
        pool = ThreadPool(4)
        counter = 0
        for result in results:
            # Query cgit in batches of 200 results at a time
            if counter % 200 == 0:
                until = min(counter + 200, len(results))
                # Because of the joins in the SQL query, we end up with several rows with
                # the same commit hash and we only want to query cgit once per commit
                unique_commits = {(c['module'], c['sha']) for c in results[counter:until]}
                log.debug(
                    'Getting the author email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {
                    r['commit']: r
                    for r in pool.map(DistGitScraper._get_repo_info, unique_commits)
                }
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
            counter += 1
            log.info('Processing commit entry {0}/{1}'.format(
                previous_total + counter, total_results))
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                log.info('Skipping nodes creation with commit ID {0}'.format(
                    result['commit_id']))
                continue
            log.debug('Creating nodes associated with commit ID {0}'.format(
                result['commit_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]
            log.debug('Creating the user nodes associated with commit ID {0}'.format(
                result['commit_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]
            log.debug('Creating the relationships associated with commit ID {0}'.format(
                result['commit_id']))
            repo.commits.connect(commit)
            commit.conditional_connect(commit.author, author)
            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
    finally:
        if pool is not None:
            # Bug fix: the thread pool was previously never shut down, leaking its
            # worker threads on every invocation.
            pool.close()
            pool.join()
        # Close the DB connection after this is done processing
        db.driver.close()
def update_neo4j(self, results):
    """
    Update Neo4j with the dist-git commit and push information from Teiid.

    :param list results: a list of dictionaries
    """
    pool = Pool(processes=8)
    try:
        counter = 0
        for result in results:
            # Query cgit in batches of 200 results at a time
            if counter % 200 == 0:
                until = min(counter + 200, len(results))
                # Because of the joins in the SQL query, we end up with several rows with
                # the same commit hash and we only want to query cgit once per commit
                unique_commits = {(c['module'], c['sha']) for c in results[counter:until]}
                log.debug(
                    'Getting the author and committer email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {}
                # _get_repo_info returns a JSON string because it runs in a worker process
                for _r in pool.map(DistGitScraper._get_repo_info, unique_commits):
                    r = json.loads(_r)
                    repos_info[r['commit']] = r
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
                # A lot of RAM was allocated or used up, so let's call gc.collect() to
                # ensure it is removed
                gc.collect()
            counter += 1
            log.info('Processing commit and push entry {0}/{1}'.format(
                str(counter), str(len(results))))
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                log.info('Skipping nodes creation with commit ID {0} and push ID {1}'.format(
                    result['commit_id'], result['push_id']))
                continue
            log.debug('Creating nodes associated with commit ID {0} and push ID {1}'.format(
                result['commit_id'], result['push_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            # The ref looks like a path; the branch name is its last component
            branch_name = result['ref'].rsplit('/', 1)[1]
            branch = DistGitBranch.get_or_create({
                'name': branch_name,
                'repo_namespace': repo_info['namespace'],
                'repo_name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            push = DistGitPush.get_or_create({
                'id_': result['push_id'],
                'push_date': result['push_date'],
                'push_ip': result['push_ip']
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]
            log.debug(
                'Creating the user nodes associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]
            committer = User.create_or_update({
                'username': repo_info['committer_username'],
                'email': repo_info['committer_email']
            })[0]
            pusher = User.get_or_create({'username': result['pusher']})[0]
            log.debug(
                'Creating the relationships associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            repo.contributors.connect(author)
            repo.contributors.connect(committer)
            repo.contributors.connect(pusher)
            repo.commits.connect(commit)
            repo.pushes.connect(push)
            repo.branches.connect(branch)
            branch.contributors.connect(author)
            branch.contributors.connect(committer)
            branch.contributors.connect(pusher)
            branch.commits.connect(commit)
            branch.pushes.connect(push)
            push.conditional_connect(push.pusher, pusher)
            push.commits.connect(commit)
            commit.conditional_connect(commit.author, author)
            commit.conditional_connect(commit.committer, committer)
            if repo_info['parent']:
                parent_commit = DistGitCommit.get_or_create(
                    {'hash_': repo_info['parent']})[0]
                commit.conditional_connect(commit.parent, parent_commit)
            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
    finally:
        # Bug fix: the process pool was previously never shut down, leaking its
        # worker processes on every invocation.
        pool.close()
        pool.join()