def get_koji_builds(self, start_date, end_date):
    """
    Fetch Koji build rows from Teiid for a given time window.

    :param datetime.datetime start_date: determines when to start the query
    :param datetime.datetime end_date: determines until when to scrape data
    :return: a list of dictionaries
    :rtype: list
    """
    log.info('Getting all Koji builds since {0} until {1}'.format(start_date, end_date))
    # Join each build with its creation event (for the timestamp), its
    # package name, and its owner from the brew users table.
    query = """
    SELECT events.time as creation_time, build.completion_time, build.epoch,
        build.extra, build.id, brew.users.name as owner_name,
        brew.users.krb_principal as owner_username, package.name as package_name,
        build.release, build.start_time, build.state, build.task_id, build.version
    FROM build
    LEFT JOIN events ON build.create_event = events.id
    LEFT JOIN package ON build.pkg_id = package.id
    LEFT JOIN brew.users ON build.owner = brew.users.id
    WHERE events.time IS NOT NULL AND events.time >= '{0}' AND events.time <= '{1}'
    ORDER BY build.id
    """.format(start_date, end_date)
    return self.teiid.query(sql=query)
def get_bugzilla_bugs(self, start_date, end_date):
    """
    Fetch Bugzilla bug rows from Teiid for a given time window.

    :param datetime.datetime start_date: when to start scraping data from
    :param datetime.datetime end_date: determines until when to scrape data
    :return: list of dictionaries containing bug info
    :rtype: list
    """
    log.info('Getting all Bugzilla bugs since {0} until {1}'.format(start_date, end_date))
    # Restrict to the 'Red Hat' classification and bound the window on the
    # last-modified timestamp (delta_ts); joins resolve user ids to emails.
    query = """
    SELECT bugs.*, products.name AS product_name, classifications.name AS classification,
        assigned.login_name AS assigned_to_email, reported.login_name AS reported_by_email,
        qa.login_name AS qa_contact_email
    FROM bugzilla.bugs AS bugs
    LEFT JOIN bugzilla.products AS products ON bugs.product_id = products.id
    LEFT JOIN bugzilla.classifications AS classifications
        ON products.classification_id = classifications.id
    LEFT JOIN bugzilla.profiles AS assigned ON bugs.assigned_to = assigned.userid
    LEFT JOIN bugzilla.profiles AS reported ON bugs.reporter = reported.userid
    LEFT JOIN bugzilla.profiles AS qa ON bugs.qa_contact = qa.userid
    WHERE classifications.name = 'Red Hat' AND bugs.delta_ts >= '{0}' AND bugs.delta_ts <= '{1}'
    ORDER BY bugs.creation_ts DESC;
    """.format(start_date, end_date)
    return self.teiid.query(sql=query)
def run(self, since=None, until=None):
    """
    Run the Freshmaker scraper.

    The Freshmaker scraper always does a full load, so any supplied time
    window is ignored (with a warning).

    :param str since: a datetime to start scraping data from (ignored)
    :param str until: a datetime to scrape data until (ignored)
    """
    if since or until:
        # Bug fix: the two implicitly-concatenated string fragments were
        # missing a separating space, producing "to theFreshmaker scraper".
        log.warn('Ignoring the since/until parameter; They do not apply to the '
                 'Freshmaker scraper')
    log.info('Starting initial load of Freshmaker events')
    self.query_api_and_update_neo4j()
    log.info('Initial load of Freshmaker events complete!')
def get_advisories(self, since, until):
    """
    Fetch Errata Tool advisories from Teiid for a given time window.

    :param datetime.datetime since: determines when to start querying
    :param datetime.datetime until: determines until when to scrape data
    :return: a list of dictionaries
    :rtype: list
    """
    log.info('Getting Errata advisories since {0} until {1}'.format(since, until))
    # The joins resolve the advisory's state, product, and the three user
    # roles (assignee, package owner, reporter) to their login names.
    query = """\
    SELECT main.actual_ship_date, main.fulladvisory as advisory_name,
        assigned_users.login_name AS assigned_to, main.content_types,
        states.current as state, main.created_at, main.id AS id, main.issue_date,
        package_users.login_name AS package_owner, products.name as product_name,
        products.short_name as product_short_name, main.release_date,
        reporter_users.login_name AS reporter, main.security_impact, main.security_sla,
        main.status_updated_at AS status_time, main.synopsis, main.errata_type AS type,
        main.update_date, main.updated_at
    FROM Errata_public.errata_main AS main
    LEFT JOIN Errata_public.state_indices as states ON main.current_state_index_id = states.id
    LEFT JOIN Errata_public.errata_products as products ON main.product_id = products.id
    LEFT JOIN Errata_public.users AS assigned_users ON main.assigned_to_id = assigned_users.id
    LEFT JOIN Errata_public.users AS package_users ON main.package_owner_id = package_users.id
    LEFT JOIN Errata_public.users AS reporter_users ON main.reporter_id = reporter_users.id
    WHERE main.updated_at >= '{0}' AND main.updated_at <= '{1}'
    ORDER BY main.id;
    """.format(since, until)
    return self.teiid.query(query)
def get_attached_bugs(self, advisory_id):
    """
    Look up the Bugzilla bugs attached to one advisory in Teiid.

    :param int advisory_id: the advisory ID
    :return: a list of a dictionaries
    :rtype: list
    """
    log.info(
        'Getting Bugzilla bugs tied to the advisory with ID {0}'.format(advisory_id))
    query = """\
    SELECT filed_bugs.bug_id as id_
    FROM Errata_public.filed_bugs as filed_bugs
    WHERE filed_bugs.errata_id = {0};
    """.format(advisory_id)
    return self.teiid.query(query)
def get_advisory_states(self, advisory_id):
    """
    Look up the state history of one advisory in Teiid.

    :param int advisory_id: the advisory ID
    :return: a list of a dictionaries
    :rtype: list
    """
    log.info('Getting states tied to the advisory with ID {0}'.format(advisory_id))
    # Join to the users table to resolve who made each state change
    query = """\
    SELECT states.created_at, states.id, states.current as name, states.updated_at,
        users.login_name AS username
    FROM Errata_public.state_indices as states
    LEFT JOIN Errata_public.users as users ON states.who_id = users.id
    WHERE errata_id = {}
    ORDER BY states.id;
    """.format(advisory_id)
    return self.teiid.query(query)
def update_neo4j(self, bugs):
    """
    Load Bugzilla bug rows from Teiid into Neo4j.

    :param list bugs: a list of dictionaries
    """
    log.info('Beginning to upload data to Neo4j')
    count = 0
    for bug_dict in bugs:
        # Create or refresh the bug node itself
        bug_props = {
            'id_': bug_dict['bug_id'],
            'severity': bug_dict['bug_severity'],
            'status': bug_dict['bug_status'],
            'creation_time': bug_dict['creation_ts'],
            'modified_time': bug_dict['delta_ts'],
            'priority': bug_dict['priority'],
            # Round-trip through bytes in case we get unicode characters
            'product_name': bytes(bug_dict['product_name'], 'utf-8').decode(),
            'product_version': bug_dict['version'],
            'classification': bug_dict['classification'],
            'resolution': bug_dict['resolution'],
            'target_milestone': bug_dict['target_milestone'],
            'votes': bug_dict['votes'],
            'short_description': bytes(bug_dict['short_desc'], 'utf-8').decode(),
        }
        bug = BugzillaBug.create_or_update(bug_props)[0]
        count += 1
        log.info('Uploaded {0} bugs out of {1}'.format(count, len(bugs)))
        # Creating User nodes and updating their relationships
        if bug_dict['assigned_to']:
            assignee = self.create_user_node(bug_dict['assigned_to_email'])
            bug.conditional_connect(bug.assignee, assignee)
        if bug_dict['reporter']:
            reporter = self.create_user_node(bug_dict['reported_by_email'])
            bug.conditional_connect(bug.reporter, reporter)
        if bug_dict['qa_contact']:
            qa_contact = self.create_user_node(bug_dict['qa_contact_email'])
            bug.conditional_connect(bug.qa_contact, qa_contact)
def get_associated_builds(self, advisory_id):
    """
    Look up the Brew builds attached to one advisory in Teiid.

    :param int advisory_id: the advisory ID
    :return: a list of a dictionaries
    :rtype: list
    """
    log.info('Getting Brew builds tied to the advisory with ID {0}'.format(advisory_id))
    # removed_index_id is selected so callers can tell whether the build
    # was later detached from the advisory.
    query = """\
    SELECT brew_builds.id as id_, packages.name, brew_builds.release, removed_index_id,
        brew_builds.version
    FROM Errata_public.errata_brew_mappings as brew_mappings
    LEFT JOIN Errata_public.brew_builds AS brew_builds
        ON brew_builds.id = brew_mappings.brew_build_id
    LEFT JOIN Errata_public.packages AS packages ON brew_builds.package_id = packages.id
    WHERE errata_id = {0};
    """.format(advisory_id)
    return self.teiid.query(query)
def get_distgit_data(self, since, until):
    """
    Fetch dist-git commit and Bugzilla reference rows from Teiid.

    :param datetime.datetime since: determines when to start the query
    :param datetime.datetime until: determines until when to scrape data
    :return: a list of dictionaries
    :rtype: list
    """
    log.info('Getting dist-git commits since {0} until {1}'.format(since, until))
    # One row per (commit, push, bugzilla reference) combination
    query = """\
    SELECT c.commit_id, c.author, c.author_date, c.commit_date, c.log_message, c.sha,
        bz.bugzilla_id, bz.type as bugzilla_type, p.module, p.ref
    FROM gitbz.git_commits as c
    LEFT JOIN gitbz.git_push_commit_map as map ON c.commit_id = map.commit_id
    LEFT JOIN gitbz.git_pushes as p ON p.push_id = map.push_id
    LEFT JOIN gitbz.redhat_bugzilla_references as bz ON c.commit_id = bz.commit_id
    WHERE c.commit_date >= '{0}' AND c.commit_date <= '{1}'
    ORDER BY c.commit_date DESC;
    """.format(since, until)
    return self.teiid.query(query)
def run(self, since=None, until=None):
    """
    Run the dist-git scraper.

    Fetches the commits from Teiid, splits them into chunks, and uploads each
    chunk to Neo4j in a short-lived child process.

    :param str since: a datetime to start scraping data from
    :param str until: a datetime to scrape data until
    """
    log.info('Starting initial load of dist-git commits')
    # Fall back to the instance defaults when no explicit window was given
    if since is None:
        start_date = self.default_since
    else:
        start_date = timestamp_to_date(since)
    if until is None:
        end_date = self.default_until
    else:
        end_date = timestamp_to_date(until)
    results = self.get_distgit_data(start_date, end_date)
    total_results = len(results)
    log.info('Successfully fetched {0} results from Teiid'.format(
        total_results))
    # Overwrite results with the formatted results so we don't have to store both in RAM
    results = list(self._get_result_chunks(results))
    # Upload the results to Neo4j using multi-processing to process chunks of results. We don't
    # use pool so that way the process doesn't get reused and the RAM is returned to the OS.
    # This will aid in a work-around for a memory leak from one of the libraries used that
    # couldn't be tracked down.
    procs = []
    concurrent_procs = 2
    for i, result in enumerate(results):
        # Only check if we've reached the process limit after it's technically possible
        if i >= concurrent_procs:
            active_procs = [_proc for _proc in procs if _proc.is_alive()]
            if len(active_procs) >= concurrent_procs:
                log.debug(
                    'There are already {0} processes running. Will wait until one of '
                    'them completes.'.format(len(active_procs)))
                # Block on the oldest still-running process. A newer process may
                # finish first, but joining the oldest keeps the bookkeeping simple
                # and still caps concurrency at concurrent_procs.
                active_procs[0].join()
        # Each chunk gets a fresh process so its memory is returned to the OS on exit
        proc = Process(target=self._update_neo4j,
                       args=(neomodel_config.DATABASE_URL, total_results, result))
        proc.start()
        procs.append(proc)
    for proc in procs:
        # Wait for all the processes to finish
        proc.join()
    log.info('Initial load of dist-git commits complete!')
def run(self, since=None, until=None):
    """
    Run the Bugzilla scraper.

    :param str since: a datetime to start scraping data from
    :param str until: a datetime to scrape data until
    """
    log.info('Starting initial load of Bugzilla bugs')
    # Fall back to the configured defaults when no explicit bounds are given
    start_date = self.default_since if since is None else timestamp_to_date(since)
    end_date = self.default_until if until is None else timestamp_to_date(until)
    bugs = self.get_bugzilla_bugs(start_date, end_date)
    log.info('Successfully fetched {0} bugs from teiid'.format(len(bugs)))
    self.update_neo4j(bugs)
    log.info('Initial load of Bugzilla bugs complete!')
def run(self, since=None, until=None):
    """
    Run the Errata Tool scraper.

    :param str since: a datetime to start scraping data from
    :param str until: a datetime to scrape data until
    """
    log.info('Starting initial load of Errata advisories')
    # Fall back to the configured defaults when no explicit bounds are given
    start_date = self.default_since if since is None else timestamp_to_date(since)
    end_date = self.default_until if until is None else timestamp_to_date(until)
    advisories = self.get_advisories(start_date, end_date)
    log.info('Successfully fetched {0} advisories from Teiid'.format(
        len(advisories)))
    self.update_neo4j(advisories)
    log.info('Initial load of Errata advisories complete!')
def run(self, since=None, until=None):
    """
    Run the dist-git scraper.

    :param str since: a datetime to start scraping data from
    :param str until: a datetime to scrape data until
    """
    log.info('Starting initial load of dist-git commits and pushes')
    # Fall back to the configured defaults when no explicit bounds are given
    start_date = self.default_since if since is None else timestamp_to_date(since)
    end_date = self.default_until if until is None else timestamp_to_date(until)
    results = self.get_distgit_data(start_date, end_date)
    log.info('Successfully fetched {0} results from Teiid'.format(
        len(results)))
    self.update_neo4j(results)
    log.info('Initial load of dist-git commits and pushes complete!')
def run(self, since=None, until=None):
    """
    Run the Koji scraper.

    :param str since: a datetime to start scraping data from
    :param str until: a datetime to scrape data until
    """
    log.info('Starting initial load for Koji')
    # Initialize a start date from which all builds must be fetched
    # If no input is given by the user, fetch builds from the past two years
    start_date = self.default_since if since is None else utils.timestamp_to_date(since)
    end_date = self.default_until if until is None else utils.timestamp_to_date(until)
    builds = self.get_koji_builds(start_date, end_date)
    log.info('Successfully fetched {0} builds from teiid'.format(len(builds)))
    self.update_neo4j(builds)
    log.info('Initial load of Koji builds complete!')
def update_neo4j(self, advisories):
    """
    Update Neo4j with Errata Tool advisories from Teiid.

    :param list advisories: a list of dictionaries of advisories
    """
    count = 0
    for advisory in advisories:
        count += 1
        log.info('Processing advisory {0}/{1}'.format(
            count, len(advisories)))
        # The content_types column is a string with YAML in it, so convert it to a list.
        # Bug fix: this previously read advisories[0]['content_types'], so every
        # advisory was stored with the FIRST advisory's content types.
        content_types = yaml.safe_load(advisory['content_types'])
        adv = Advisory.create_or_update({
            'actual_ship_date': advisory['actual_ship_date'],
            'advisory_name': advisory['advisory_name'],
            'content_types': content_types,
            'created_at': advisory['created_at'],
            'id_': advisory['id'],
            'issue_date': advisory['issue_date'],
            'product_name': advisory['product_name'],
            'product_short_name': advisory['product_short_name'],
            'release_date': advisory['release_date'],
            'security_impact': advisory['security_impact'],
            'security_sla': advisory['security_sla'],
            'state': advisory['state'],
            'status_time': advisory['status_time'],
            'synopsis': advisory['synopsis'],
            'type_': advisory['type'],
            'update_date': advisory['update_date'],
            'updated_at': advisory['updated_at']
        })[0]
        # Connect the advisory to the three user roles; usernames are the
        # local part of the login email.
        assigned_to = User.get_or_create(
            {'username': advisory['assigned_to'].split('@')[0]})[0]
        adv.conditional_connect(adv.assigned_to, assigned_to)
        package_owner = User.get_or_create(
            {'username': advisory['package_owner'].split('@')[0]})[0]
        adv.conditional_connect(adv.package_owner, package_owner)
        reporter = User.get_or_create(
            {'username': advisory['reporter'].split('@')[0]})[0]
        adv.conditional_connect(adv.reporter, reporter)
        # Mirror the advisory's state history
        for state in self.get_advisory_states(advisory['id']):
            adv_state = AdvisoryState.create_or_update({
                'id_': state['id'],
                'name': state['name'],
                'created_at': state['created_at'],
                'updated_at': state['updated_at']
            })[0]
            adv_state.conditional_connect(adv_state.advisory, adv)
            state_creator = User.get_or_create(
                {'username': state['username'].split('@')[0]})[0]
            adv_state.conditional_connect(adv_state.creator, state_creator)
        for attached_bug in self.get_attached_bugs(advisory['id']):
            bug = BugzillaBug.get_or_create(attached_bug)[0]
            adv.attached_bugs.connect(bug)
        for associated_build in self.get_associated_builds(advisory['id']):
            # If this is set, that means it was once part of the advisory but not anymore.
            # This relationship needs to be deleted if it exists.
            if associated_build['removed_index_id']:
                build = KojiBuild.nodes.get_or_none(
                    id_=associated_build['id_'])
                if build:
                    adv.attached_builds.disconnect(build)
            else:
                # This key shouldn't be stored in Neo4j
                del associated_build['removed_index_id']
                build = KojiBuild.get_or_create(associated_build)[0]
                adv.attached_builds.connect(build)
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    :param list builds: a list of dictionaries
    """
    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    count = 0
    for build_dict in builds:
        build_params = dict(
            id_=build_dict['id'],
            epoch=build_dict['epoch'],
            state=build_dict['state'],
            creation_time=build_dict['creation_time'],
            start_time=build_dict['start_time'],
            completion_time=build_dict['completion_time'],
            extra=build_dict['extra'],
            name=build_dict['package_name'],
            version=build_dict['version'],
            release=build_dict['release']
        )
        package_name = build_dict['package_name']
        # "extra" is a JSON blob; fall back to {} when it's missing or invalid
        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}
        container_build = False
        # Checking a heuristic for determining if a build is a container build since, currently
        # there is no definitive way to do it.
        if extra_json and extra_json.get('container_koji_build_id'):
            container_build = True
        # Checking another heuristic for determining if a build is a container build since
        # currently there is no definitive way to do it.
        elif (package_name.endswith('-container') or package_name.endswith('-docker')):
            container_build = True
        if container_build:
            build = ContainerKojiBuild.create_or_update(build_params)[0]
        else:
            build = KojiBuild.create_or_update(build_params)[0]
        # Prefer the Kerberos principal's local part; fall back to the plain name
        if build_dict['owner_username']:
            username = build_dict['owner_username'].split('@')[0]
        else:
            username = build_dict['owner_name']
        user = User.get_or_create(dict(username=username))[0]
        build.conditional_connect(build.owner, user)
        # Reconcile the build's tags: connect current ones, disconnect stale ones
        tags = self.get_build_tags(build_dict['id'])
        current_tag_ids = set()
        for _tag in tags:
            current_tag_ids.add(_tag['tag_id'])
            tag = KojiTag.create_or_update(dict(
                id_=_tag['tag_id'],
                name=_tag['tag_name']
            ))[0]
            tag.builds.connect(build)
        # _tag.id_ must be cast as an int because it is stored as a string in Neo4j since
        # it's a UniqueIdProperty
        connected_tags = {int(_tag.id_): _tag for _tag in build.tags.all()}
        extra_connected_tag_ids = set(connected_tags.keys()) - current_tag_ids
        for tag_id in extra_connected_tag_ids:
            build.tags.disconnect(connected_tags[tag_id])
        count += 1
        log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))
        # Bug fix: removed a redundant second json.loads of build_dict['extra'];
        # extra_json computed above is still valid here.
        container_koji_task_id = extra_json.get('container_koji_task_id')
        if build_dict['task_id']:
            task_id = build_dict['task_id']
        elif container_koji_task_id:
            task_id = container_koji_task_id
        else:
            # Continue if the task_id is None
            continue
        # Getting task related to the current build.
        # Bug fix: indexing with [0] raised IndexError when Teiid returned no task,
        # which also made the old "if not task_dict: continue" check unreachable.
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            # Continue if no corresponding task found
            continue
        # The task request is XML; the git URL's fragment is the commit hash.
        # Bug fix: guard against a git string with no '#' fragment, which
        # previously raised IndexError on rsplit(...)[1].
        xml_root = ET.fromstring(task_dict['request'])
        commit_hash = None
        for child in xml_root.iter('string'):
            if child.text and child.text.startswith('git') and '#' in child.text:
                commit_hash = child.text.rsplit('#', 1)[1]
                break
        task = KojiTask.create_or_update(dict(
            id_=task_dict['id'],
            weight=task_dict['weight'],
            create_time=task_dict['create_time'],
            start_time=task_dict['start_time'],
            completion_time=task_dict['completion_time'],
            state=task_dict['state'],
            priority=task_dict['priority'],
            arch=task_dict['arch'],
            method=task_dict['method']
        ))[0]
        # Defining Relationships
        task.builds.connect(build)
        task.conditional_connect(task.owner, user)
        if commit_hash:
            commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
            build.conditional_connect(build.commit, commit)
        child_tasks = self.get_task_children(task_dict['id'])
        if not child_tasks:
            # Continue if no corresponding child task found
            continue
        for child_task_dict in child_tasks:
            child_task = KojiTask.create_or_update(dict(
                id_=child_task_dict['id'],
                weight=child_task_dict['weight'],
                create_time=child_task_dict['create_time'],
                start_time=child_task_dict['start_time'],
                completion_time=child_task_dict['completion_time'],
                state=child_task_dict['state'],
                priority=child_task_dict['priority'],
                arch=child_task_dict['arch'],
                method=child_task_dict['method']
            ))[0]
            child_task.conditional_connect(child_task.parent, task)
def _update_neo4j(neo4j_url, total_results, counter_and_results):
    """
    Update Neo4j results via mapping with multiprocessing.

    :param str neo4j_url: database url for Neo4j
    :param int total_results: the total number of results that will be processed. This is
        used for a logging statement about progress.
    :param tuple counter_and_results: a tuple where the first index is the current counter and
        the second index is a list of dictionaries representing results from Teiid
    """
    try:
        previous_total = counter_and_results[0]
        results = counter_and_results[1]
        # Since _update_neo4j will be run in a separate process, we must configure the database
        # URL every time the method is run.
        neomodel_config.DATABASE_URL = neo4j_url
        # Create a thread pool with 4 threads to speed up queries to cgit
        pool = ThreadPool(4)
        counter = 0
        for result in results:
            # Every 200 results, rebuild the cgit repo-info cache for the next window
            if counter % 200 == 0:
                until = counter + 200
                if until > len(results):
                    until = len(results)
                # Because of the joins in the SQL query, we end up with several rows with the
                # same commit hash and we only want to query cgit once per commit
                unique_commits = set([(c['module'], c['sha'])
                                      for c in results[counter:until]])
                log.debug(
                    'Getting the author email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {
                    r['commit']: r
                    for r in pool.map(DistGitScraper._get_repo_info, unique_commits)
                }
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
            counter += 1
            log.info('Processing commit entry {0}/{1}'.format(
                previous_total + counter, total_results))
            # NOTE(review): assumes every sha in this 200-row window has an entry in
            # repos_info — holds because the cache was built from the same slice.
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                # cgit couldn't resolve the repo namespace; skip this commit entirely
                log.info(
                    'Skipping nodes creation with commit ID {0}'.format(
                        result['commit_id']))
                continue
            log.debug(
                'Creating nodes associated with commit ID {0}'.format(
                    result['commit_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]
            log.debug(
                'Creating the user nodes associated with commit ID {0}'.
                format(result['commit_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]
            log.debug(
                'Creating the relationships associated with commit ID {0}'.
                format(result['commit_id']))
            repo.commits.connect(commit)
            commit.conditional_connect(commit.author, author)
            # Each row carries at most one bug reference of one type
            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
    finally:
        # Close the DB connection after this is done processing
        db.driver.close()
def update_neo4j(self, results):
    """
    Update Neo4j with the dist-git commit and push information from Teiid.

    :param list results: a list of dictionaries
    """
    # Worker pool for the parallel cgit lookups below
    pool = Pool(processes=8)
    counter = 0
    for result in results:
        # Every 200 results, rebuild the cgit repo-info cache for the next window
        if counter % 200 == 0:
            until = counter + 200
            if until > len(results):
                until = len(results)
            # Because of the joins in the SQL query, we end up with several rows with the same
            # commit hash and we only want to query cgit once per commit
            unique_commits = set([(c['module'], c['sha'])
                                  for c in results[counter:until]])
            log.debug(
                'Getting the author and committer email addresses from cgit in parallel '
                'for results {0} to {1}'.format(counter, until))
            repos_info = {}
            # _get_repo_info returns a JSON string (so it can cross the process
            # boundary); decode and key by commit hash
            for _r in pool.map(DistGitScraper._get_repo_info, unique_commits):
                r = json.loads(_r)
                repos_info[r['commit']] = r
            # This is no longer needed so it can be cleared to save RAM
            del unique_commits
            # A lot of RAM was allocated or used up, so let's call gc.collect() to ensure it
            # is removed
            gc.collect()
        counter += 1
        log.info('Processing commit and push entry {0}/{1}'.format(
            str(counter), str(len(results))))
        repo_info = repos_info[result['sha']]
        if not repo_info.get('namespace'):
            # cgit couldn't resolve the repo namespace; skip this row entirely
            log.info(
                'Skipping nodes creation with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            continue
        log.debug(
            'Creating nodes associated with commit ID {0} and push ID {1}'.
            format(result['commit_id'], result['push_id']))
        repo = DistGitRepo.get_or_create({
            'namespace': repo_info['namespace'],
            'name': result['module']
        })[0]
        # e.g. "refs/heads/master" -> "master"
        branch_name = result['ref'].rsplit('/', 1)[1]
        branch = DistGitBranch.get_or_create({
            'name': branch_name,
            'repo_namespace': repo_info['namespace'],
            'repo_name': result['module']
        })[0]
        commit = DistGitCommit.create_or_update({
            'author_date': result['author_date'],
            'commit_date': result['commit_date'],
            'hash_': result['sha'],
            # In case we get unicode characters in Python 2
            'log_message': bytes(result['log_message'], 'utf-8').decode()
        })[0]
        push = DistGitPush.get_or_create({
            'id_': result['push_id'],
            'push_date': result['push_date'],
            'push_ip': result['push_ip']
        })[0]
        bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]
        log.debug(
            'Creating the user nodes associated with commit ID {0} and push ID {1}'
            .format(result['commit_id'], result['push_id']))
        author = User.create_or_update({
            'username': repo_info['author_username'],
            'email': repo_info['author_email']
        })[0]
        committer = User.create_or_update({
            'username': repo_info['committer_username'],
            'email': repo_info['committer_email']
        })[0]
        pusher = User.get_or_create({'username': result['pusher']})[0]
        log.debug(
            'Creating the relationships associated with commit ID {0} and push ID {1}'
            .format(result['commit_id'], result['push_id']))
        # Wire all the relationships between repo, branch, push, commit, and users
        repo.contributors.connect(author)
        repo.contributors.connect(committer)
        repo.contributors.connect(pusher)
        repo.commits.connect(commit)
        repo.pushes.connect(push)
        repo.branches.connect(branch)
        branch.contributors.connect(author)
        branch.contributors.connect(committer)
        branch.contributors.connect(pusher)
        branch.commits.connect(commit)
        branch.pushes.connect(push)
        push.conditional_connect(push.pusher, pusher)
        push.commits.connect(commit)
        commit.conditional_connect(commit.author, author)
        commit.conditional_connect(commit.committer, committer)
        # Link to the parent commit when cgit reported one
        if repo_info['parent']:
            parent_commit = DistGitCommit.get_or_create(
                {'hash_': repo_info['parent']})[0]
            commit.conditional_connect(commit.parent, parent_commit)
        # Each row carries at most one bug reference of one type
        if result['bugzilla_type'] == 'related':
            commit.related_bugs.connect(bug)
        elif result['bugzilla_type'] == 'resolves':
            commit.resolved_bugs.connect(bug)
        elif result['bugzilla_type'] == 'reverted':
            commit.reverted_bugs.connect(bug)
        # This is no longer needed so it can be cleared to save RAM
        del repo_info
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    :param list builds: a list of dictionaries
    """
    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    count = 0
    for build_dict in builds:
        build_params = dict(
            id_=build_dict['id'],
            epoch=build_dict['epoch'],
            state=build_dict['state'],
            creation_time=build_dict['creation_time'],
            start_time=build_dict['start_time'],
            completion_time=build_dict['completion_time'],
            name=build_dict['package_name'],
            version=build_dict['version'],
            release=build_dict['release']
        )
        # "extra" is a JSON blob; fall back to {} when it's missing or invalid
        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}
        if self.is_container_build(build_dict):
            # Operator containers ship an operator-manifests archive in typeinfo
            build_params['operator'] = bool(
                extra_json.get('typeinfo', {}).get('operator-manifests', {}).get('archive')
            )
            try:
                build = ContainerKojiBuild.create_or_update(build_params)[0]
            except neomodel.exceptions.ConstraintValidationFailed:
                # This must have errantly been created as a KojiBuild instead of a
                # ContainerKojiBuild, so let's fix that.
                build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
                if not build:
                    # If there was a constraint validation failure and the build isn't just the
                    # wrong label, then we can't recover.
                    raise
                build.add_label(ContainerKojiBuild.__label__)
                build = ContainerKojiBuild.create_or_update(build_params)[0]
        elif self.is_module_build(build_dict):
            module_extra_info = extra_json.get('typeinfo', {}).get('module')
            try:
                build_params['context'] = module_extra_info.get('context')
                build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
                build_params['module_name'] = module_extra_info.get('name')
                build_params['module_stream'] = module_extra_info.get('stream')
                build_params['module_version'] = module_extra_info.get('version')
                build = ModuleKojiBuild.create_or_update(build_params)[0]
            except neomodel.exceptions.ConstraintValidationFailed:
                # This must have errantly been created as a KojiBuild instead of a
                # ModuleKojiBuild, so let's fix that.
                build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
                if not build:
                    # If there was a constraint validation failure and the build isn't just the
                    # wrong label, then we can't recover.
                    raise
                build.add_label(ModuleKojiBuild.__label__)
                build = ModuleKojiBuild.create_or_update(build_params)[0]
        else:
            build = KojiBuild.create_or_update(build_params)[0]
        username = build_dict['owner_name']
        user = User.get_or_create(dict(username=username))[0]
        build.conditional_connect(build.owner, user)
        if build.__label__ == ModuleKojiBuild.__label__:
            # NOTE(review): module_extra_info is only bound in the is_module_build
            # branch above; this relies on every Module-labeled build taking that
            # branch — confirm, otherwise this is a NameError.
            module_build_tag_name = module_extra_info.get('content_koji_tag')
            if module_build_tag_name:
                module_components = self.get_tag_info(module_build_tag_name)
                # Some modules don't have components
                if module_components:
                    for item in module_components:
                        module_component = KojiBuild.get_or_create(dict(
                            id_=item['build_id']
                        ))[0]
                        build.components.connect(module_component)
                    # Recurse to fully ingest the component builds themselves
                    component_builds = self.get_build_info(
                        [item['build_id'] for item in module_components])
                    self.update_neo4j(component_builds)
        count += 1
        log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))
        container_koji_task_id = extra_json.get('container_koji_task_id')
        if build_dict['task_id']:
            task_id = build_dict['task_id']
        elif container_koji_task_id:
            task_id = container_koji_task_id
        else:
            # Continue if the task_id is None
            continue
        # Getting task related to the current build
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            continue
        commit_hash = None
        # Only look for the commit hash if the build is an RPM or container
        if task_dict['method'] in ('build', 'buildContainer'):
            # The task request is XML; the git URL's '#' fragment is the commit hash
            xml_root = ET.fromstring(task_dict['request'])
            for child in xml_root.iter('string'):
                if child.text and child.text.startswith('git'):
                    commit_hash = child.text.rsplit('#', 1)[1]
                    break
        if commit_hash:
            commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
            build.conditional_connect(build.commit, commit)
def update_neo4j(self, advisories):
    """
    Update Neo4j with Errata Tool advisories from Teiid.

    :param list advisories: a list of dictionaries of advisories
    """
    count = 0
    for advisory in advisories:
        count += 1
        log.info('Processing advisory {0}/{1}'.format(
            count, len(advisories)))
        # The content_types column is a string with YAML in it, so convert it to a list
        content_types = yaml.safe_load(advisory['content_types'])
        adv = Advisory.create_or_update({
            'actual_ship_date': advisory['actual_ship_date'],
            'advisory_name': advisory['advisory_name'],
            'content_types': content_types,
            'created_at': advisory['created_at'],
            'id_': advisory['id'],
            'issue_date': advisory['issue_date'],
            'product_name': advisory['product_name'],
            'product_short_name': advisory['product_short_name'],
            'release_date': advisory['release_date'],
            'security_impact': advisory['security_impact'],
            'security_sla': advisory['security_sla'],
            'state': advisory['state'],
            'status_time': advisory['status_time'],
            'synopsis': advisory['synopsis'],
            'update_date': advisory['update_date'],
        })[0]
        # Tracks whether we've already promoted this advisory to ContainerAdvisory
        container_adv = False
        for associated_build in self.get_associated_builds(advisory['id']):
            # Even if a node has two labels in the database, Neo4j returns the node
            # only with the specific label you asked for. Hence we check for labels
            # ContainerKojiBuild and KojiBuild separately for the same node.
            build = ContainerKojiBuild.nodes.get_or_none(
                id_=associated_build['id_'])
            if not build:
                build = KojiBuild.nodes.get_or_none(
                    id_=associated_build['id_'])
            if build and not container_adv:
                if build.__label__ == 'ContainerKojiBuild':
                    adv.add_label(ContainerAdvisory.__label__)
                    container_adv = True
            # If this is set, that means it was once part of the advisory but not anymore.
            # This relationship needs to be deleted if it exists.
            if associated_build['removed_index_id']:
                if build:
                    adv.attached_builds.disconnect(build)
            else:
                # Query Teiid and create the entry only if the build is not present in Neo4j
                if not build:
                    attached_build = self.get_koji_build(
                        associated_build['id_'])
                    if attached_build:
                        if self.is_container_build(attached_build):
                            build = ContainerKojiBuild.get_or_create(
                                {'id_': associated_build['id_']})[0]
                        else:
                            build = KojiBuild.get_or_create(
                                {'id_': associated_build['id_']})[0]
                # This will happen only if we do not find the build we are looking for in Teiid
                # which shouldn't usually happen under normal conditions
                if not build:
                    log.warn(
                        'The Koji build with ID {} was not found in Teiid!'
                        .format(associated_build['id_']))
                    continue
                # Promote the advisory label if a container build was just created above
                if adv.__label__ != ContainerAdvisory.__label__ \
                        and build.__label__ == ContainerKojiBuild.__label__:
                    adv.add_label(ContainerAdvisory.__label__)
                # Keep the time_attached property on the relationship up to date
                attached_rel = adv.attached_builds.relationship(build)
                time_attached = associated_build['time_attached']
                if attached_rel:
                    if attached_rel.time_attached != time_attached:
                        adv.attached_builds.replace(
                            build, {'time_attached': time_attached})
                else:
                    adv.attached_builds.connect(
                        build, {'time_attached': time_attached})
        # Connect the advisory to its assignee and reporter; usernames are the
        # local part of the login email.
        assigned_to = User.get_or_create(
            {'username': advisory['assigned_to'].split('@')[0]})[0]
        adv.conditional_connect(adv.assigned_to, assigned_to)
        reporter = User.get_or_create(
            {'username': advisory['reporter'].split('@')[0]})[0]
        adv.conditional_connect(adv.reporter, reporter)
        for attached_bug in self.get_attached_bugs(advisory['id']):
            bug = BugzillaBug.get_or_create(attached_bug)[0]
            adv.attached_bugs.connect(bug)