def test_get_stories_not_available(client):
    """Check that the story endpoint answers 400 for a resource type without a story."""
    User.get_or_create({'username': '******'})

    response = client.get('/api/v1/story/user/tbrady')

    assert response.status_code == 400
    payload = json.loads(response.data.decode('utf-8'))
    assert payload == {
        'message': 'The story is not available for this kind of resource',
        'status': 400
    }
def test_conditional_connect_zero_or_one():
    """Check EstuaryStructuredNode.conditional_connect on a ZeroOrOne relationship."""
    advisory = Advisory(id_='12345', advisory_name='RHBA-2017:27760-01').save()
    first_user = User(username='******').save()
    second_user = User(username='******').save()

    # Nothing is connected to begin with
    assert len(advisory.assigned_to) == 0

    # The first connection simply attaches the user
    EstuaryStructuredNode.conditional_connect(advisory.assigned_to, first_user)
    assert first_user in advisory.assigned_to
    assert len(advisory.assigned_to) == 1

    # A second connection must replace the first user rather than add to it
    EstuaryStructuredNode.conditional_connect(advisory.assigned_to, second_user)
    assert first_user not in advisory.assigned_to
    assert second_user in advisory.assigned_to
    assert len(advisory.assigned_to) == 1
def test_conditional_connect_one():
    """Check that conditional_connect refuses a relationship with a cardinality of One."""
    class TestModel(EstuaryStructuredNode):
        id_ = UniqueIdProperty(db_property='id')
        owner = RelationshipTo('estuary.models.user.User', 'OWNS', cardinality=One)

    current_owner = User(username='******').save()
    new_owner = User(username='******').save()
    node = TestModel(id_='12345').save()
    node.owner.connect(current_owner)

    with pytest.raises(NotImplementedError) as raised:
        EstuaryStructuredNode.conditional_connect(node.owner, new_owner)

    error_text = str(raised.value)
    assert 'conditional_connect doesn\'t support cardinality of one' == error_text
def bug_handler(self, msg):
    """
    Process a message about a created or modified Bugzilla bug and mirror it in Neo4j.

    The bug node and the users referenced by the message (assignee, QA contact and reporter)
    are created or updated, and then the bug is connected to each of those users.

    :param dict msg: a message to be processed
    """
    details = msg['body']['msg']['bug']
    bug_params = {
        'id_': str(details['id']),
        'creation_time': timestamp_to_datetime(details['creation_time']),
        'modified_time': timestamp_to_datetime(details['last_change_time']),
        'priority': details['priority'],
        'product_name': details['product']['name'],
        'product_version': details['version']['name'],
        'resolution': details['resolution'],
        'severity': details['severity'],
        'short_description': details['summary'],
        'status': details['status']['name'],
        'target_milestone': details['target_milestone']['name'],
    }

    # The username is the part of the Bugzilla login before the "@"
    people = {}
    for role in ('assigned_to', 'qa_contact', 'reporter'):
        login = details[role]['login']
        people[role] = User.create_or_update({
            'username': login.split('@')[0],
            'email': login
        })[0]

    bug = BugzillaBug.create_or_update(bug_params)[0]
    bug.conditional_connect(bug.assignee, people['assigned_to'])
    bug.conditional_connect(bug.qa_contact, people['qa_contact'])
    bug.conditional_connect(bug.reporter, people['reporter'])
def commit_handler(self, msg):
    """
    Process a dist-git commit message and mirror it in Neo4j.

    The repository, author and commit nodes are created or updated, the Bugzilla bugs
    mentioned in the commit message are connected to the commit, and finally the commit is
    connected to its author and its repository.

    :param dict msg: a message to be processed
    """
    headers = msg['headers']
    repo = DistGitRepo.get_or_create({
        'namespace': headers['namespace'],
        'name': headers['repo']
    })[0]

    # Red Hat addresses are stored with only the local part as the username; any other
    # address is used as the username unchanged
    email = headers['email'].lower()
    username = email.split('@redhat.com')[0] if email.endswith('@redhat.com') else email
    author = User.create_or_update({
        'username': username,
        'email': email
    })[0]

    log_message = msg['body']['msg']['message']
    commit = DistGitCommit.create_or_update({
        'hash_': headers['rev'],
        'log_message': log_message,
        'author_date': timestamp_to_datetime(msg['body']['msg']['author_date']),
        'commit_date': timestamp_to_datetime(msg['body']['msg']['commit_date'])
    })[0]

    # Maps each key returned by parse_bugzilla_bugs to the commit relationship to populate
    bugs_by_kind = self.parse_bugzilla_bugs(log_message)
    relationship_names = (
        ('resolves', 'resolved_bugs'),
        ('related', 'related_bugs'),
        ('reverted', 'reverted_bugs'),
    )
    for kind, relationship_name in relationship_names:
        for bug_id in bugs_by_kind[kind]:
            bug = BugzillaBug.get_or_create({'id_': bug_id})[0]
            getattr(commit, relationship_name).connect(bug)

    commit.conditional_connect(commit.author, author)
    repo.commits.connect(commit)
def create_user_node(self, email):
    """
    Create or update a User node in Neo4j for the given email address.

    If the domain of the address is ``redhat.com``, the part before the ``@`` is used as the
    username. Any other value, including one without an ``@``, is stored whole as the
    username.

    :param str email: the user's email
    :return: User object
    """
    # Checking the length first keeps a value without an "@" from raising an IndexError
    parts = email.split('@')
    if len(parts) > 1 and parts[1] == 'redhat.com':
        username = parts[0]
    else:
        username = email

    user = User.create_or_update(dict(username=username, email=email))[0]
    return user
def advisory_handler(self, msg):
    """
    Handle an Errata Tool advisory change and update Neo4j if necessary.

    The advisory is fetched from the Errata Tool API. When the synopsis in the message is
    not ``REDACTED``, the product, reporter and assignee are fetched as well and stored with
    the advisory; otherwise only the ID and a placeholder name are stored. The node is saved
    as a ContainerAdvisory when ``docker`` is one of the advisory's content types and as an
    Advisory otherwise.

    :param dict msg: a message to be processed
    """
    advisory_id = msg['body']['headers']['errata_id']

    erratum_url = '{0}/api/v1/erratum/{1}'.format(
        self.config['estuary_updater.errata_url'].rstrip('/'), advisory_id)
    response = requests.get(erratum_url, auth=requests_kerberos.HTTPKerberosAuth(), timeout=10)
    advisory_json = response.json()

    # The advisory details are nested under the lower-cased advisory type from the message
    advisory_type = msg['body']['headers']['type'].lower()
    advisory_info = advisory_json['errata'][advisory_type]

    embargoed = msg['body']['headers']['synopsis'] == 'REDACTED'
    # We can't store information on embargoed advisories other than the ID
    if not embargoed:
        product_url = '{0}/products/{1}.json'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'), advisory_info['product_id'])
        response = requests.get(
            product_url, auth=requests_kerberos.HTTPKerberosAuth(), timeout=10)
        product_json = response.json()

        reporter_url = '{0}/api/v1/user/{1}'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'), advisory_info['reporter_id'])
        response = requests.get(
            reporter_url, auth=requests_kerberos.HTTPKerberosAuth(), timeout=10)
        reporter_json = response.json()
        reporter = User.create_or_update({
            'username': reporter_json['login_name'].split('@')[0],
            'email': reporter_json['email_address']
        })[0]

        assigned_to_url = '{0}/api/v1/user/{1}'.format(
            self.config['estuary_updater.errata_url'].rstrip('/'),
            advisory_info['assigned_to_id'])
        response = requests.get(
            assigned_to_url, auth=requests_kerberos.HTTPKerberosAuth(), timeout=10)
        assigned_to_json = response.json()
        assigned_to = User.create_or_update({
            'username': assigned_to_json['login_name'].split('@')[0],
            'email': assigned_to_json['email_address']
        })[0]

        advisory_params = {
            'advisory_name': advisory_info['fulladvisory'],
            'id_': advisory_id,
            'product_name': product_json['product']['name'],
            'security_impact': advisory_info['security_impact'],
            'state': advisory_info['status'],
            'synopsis': msg['body']['headers']['synopsis']
        }

        # Only timestamps that have a value are converted and stored
        for dt in ('actual_ship_date', 'created_at', 'issue_date', 'release_date',
                   'security_sla', 'status_updated_at', 'update_date'):
            if advisory_info[dt]:
                # "status_updated_at" is stored under the key "status_time"
                if dt == 'status_updated_at':
                    estuary_key = 'status_time'
                else:
                    estuary_key = dt
                advisory_params[estuary_key] = timestamp_to_datetime(
                    advisory_info[dt])
    else:
        advisory_params = {
            'id_': advisory_id,
            # Set this to REDACTED and it'll be updated when it becomes public
            'advisory_name': 'REDACTED'
        }

    if 'docker' in advisory_info['content_types']:
        try:
            advisory = ContainerAdvisory.create_or_update(
                advisory_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as an Advisory instead of a
            # ContainerAdvisory, so let's fix that.
            advisory = Advisory.nodes.get_or_none(id_=advisory_id)
            if not advisory:
                # If there was a constraint validation failure and the advisory isn't just
                # the wrong label, then we can't recover.
                raise
            advisory.add_label(ContainerAdvisory.__label__)
            advisory = ContainerAdvisory.create_or_update(
                advisory_params)[0]
    else:
        # Check to see if a ContainerAdvisory using this id already exists, and if so remove its
        # label because it should not be a ContainerAdvisory if docker isn't a content type.
        container_adv = ContainerAdvisory.nodes.get_or_none(
            id_=advisory_id)
        if container_adv:
            container_adv.remove_label(ContainerAdvisory.__label__)
        advisory = Advisory.create_or_update(advisory_params)[0]

    # The reporter and assignee are only fetched for advisories that aren't embargoed
    if not embargoed:
        advisory.conditional_connect(advisory.reporter, reporter)
        advisory.conditional_connect(advisory.assigned_to, assigned_to)

    # NOTE(review): the flattened source doesn't show whether the statements below belong
    # under the `if not embargoed` above or run for every advisory - confirm the nesting
    bugs = advisory_json['bugs']['bugs']
    for bug in bugs:
        # The loop variable is rebound from the API entry to the BugzillaBug node
        bug = BugzillaBug.get_or_create({'id_': bug['bug']['id']})[0]
        advisory.attached_bugs.connect(bug)
def update_neo4j(self, advisories):
    """
    Update Neo4j with Errata Tool advisories from Teiid.

    For every advisory, the advisory node is created or updated and then connected to its
    assignee, package owner and reporter, its states (and their creators), its attached bugs
    and its associated builds. Builds that were removed from the advisory are disconnected.

    :param list advisories: a list of dictionaries of advisories
    """
    total = len(advisories)
    for count, advisory in enumerate(advisories, start=1):
        log.info('Processing advisory {0}/{1}'.format(count, total))
        # The content_types column is a string with YAML in it, so convert it to a list. It
        # has to be read from the advisory being processed, since it differs per advisory.
        content_types = yaml.safe_load(advisory['content_types'])
        adv = Advisory.create_or_update({
            'actual_ship_date': advisory['actual_ship_date'],
            'advisory_name': advisory['advisory_name'],
            'content_types': content_types,
            'created_at': advisory['created_at'],
            'id_': advisory['id'],
            'issue_date': advisory['issue_date'],
            'product_name': advisory['product_name'],
            'product_short_name': advisory['product_short_name'],
            'release_date': advisory['release_date'],
            'security_impact': advisory['security_impact'],
            'security_sla': advisory['security_sla'],
            'state': advisory['state'],
            'status_time': advisory['status_time'],
            'synopsis': advisory['synopsis'],
            'type_': advisory['type'],
            'update_date': advisory['update_date'],
            'updated_at': advisory['updated_at']
        })[0]

        # Usernames are the part of the value before the "@"
        assigned_to = User.get_or_create(
            {'username': advisory['assigned_to'].split('@')[0]})[0]
        adv.conditional_connect(adv.assigned_to, assigned_to)
        package_owner = User.get_or_create(
            {'username': advisory['package_owner'].split('@')[0]})[0]
        adv.conditional_connect(adv.package_owner, package_owner)
        reporter = User.get_or_create(
            {'username': advisory['reporter'].split('@')[0]})[0]
        adv.conditional_connect(adv.reporter, reporter)

        for state in self.get_advisory_states(advisory['id']):
            adv_state = AdvisoryState.create_or_update({
                'id_': state['id'],
                'name': state['name'],
                'created_at': state['created_at'],
                'updated_at': state['updated_at']
            })[0]
            adv_state.conditional_connect(adv_state.advisory, adv)
            state_creator = User.get_or_create(
                {'username': state['username'].split('@')[0]})[0]
            adv_state.conditional_connect(adv_state.creator, state_creator)

        for attached_bug in self.get_attached_bugs(advisory['id']):
            bug = BugzillaBug.get_or_create(attached_bug)[0]
            adv.attached_bugs.connect(bug)

        for associated_build in self.get_associated_builds(advisory['id']):
            # If this is set, that means it was once part of the advisory but not anymore.
            # This relationship needs to be deleted if it exists.
            if associated_build['removed_index_id']:
                build = KojiBuild.nodes.get_or_none(
                    id_=associated_build['id_'])
                if build:
                    adv.attached_builds.disconnect(build)
            else:
                # This key shouldn't be stored in Neo4j
                del associated_build['removed_index_id']
                build = KojiBuild.get_or_create(associated_build)[0]
                adv.attached_builds.connect(build)
def update_neo4j(self, advisories):
    """
    Update Neo4j with Errata Tool advisories from Teiid.

    For every advisory, the advisory node is created or updated, its associated builds are
    connected (or disconnected when they were removed from the advisory), and it is then
    connected to its assignee, reporter and attached bugs. The advisory is additionally
    labelled as a ContainerAdvisory when one of its builds is a ContainerKojiBuild.

    :param list advisories: a list of dictionaries of advisories
    """
    total = len(advisories)
    for count, advisory in enumerate(advisories, start=1):
        log.info('Processing advisory {0}/{1}'.format(count, total))
        # The content_types column is a string with YAML in it, so convert it to a list
        content_types = yaml.safe_load(advisory['content_types'])
        adv = Advisory.create_or_update({
            'actual_ship_date': advisory['actual_ship_date'],
            'advisory_name': advisory['advisory_name'],
            'content_types': content_types,
            'created_at': advisory['created_at'],
            'id_': advisory['id'],
            'issue_date': advisory['issue_date'],
            'product_name': advisory['product_name'],
            'product_short_name': advisory['product_short_name'],
            'release_date': advisory['release_date'],
            'security_impact': advisory['security_impact'],
            'security_sla': advisory['security_sla'],
            'state': advisory['state'],
            'status_time': advisory['status_time'],
            'synopsis': advisory['synopsis'],
            'update_date': advisory['update_date'],
        })[0]

        # Tracks whether the container label was already added by the check just below
        container_adv = False
        for associated_build in self.get_associated_builds(advisory['id']):
            build_id = associated_build['id_']
            # Even if a node has two labels in the database, Neo4j returns the node
            # only with the specific label you asked for. Hence we check for labels
            # ContainerKojiBuild and KojiBuild separately for the same node.
            build = ContainerKojiBuild.nodes.get_or_none(id_=build_id)
            if not build:
                build = KojiBuild.nodes.get_or_none(id_=build_id)

            if build and not container_adv and build.__label__ == 'ContainerKojiBuild':
                adv.add_label(ContainerAdvisory.__label__)
                container_adv = True

            # If this is set, that means it was once part of the advisory but not anymore.
            # This relationship needs to be deleted if it exists.
            if associated_build['removed_index_id']:
                if build:
                    adv.attached_builds.disconnect(build)
                continue

            # Query Teiid and create the entry only if the build is not present in Neo4j
            if not build:
                attached_build = self.get_koji_build(build_id)
                if attached_build:
                    if self.is_container_build(attached_build):
                        build = ContainerKojiBuild.get_or_create({'id_': build_id})[0]
                    else:
                        build = KojiBuild.get_or_create({'id_': build_id})[0]

            # This will happen only if we do not find the build we are looking for in Teiid
            # which shouldn't usually happen under normal conditions
            if not build:
                # Logger.warn is a deprecated alias, so Logger.warning is used instead
                log.warning(
                    'The Koji build with ID {} was not found in Teiid!'.format(build_id))
                continue

            if adv.__label__ != ContainerAdvisory.__label__ \
                    and build.__label__ == ContainerKojiBuild.__label__:
                adv.add_label(ContainerAdvisory.__label__)

            # Only touch the relationship when it is missing or its timestamp is stale
            attached_rel = adv.attached_builds.relationship(build)
            time_attached = associated_build['time_attached']
            if attached_rel:
                if attached_rel.time_attached != time_attached:
                    adv.attached_builds.replace(
                        build, {'time_attached': time_attached})
            else:
                adv.attached_builds.connect(
                    build, {'time_attached': time_attached})

        # Usernames are the part of the value before the "@"
        assigned_to = User.get_or_create(
            {'username': advisory['assigned_to'].split('@')[0]})[0]
        adv.conditional_connect(adv.assigned_to, assigned_to)
        reporter = User.get_or_create(
            {'username': advisory['reporter'].split('@')[0]})[0]
        adv.conditional_connect(adv.reporter, reporter)

        for attached_bug in self.get_attached_bugs(advisory['id']):
            bug = BugzillaBug.get_or_create(attached_bug)[0]
            adv.attached_bugs.connect(bug)
def test_get_resources(client, resource, uid, expected):
    """
    Test getting a resource from Neo4j with its relationships.

    Every node used by any of the cases is created first. Only the relationships that belong
    to the ``resource`` under test are then connected, before the resource is requested from
    the API and the response is compared with ``expected``.
    """
    # Users
    tbrady = User.get_or_create({
        'email': '*****@*****.**',
        'username': '******'
    })[0]
    mprahl = User.get_or_create({
        'email': '*****@*****.**',
        'username': '******'
    })[0]
    jsmith = User.get_or_create({
        'email': '*****@*****.**',
        'username': '******'
    })[0]
    # Dist-git commits
    commit = DistGitCommit.get_or_create({
        'author_date': datetime(2017, 4, 26, 11, 44, 38),
        'commit_date': datetime(2017, 4, 26, 11, 44, 38),
        'hash_': '8a63adb248ba633e200067e1ad6dc61931727bad',
        'log_message': 'Related: #12345 - fix xyz'
    })[0]
    commit_two = DistGitCommit.get_or_create({
        'author_date': datetime(2017, 4, 27, 11, 44, 38),
        'commit_date': datetime(2017, 4, 27, 11, 44, 38),
        'hash_': '1263adb248ba633e205067e1ad6dc61931727c2d',
        'log_message': 'Related: #12345 - fix xz'
    })[0]
    commit_three = DistGitCommit.get_or_create({
        'author_date': datetime(2017, 4, 27, 11, 44, 38),
        'commit_date': datetime(2017, 4, 27, 11, 44, 38),
        'hash_': '5663adb248ba633e205067e1ad6dc61931727123',
        'log_message': 'Revert: #12345'
    })[0]
    # Bugzilla bugs
    bug = BugzillaBug.get_or_create({
        'classification': 'Red Hat',
        'creation_time': datetime(2017, 4, 2, 19, 39, 6),
        'id_': '12345',
        'modified_time': datetime(2018, 2, 7, 19, 30, 47),
        'priority': 'high',
        'product_name': 'Red Hat Enterprise Linux',
        'product_version': '7.5',
        'resolution': '',
        'severity': 'low',
        'short_description': 'Some description',
        'status': 'VERIFIED',
        'target_milestone': 'rc',
        'votes': 0
    })[0]
    bug_two = BugzillaBug.get_or_create({
        'classification': 'Red Hat',
        'creation_time': datetime(2017, 4, 2, 19, 39, 6),
        'id_': '67890',
        'modified_time': datetime(2018, 2, 7, 19, 30, 47),
        'priority': 'medium',
        'product_name': 'Red Hat Enterprise Linux',
        'product_version': '7.3',
        'resolution': '',
        'severity': 'low',
        'short_description': 'Some description',
        'status': 'VERIFIED',
        'target_milestone': 'rc',
        'votes': 0
    })[0]
    bug_three = BugzillaBug.get_or_create({
        'classification': 'Red Hat',
        'creation_time': datetime(2017, 4, 2, 19, 39, 6),
        'id_': '272895',
        'modified_time': datetime(2018, 2, 7, 19, 30, 47),
        'priority': 'low',
        'product_name': 'Satellite',
        'product_version': '3',
        'resolution': '',
        'severity': 'medium',
        'short_description': 'Some description',
        'status': 'VERIFIED',
        'target_milestone': 'rc',
        'votes': 0
    })[0]
    # Dist-git repository and branch
    repo = DistGitRepo.get_or_create({
        'name': 'some_repo',
        'namespace': 'some_namespace',
    })[0]
    branch = DistGitBranch.get_or_create({
        'name': 'some_branch_name',
        'repo_name': 'some_repo_name',
        'repo_namespace': 'some_repo_namespace'
    })[0]
    # Koji build and tag
    build = KojiBuild.get_or_create({
        'completion_time': datetime(2017, 4, 2, 19, 39, 6),
        'creation_time': datetime(2017, 4, 2, 19, 39, 6),
        'epoch': '0',
        'id_': '2345',
        'name': 'slf4j',
        'release': '4.el7_4',
        'start_time': datetime(2017, 4, 2, 19, 39, 6),
        'state': 1,
        'version': '1.7.4'
    })[0]
    tag = KojiTag.get_or_create({
        'id_': '2702',
        'name': 'some_active_tag'
    })[0]
    # Errata advisory
    advisory = Advisory.get_or_create({
        'actual_ship_date': datetime(2017, 8, 1, 15, 43, 51),
        'advisory_name': 'RHBA-2017:2251-02',
        'content_types': ['docker'],
        'created_at': datetime(2017, 4, 3, 14, 47, 23),
        'id_': '27825',
        'issue_date': datetime(2017, 8, 1, 5, 59, 34),
        'product_name': 'Red Hat Enterprise Linux',
        'product_short_name': 'RHEL',
        'security_impact': 'None',
        'state': 'SHIPPED_LIVE',
        'status_time': datetime(2017, 8, 1, 15, 43, 51),
        'synopsis': 'cifs-utils bug fix update',
        'update_date': datetime(2017, 8, 1, 7, 16)
    })[0]
    # Freshmaker event and build
    fm_event = FreshmakerEvent.get_or_create({
        'event_type_id': 8,
        'id_': '1180',
        'message_id': 'ID:messaging-devops-broker01.test',
        'state': 2,
        'state_name': 'COMPLETE',
        'state_reason': 'All container images have been rebuilt',
        'time_created': datetime(2019, 8, 21, 13, 42, 20),
        'time_done': datetime(2099, 8, 21, 13, 42, 20)
    })[0]
    fm_build = FreshmakerBuild.get_or_create({
        'id_': 398,
        'build_id': 15639305,
        'dep_on': "jboss-eap-7-eap70-openshift-docker",
        'name': "metrics-hawkular-metrics-docker",
        'original_nvr': "metrics-hawkular-metrics-docker-v3.7.23-10",
        'rebuilt_nvr': "metrics-hawkular-metrics-docker-v3.7.23-10.1522094767",
        'state': 1,
        'state_name': "DONE",
        'state_reason': "Built successfully.",
        'time_completed': datetime(2017, 4, 2, 19, 39, 6),
        'time_submitted': datetime(2017, 4, 2, 19, 39, 6),
        'type_': 1,
        'type_name': "IMAGE",
        'url': "/api/1/builds/398"
    })[0]
    # Container Koji build
    cb = ContainerKojiBuild.get_or_create({
        'completion_time': datetime(2017, 4, 2, 19, 39, 6),
        'creation_time': datetime(2017, 4, 2, 19, 39, 6),
        'epoch': '0',
        'id_': '710',
        'name': 'slf4j_2',
        'release': '4.el7_4_as',
        'start_time': datetime(2017, 4, 2, 19, 39, 6),
        'state': 1,
        'version': '1.7.4'
    })[0]

    # Connect only the relationships that are relevant to the resource being requested
    if resource == 'bugzillabug':
        bug.assignee.connect(mprahl)
        bug.qa_contact.connect(jsmith)
        bug.reporter.connect(tbrady)
        commit.resolved_bugs.connect(bug)
        commit_two.resolved_bugs.connect(bug)
        commit_three.reverted_bugs.connect(bug)
        advisory.attached_bugs.connect(bug)

    if resource == 'distgitcommit':
        commit.author.connect(tbrady)
        commit.parent.connect(commit_two)
        commit_three.parent.connect(commit)
        commit.related_bugs.connect(bug)
        commit.related_bugs.connect(bug_three)
        commit.reverted_bugs.connect(bug_two)
        repo.commits.connect(commit)
        branch.commits.connect(commit)
        commit.resolved_bugs.connect(bug)
        commit.resolved_bugs.connect(bug_two)

    if resource == 'kojibuild':
        build.owner.connect(mprahl)
        build.commit.connect(commit_two)
        tag.builds.connect(build)

    if resource == 'advisory':
        advisory.assigned_to.connect(mprahl)
        advisory.reporter.connect(jsmith)
        advisory.attached_builds.connect(build)
        advisory.attached_bugs.connect(bug)

    if resource == 'freshmakerevent':
        fm_event.triggered_by_advisory.connect(advisory)
        fm_event.successful_koji_builds.connect(cb)
        fm_event.requested_builds.connect(fm_build)

    if resource == 'containerbuild':
        fm_event.successful_koji_builds.connect(cb)

    # Request the resource and compare the decoded JSON body with the expected payload
    rv = client.get('/api/v1/{0}/{1}'.format(resource, uid))
    assert rv.status_code == 200
    assert json.loads(rv.data.decode('utf-8')) == expected
def get_or_create_build(self, identifier, original_nvr=None, force_container_label=False):
    """
    Get a Koji build from Neo4j, or create it if it does not exist in Neo4j.

    The build is stored as a ContainerKojiBuild, a ModuleKojiBuild or a plain KojiBuild
    depending on its type, and it is connected to its owner.

    :param str/int identifier: an NVR (str) or build ID (int), or a dict of info from Koji API
    :kwarg str original_nvr: original_nvr property for the ContainerKojiBuild
    :kwarg bool force_container_label: when true, this skips the check to see if the build is
        a container and just creates the build with the ContainerKojiBuild label
    :rtype: KojiBuild
    :return: the Koji Build retrieved or created from Neo4j
    :raises Exception: whatever the Koji API call raised when the build can't be retrieved
    """
    # isinstance also accepts dict subclasses, which would otherwise be sent to the Koji API
    if isinstance(identifier, dict):
        build_info = identifier
    else:
        try:
            build_info = self.koji_session.getBuild(identifier, strict=True)
        except Exception:
            log.error(
                'Failed to get brew build using the identifier {0}'.format(identifier))
            raise

    build_params = {
        'epoch': build_info['epoch'],
        'id_': str(build_info['id']),
        'name': build_info['package_name'],
        'release': build_info['release'],
        'state': build_info['state'],
        'version': build_info['version']
    }

    if build_info.get('extra'):
        build_params['extra'] = json.dumps(build_info['extra'])

    # To handle the case when a message has a null timestamp
    for time_key in ('completion_time', 'creation_time', 'start_time'):
        # Certain Koji API endpoints omit the *_ts values but have the *_time values, so that's
        # why the *_time values are used
        raw_time = build_info[time_key]
        if raw_time:
            ts_format = r'%Y-%m-%d %H:%M:%S'
            if '.' in raw_time:
                # If there are microseconds, go ahead and parse that too
                ts_format += r'.%f'
            build_params[time_key] = datetime.strptime(raw_time, ts_format)

    owner = User.create_or_update({
        'username': build_info['owner_name'],
        'email': '{0}@redhat.com'.format(build_info['owner_name'])
    })[0]

    if force_container_label or self.is_container_build(build_info):
        if original_nvr:
            build_params['original_nvr'] = original_nvr
        build = ContainerKojiBuild.create_or_update(build_params)[0]
    elif self.is_module_build(build_info):
        module_extra_info = build_info['extra'].get('typeinfo', {}).get('module')
        build_params['context'] = module_extra_info.get('context')
        build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
        build_params['module_name'] = module_extra_info.get('name')
        build_params['module_stream'] = module_extra_info.get('stream')
        build_params['module_version'] = module_extra_info.get('version')
        try:
            build = ModuleKojiBuild.create_or_update(build_params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as a KojiBuild instead of a
            # ModuleKojiBuild, so let's fix that.
            build = KojiBuild.nodes.get_or_none(id_=build_params['id_'])
            if not build:
                # If there was a constraint validation failure and the build isn't just the
                # wrong label, then we can't recover.
                raise
            build.add_label(ModuleKojiBuild.__label__)
            build = ModuleKojiBuild.create_or_update(build_params)[0]
    else:
        build = KojiBuild.create_or_update(build_params)[0]

    build.conditional_connect(build.owner, owner)
    return build
def _update_neo4j(neo4j_url, total_results, counter_and_results):
    """
    Update Neo4j results via mapping with multiprocessing.

    The results are processed in batches of 200: the repository information of every unique
    commit in a batch is fetched from cgit in parallel, and then the repository, commit, bug
    and author nodes of each result are created and connected. The thread pool and the
    database connection are released when processing ends, even if it fails.

    :param str neo4j_url: database url for Neo4j
    :param int total_results: the total number of results that will be processed. This is used
        for a logging statement about progress.
    :param tuple counter_and_results: a tuple where the first index is the current counter and
        the second index is a list of dictionaries representing results from Teiid
    """
    pool = None
    try:
        previous_total = counter_and_results[0]
        results = counter_and_results[1]
        # Since _update_neo4j will be run in a separate process, we must configure the database
        # URL every time the method is run.
        neomodel_config.DATABASE_URL = neo4j_url
        # Create a thread pool with 4 threads to speed up queries to cgit
        pool = ThreadPool(4)
        counter = 0
        for result in results:
            if counter % 200 == 0:
                until = min(counter + 200, len(results))
                # Because of the joins in the SQL query, we end up with several rows with the
                # same commit hash and we only want to query cgit once per commit
                unique_commits = {(c['module'], c['sha']) for c in results[counter:until]}
                log.debug(
                    'Getting the author email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {
                    r['commit']: r
                    for r in pool.map(DistGitScraper._get_repo_info, unique_commits)
                }
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
            counter += 1
            log.info('Processing commit entry {0}/{1}'.format(
                previous_total + counter, total_results))
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                log.info(
                    'Skipping nodes creation with commit ID {0}'.format(result['commit_id']))
                continue

            log.debug(
                'Creating nodes associated with commit ID {0}'.format(result['commit_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]

            log.debug(
                'Creating the user nodes associated with commit ID {0}'.
                format(result['commit_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]

            log.debug(
                'Creating the relationships associated with commit ID {0}'.
                format(result['commit_id']))
            repo.commits.connect(commit)
            commit.conditional_connect(commit.author, author)

            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
    finally:
        try:
            # Stop the worker threads so that they don't outlive this call
            if pool is not None:
                pool.close()
                pool.join()
        finally:
            # Close the DB connection after this is done processing
            db.driver.close()
def update_neo4j(self, results):
    """
    Update Neo4j with the dist-git commit and push information from Teiid.

    The results are processed in batches of 200: the repository information of every unique
    commit in a batch is fetched from cgit in parallel, and then the repository, branch,
    commit, push, bug and user nodes of each result are created and connected. The process
    pool is shut down when processing ends, even if it fails.

    :param list results: a list of dictionaries
    """
    pool = Pool(processes=8)
    try:
        counter = 0
        for result in results:
            if counter % 200 == 0:
                until = min(counter + 200, len(results))
                # Because of the joins in the SQL query, we end up with several rows with the
                # same commit hash and we only want to query cgit once per commit
                unique_commits = {(c['module'], c['sha']) for c in results[counter:until]}
                log.debug(
                    'Getting the author and committer email addresses from cgit in parallel '
                    'for results {0} to {1}'.format(counter, until))
                repos_info = {}
                # Each value returned by the pool is a JSON string, so decode it first
                for _r in pool.map(DistGitScraper._get_repo_info, unique_commits):
                    r = json.loads(_r)
                    repos_info[r['commit']] = r
                # This is no longer needed so it can be cleared to save RAM
                del unique_commits
                # A lot of RAM was allocated or used up, so let's call gc.collect() to ensure
                # it is removed
                gc.collect()
            counter += 1
            log.info('Processing commit and push entry {0}/{1}'.format(
                str(counter), str(len(results))))
            repo_info = repos_info[result['sha']]
            if not repo_info.get('namespace'):
                log.info(
                    'Skipping nodes creation with commit ID {0} and push ID {1}'
                    .format(result['commit_id'], result['push_id']))
                continue

            log.debug(
                'Creating nodes associated with commit ID {0} and push ID {1}'.
                format(result['commit_id'], result['push_id']))
            repo = DistGitRepo.get_or_create({
                'namespace': repo_info['namespace'],
                'name': result['module']
            })[0]
            # The branch name is the last path component of the ref
            branch_name = result['ref'].rsplit('/', 1)[1]
            branch = DistGitBranch.get_or_create({
                'name': branch_name,
                'repo_namespace': repo_info['namespace'],
                'repo_name': result['module']
            })[0]
            commit = DistGitCommit.create_or_update({
                'author_date': result['author_date'],
                'commit_date': result['commit_date'],
                'hash_': result['sha'],
                # In case we get unicode characters in Python 2
                'log_message': bytes(result['log_message'], 'utf-8').decode()
            })[0]
            push = DistGitPush.get_or_create({
                'id_': result['push_id'],
                'push_date': result['push_date'],
                'push_ip': result['push_ip']
            })[0]
            bug = BugzillaBug.get_or_create({'id_': result['bugzilla_id']})[0]

            log.debug(
                'Creating the user nodes associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            author = User.create_or_update({
                'username': repo_info['author_username'],
                'email': repo_info['author_email']
            })[0]
            committer = User.create_or_update({
                'username': repo_info['committer_username'],
                'email': repo_info['committer_email']
            })[0]
            pusher = User.get_or_create({'username': result['pusher']})[0]

            log.debug(
                'Creating the relationships associated with commit ID {0} and push ID {1}'
                .format(result['commit_id'], result['push_id']))
            repo.contributors.connect(author)
            repo.contributors.connect(committer)
            repo.contributors.connect(pusher)
            repo.commits.connect(commit)
            repo.pushes.connect(push)
            repo.branches.connect(branch)

            branch.contributors.connect(author)
            branch.contributors.connect(committer)
            branch.contributors.connect(pusher)
            branch.commits.connect(commit)
            branch.pushes.connect(push)

            push.conditional_connect(push.pusher, pusher)
            push.commits.connect(commit)

            commit.conditional_connect(commit.author, author)
            commit.conditional_connect(commit.committer, committer)

            if repo_info['parent']:
                parent_commit = DistGitCommit.get_or_create(
                    {'hash_': repo_info['parent']})[0]
                commit.conditional_connect(commit.parent, parent_commit)

            if result['bugzilla_type'] == 'related':
                commit.related_bugs.connect(bug)
            elif result['bugzilla_type'] == 'resolves':
                commit.resolved_bugs.connect(bug)
            elif result['bugzilla_type'] == 'reverted':
                commit.reverted_bugs.connect(bug)
            # This is no longer needed so it can be cleared to save RAM
            del repo_info
    finally:
        # Shut the worker processes down so that they don't outlive this call
        pool.close()
        pool.join()
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    For every build, the build node is created or updated and connected to its owner and its
    current tags (tags it no longer has are disconnected). When a task can be found for the
    build, the task and its child tasks are stored too, and the build is connected to the
    commit referenced by the task request. Builds without a task are skipped at that point.

    :param list builds: a list of dictionaries
    """
    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    total = len(builds)
    for count, build_dict in enumerate(builds, start=1):
        build_params = dict(
            id_=build_dict['id'],
            epoch=build_dict['epoch'],
            state=build_dict['state'],
            creation_time=build_dict['creation_time'],
            start_time=build_dict['start_time'],
            completion_time=build_dict['completion_time'],
            extra=build_dict['extra'],
            name=build_dict['package_name'],
            version=build_dict['version'],
            release=build_dict['release']
        )

        package_name = build_dict['package_name']
        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}
        # Valid JSON that isn't an object (such as null) has no keys to look up
        if not isinstance(extra_json, dict):
            extra_json = {}

        # Two heuristics are used to determine if a build is a container build since, currently
        # there is no definitive way to do it: the "extra" metadata has a container build ID,
        # or the package name has a container-specific suffix.
        container_build = bool(extra_json.get('container_koji_build_id')) or \
            package_name.endswith(('-container', '-docker'))

        if container_build:
            build = ContainerKojiBuild.create_or_update(build_params)[0]
        else:
            build = KojiBuild.create_or_update(build_params)[0]

        if build_dict['owner_username']:
            username = build_dict['owner_username'].split('@')[0]
        else:
            username = build_dict['owner_name']
        user = User.get_or_create(dict(username=username))[0]
        build.conditional_connect(build.owner, user)

        tags = self.get_build_tags(build_dict['id'])
        current_tag_ids = set()
        for _tag in tags:
            current_tag_ids.add(_tag['tag_id'])
            tag = KojiTag.create_or_update(dict(
                id_=_tag['tag_id'],
                name=_tag['tag_name']
            ))[0]
            tag.builds.connect(build)

        # _tag.id_ must be cast as an int because it is stored as a string in Neo4j since
        # it's a UniqueIdProperty
        connected_tags = {int(_tag.id_): _tag for _tag in build.tags.all()}
        extra_connected_tag_ids = set(connected_tags) - current_tag_ids
        for tag_id in extra_connected_tag_ids:
            build.tags.disconnect(connected_tags[tag_id])

        log.info('Uploaded {0} builds out of {1}'.format(count, total))

        # Prefer the task ID of the build and fall back to the one in the "extra" metadata
        task_id = build_dict['task_id'] or extra_json.get('container_koji_task_id')
        if not task_id:
            # Continue if the task_id is None
            continue

        # Getting task related to the current build. This has to be validated before the task
        # is used, since the lookup may not return anything.
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            task_dict = None
        if not task_dict:
            # Continue if no corresponding task found
            continue

        # The commit hash is the fragment of the first string parameter of the task request
        # that starts with "git"
        xml_root = ET.fromstring(task_dict['request'])
        commit_hash = None
        for child in xml_root.iter('string'):
            if child.text and child.text.startswith('git'):
                commit_hash = child.text.rsplit('#', 1)[1]
                break

        task = KojiTask.create_or_update(dict(
            id_=task_dict['id'],
            weight=task_dict['weight'],
            create_time=task_dict['create_time'],
            start_time=task_dict['start_time'],
            completion_time=task_dict['completion_time'],
            state=task_dict['state'],
            priority=task_dict['priority'],
            arch=task_dict['arch'],
            method=task_dict['method']
        ))[0]

        # Defining Relationships
        task.builds.connect(build)
        task.conditional_connect(task.owner, user)
        if commit_hash:
            commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
            build.conditional_connect(build.commit, commit)

        child_tasks = self.get_task_children(task_dict['id'])
        if not child_tasks:
            # Continue if no corresponding child task found
            continue

        for child_task_dict in child_tasks:
            child_task = KojiTask.create_or_update(dict(
                id_=child_task_dict['id'],
                weight=child_task_dict['weight'],
                create_time=child_task_dict['create_time'],
                start_time=child_task_dict['start_time'],
                completion_time=child_task_dict['completion_time'],
                state=child_task_dict['state'],
                priority=child_task_dict['priority'],
                arch=child_task_dict['arch'],
                method=child_task_dict['method']
            ))[0]
            child_task.conditional_connect(child_task.parent, task)
def update_neo4j(self, builds):
    """
    Update Neo4j with Koji build information from Teiid.

    Every entry in ``builds`` is stored as a ``KojiBuild`` node, or as a ``ContainerKojiBuild`` /
    ``ModuleKojiBuild`` node when ``is_container_build`` / ``is_module_build`` says so, and is
    connected to its owner. For module builds, the builds returned by ``get_tag_info`` for the
    module's ``content_koji_tag`` are connected as components and then uploaded through a
    recursive call to this method. Finally, when the build's Koji task uses the ``build`` or
    ``buildContainer`` method, the commit referenced in the task request is connected to the
    build.

    :param list builds: a list of dictionaries
    :raises neomodel.exceptions.ConstraintValidationFailed: if a container or module build
        can't be created and no ``KojiBuild`` with the same ID exists to be relabeled
    """
    def _create_or_relabel(model, params):
        # Create/update a ``model`` node, relabeling a conflicting plain KojiBuild if necessary
        try:
            return model.create_or_update(params)[0]
        except neomodel.exceptions.ConstraintValidationFailed:
            # This must have errantly been created as a KojiBuild instead of the more
            # specific model, so let's fix that.
            existing = KojiBuild.nodes.get_or_none(id_=params['id_'])
            if not existing:
                # If there was a constraint validation failure and the build isn't just the
                # wrong label, then we can't recover.
                raise
            existing.add_label(model.__label__)
            return model.create_or_update(params)[0]

    # Uploads builds data to their respective nodes
    log.info('Beginning to upload data to Neo4j')
    count = 0

    for build_dict in builds:
        build_params = dict(
            id_=build_dict['id'],
            epoch=build_dict['epoch'],
            state=build_dict['state'],
            creation_time=build_dict['creation_time'],
            start_time=build_dict['start_time'],
            completion_time=build_dict['completion_time'],
            name=build_dict['package_name'],
            version=build_dict['version'],
            release=build_dict['release']
        )

        # The "extra" column may be missing (None) or not valid JSON
        try:
            extra_json = json.loads(build_dict['extra'])
        except (ValueError, TypeError):
            extra_json = {}

        if self.is_container_build(build_dict):
            build_params['operator'] = bool(
                extra_json.get('typeinfo', {}).get('operator-manifests', {}).get('archive')
            )
            build = _create_or_relabel(ContainerKojiBuild, build_params)
        elif self.is_module_build(build_dict):
            module_extra_info = extra_json.get('typeinfo', {}).get('module')
            build_params['context'] = module_extra_info.get('context')
            build_params['mbs_id'] = module_extra_info.get('module_build_service_id')
            build_params['module_name'] = module_extra_info.get('name')
            build_params['module_stream'] = module_extra_info.get('stream')
            build_params['module_version'] = module_extra_info.get('version')
            build = _create_or_relabel(ModuleKojiBuild, build_params)
        else:
            build = KojiBuild.create_or_update(build_params)[0]

        username = build_dict['owner_name']
        user = User.get_or_create(dict(username=username))[0]
        build.conditional_connect(build.owner, user)

        # Only the module branch above produces a ModuleKojiBuild, so module_extra_info is
        # always the one belonging to the current build here
        if build.__label__ == ModuleKojiBuild.__label__:
            module_build_tag_name = module_extra_info.get('content_koji_tag')
            if module_build_tag_name:
                module_components = self.get_tag_info(module_build_tag_name)
                # Some modules don't have components
                if module_components:
                    for item in module_components:
                        module_component = KojiBuild.get_or_create(dict(
                            id_=item['build_id']
                        ))[0]
                        build.components.connect(module_component)

                    # The components were only created with their IDs above, so upload their
                    # full build information as well
                    component_builds = self.get_build_info(
                        [item['build_id'] for item in module_components])
                    self.update_neo4j(component_builds)

        count += 1
        log.info('Uploaded {0} builds out of {1}'.format(count, len(builds)))

        # Prefer the task ID on the build itself and fall back to the one recorded in "extra"
        container_koji_task_id = extra_json.get('container_koji_task_id')
        if build_dict['task_id']:
            task_id = build_dict['task_id']
        elif container_koji_task_id:
            task_id = container_koji_task_id
        else:
            # Continue if the task_id is None
            continue

        # Getting task related to the current build
        try:
            task_dict = self.get_task(task_id)[0]
        except IndexError:
            # No task was returned for this ID, so there is no commit to look up
            continue

        commit_hash = None
        # Only look for the commit hash if the build is an RPM or container
        if task_dict['method'] in ('build', 'buildContainer'):
            xml_root = ET.fromstring(task_dict['request'])
            for child in xml_root.iter('string'):
                if child.text and child.text.startswith('git'):
                    # The commit is whatever follows the last "#". A URL without a "#" yields
                    # no commit rather than an IndexError that would abort the whole upload.
                    _, separator, fragment = child.text.rpartition('#')
                    commit_hash = fragment if separator else None
                    break

        if commit_hash:
            commit = DistGitCommit.get_or_create(dict(hash_=commit_hash))[0]
            build.conditional_connect(build.commit, commit)