def run(self, since=None, until=None):
    """
    Run the Freshmaker scraper.

    The ``since`` and ``until`` parameters are accepted for interface compatibility but are
    ignored; a warning is logged if either one is provided.

    :param str since: a datetime to start scraping data from (ignored)
    :param str until: a datetime to scrape data until (ignored)
    """
    if since or until:
        # The trailing space matters: the two literals are concatenated into a single message
        log.warning('Ignoring the since/until parameter; They do not apply to the '
                    'Freshmaker scraper')

    log.info('Starting initial load of Freshmaker events')
    self.query_api_and_update_neo4j()
    log.info('Initial load of Freshmaker events complete!')
def serialized_all(self):
    """
    Generate a serialized form of the node that includes all its relationships.

    Every relationship property declared on the model is present in the result. Relationships
    with no connected nodes are set to ``None`` (for ``One``/``ZeroOrOne`` cardinality) or to
    an empty list (for any other cardinality) so that the keys are always consistent.

    :return: a serialized form of the node with relationships
    :rtype: dictionary
    :raises RuntimeError: if the label of a Neo4j node can't be mapped back to a neomodel class
        (presumably raised by ``inflate_node``; confirm against its implementation)
    """
    # Avoid circular imports
    from estuary.models import models_inheritance

    # A set that will keep track of all properties on the node that weren't returned from Neo4j
    null_properties = set()
    # A mapping of Neo4j relationship names in the format of:
    # {
    #     node_label: {
    #         relationship_name: {direction: (property_name, cardinality_class) ...},
    #     }
    # }
    relationship_map = {}
    for property_name, relationship in self.__all_relationships__:
        definition = relationship.definition
        node_label = definition['node_class'].__label__
        relationship_name = definition['relation_type']
        relationship_direction = definition['direction']
        if relationship_direction == EITHER:
            # The direction can be coming from either direction, so map both
            directions = (INCOMING, OUTGOING)
        else:
            directions = (relationship_direction,)
        properties = {
            direction: (property_name, relationship.manager) for direction in directions
        }

        for label in models_inheritance[node_label]:
            # setdefault creates a fresh dictionary per label, so labels never share state
            label_map = relationship_map.setdefault(label, {})
            label_map.setdefault(relationship_name, {}).update(properties)

        null_properties.add(property_name)

    # This variable will contain the current node as serialized + all relationships
    serialized = self.serialized
    # Get all the direct relationships in both directions
    results, _ = self.cypher(
        'MATCH (a) WHERE id(a)={self} MATCH (a)-[r]-(all) RETURN r, all')
    for relationship, node in results:
        # If the starting node in the relationship is the same as the node being serialized,
        # we know that the relationship is outgoing
        direction = OUTGOING if relationship.start_node.id == self.id else INCOMING

        # Convert the Neo4j result into a model object
        inflated_node = inflate_node(node)
        try:
            property_name, cardinality_class = \
                relationship_map[inflated_node.__label__][relationship.type][direction]
        except KeyError:
            direction_text = 'outgoing' if direction == OUTGOING else 'incoming'
            log.warning(
                'An {0} {1} relationship of {2!r} with {3!r} is not mapped in the models and '
                'will be ignored'.format(
                    direction_text, relationship.type, self, inflated_node))
            continue

        # The property has at least one related node, so it must not be reset to an empty
        # value at the end. discard() is safe when the property was already removed.
        null_properties.discard(property_name)

        if cardinality_class in (One, ZeroOrOne):
            serialized[property_name] = inflated_node.serialized
        else:
            if not serialized.get(property_name):
                serialized[property_name] = []
            serialized[property_name].append(inflated_node.serialized)

    # Neo4j won't return back relationships it doesn't know about, so just make them empty
    # so that the keys are always consistent
    for property_name in null_properties:
        prop = getattr(self, property_name)
        if isinstance(prop, (One, ZeroOrOne)):
            serialized[property_name] = None
        else:
            serialized[property_name] = []

    return serialized
def update_neo4j(self, advisories):
    """
    Update Neo4j with Errata Tool advisories from Teiid.

    For every advisory, the ``Advisory`` node is created or updated, its associated builds are
    connected (or disconnected when they were removed from the advisory), and the assignee,
    reporter, and attached bugs are linked to it.

    :param list advisories: a list of dictionaries of advisories
    """
    total = len(advisories)
    for count, advisory in enumerate(advisories, start=1):
        log.info('Processing advisory {0}/{1}'.format(count, total))
        # The content_types column is a string with YAML in it, so convert it to a list
        content_types = yaml.safe_load(advisory['content_types'])
        adv = Advisory.create_or_update({
            'actual_ship_date': advisory['actual_ship_date'],
            'advisory_name': advisory['advisory_name'],
            'content_types': content_types,
            'created_at': advisory['created_at'],
            'id_': advisory['id'],
            'issue_date': advisory['issue_date'],
            'product_name': advisory['product_name'],
            'product_short_name': advisory['product_short_name'],
            'release_date': advisory['release_date'],
            'security_impact': advisory['security_impact'],
            'security_sla': advisory['security_sla'],
            'state': advisory['state'],
            'status_time': advisory['status_time'],
            'synopsis': advisory['synopsis'],
            'update_date': advisory['update_date'],
        })[0]
        container_adv = False

        for associated_build in self.get_associated_builds(advisory['id']):
            build_id = associated_build['id_']
            # Even if a node has two labels in the database, Neo4j returns the node
            # only with the specific label you asked for. Hence we check for labels
            # ContainerKojiBuild and KojiBuild separately for the same node.
            build = ContainerKojiBuild.nodes.get_or_none(id_=build_id)
            if not build:
                build = KojiBuild.nodes.get_or_none(id_=build_id)

            if build and not container_adv and build.__label__ == 'ContainerKojiBuild':
                adv.add_label(ContainerAdvisory.__label__)
                container_adv = True

            # If this is set, that means it was once part of the advisory but not anymore.
            # This relationship needs to be deleted if it exists.
            if associated_build['removed_index_id']:
                if build:
                    adv.attached_builds.disconnect(build)
                continue

            # Query Teiid and create the entry only if the build is not present in Neo4j
            if not build:
                attached_build = self.get_koji_build(build_id)
                if attached_build:
                    if self.is_container_build(attached_build):
                        build = ContainerKojiBuild.get_or_create({'id_': build_id})[0]
                    else:
                        build = KojiBuild.get_or_create({'id_': build_id})[0]

            # This will happen only if we do not find the build we are looking for in Teiid
            # which shouldn't usually happen under normal conditions
            if not build:
                log.warning(
                    'The Koji build with ID {} was not found in Teiid!'.format(build_id))
                continue

            if adv.__label__ != ContainerAdvisory.__label__ \
                    and build.__label__ == ContainerKojiBuild.__label__:
                adv.add_label(ContainerAdvisory.__label__)

            attached_rel = adv.attached_builds.relationship(build)
            time_attached = associated_build['time_attached']
            if not attached_rel:
                adv.attached_builds.connect(build, {'time_attached': time_attached})
            elif attached_rel.time_attached != time_attached:
                # Only rewrite the relationship when the attachment time actually changed
                adv.attached_builds.replace(build, {'time_attached': time_attached})

        # Only the part of the address before the "@" is used as the username
        assigned_to = User.get_or_create(
            {'username': advisory['assigned_to'].split('@')[0]})[0]
        adv.conditional_connect(adv.assigned_to, assigned_to)
        reporter = User.get_or_create(
            {'username': advisory['reporter'].split('@')[0]})[0]
        adv.conditional_connect(adv.reporter, reporter)

        for attached_bug in self.get_attached_bugs(advisory['id']):
            bug = BugzillaBug.get_or_create(attached_bug)[0]
            adv.attached_bugs.connect(bug)