def run(self, since=None, until=None):
        """
        Run the Freshmaker scraper.

        The ``since`` and ``until`` arguments are accepted but not used: if either one is
        truthy a warning is logged, and the load below runs regardless.

        :param str since: a datetime to start scraping data from (ignored)
        :param str until: a datetime to scrape data until (ignored)
        """
        if since or until:
            # Adjacent string literals are joined verbatim, so the separating space has to be
            # part of one of them
            log.warning('Ignoring the since/until parameter; They do not apply to the '
                        'Freshmaker scraper')
        log.info('Starting initial load of Freshmaker events')
        self.query_api_and_update_neo4j()
        log.info('Initial load of Freshmaker events complete!')
Exemple #2
0
    def serialized_all(self):
        """
        Generate a serialized form of the node that includes all its relationships.

        The result starts from ``self.serialized`` and gains one key per relationship property
        listed in ``self.__all_relationships__``. Properties with a ``One``/``ZeroOrOne``
        cardinality hold a single serialized node (or ``None``); all others hold a list of
        serialized nodes (possibly empty). Relationships found in Neo4j that are not mapped in
        the models are logged and skipped.

        :return: a serialized form of the node with relationships
        :rtype: dictionary
        :raises RuntimeError: if the label of a Neo4j node can't be mapped back to a neomodel class
        """
        # Avoid circular imports
        from estuary.models import models_inheritance
        # A set that will keep track of all properties on the node that weren't returned from Neo4j
        null_properties = set()
        # A mapping of Neo4j relationship names in the format of:
        # {
        #     node_label: {
        #         relationship_name: {direction: (property_name, cardinality_class) ...},
        #     }
        # }
        relationship_map = {}
        for property_name, relationship in self.__all_relationships__:
            definition = relationship.definition
            node_label = definition['node_class'].__label__
            relationship_name = definition['relation_type']
            relationship_direction = definition['direction']
            mapped_value = (property_name, relationship.manager)
            if relationship_direction == EITHER:
                # The direction can be coming from either direction, so map both
                properties = {INCOMING: mapped_value, OUTGOING: mapped_value}
            else:
                properties = {relationship_direction: mapped_value}

            for label in models_inheritance[node_label]:
                # Every label gets its own inner dictionary; ``update`` only copies the entries,
                # so ``properties`` itself is never shared between labels
                label_map = relationship_map.setdefault(label, {})
                label_map.setdefault(relationship_name, {}).update(properties)
                null_properties.add(property_name)

        # This variable will contain the current node as serialized + all relationships
        serialized = self.serialized
        # Get all the direct relationships in both directions
        results, _ = self.cypher(
            'MATCH (a) WHERE id(a)={self} MATCH (a)-[r]-(all) RETURN r, all')
        for relationship, node in results:
            # If the starting node in the relationship is the same as the node being serialized,
            # we know that the relationship is outgoing
            if relationship.start_node.id == self.id:
                direction = OUTGOING
            else:
                direction = INCOMING

            # Convert the Neo4j result into a model object
            inflated_node = inflate_node(node)
            try:
                property_name, cardinality_class = \
                    relationship_map[inflated_node.__label__][relationship.type][direction]
            except KeyError:
                direction_text = 'outgoing' if direction == OUTGOING else 'incoming'
                # Report the related node itself, not the ``inflate_node`` helper function
                log.warning(
                    'An {0} {1} relationship of {2!r} with {3!r} is not mapped in the models and '
                    'will be ignored'.format(direction_text, relationship.type,
                                             self, inflated_node))
                continue

            # The property has at least one related node, so it must not be reset to an empty
            # value below. ``discard`` is safe when the property was already taken out of the set
            # by an earlier result.
            null_properties.discard(property_name)

            if cardinality_class in (One, ZeroOrOne):
                serialized[property_name] = inflated_node.serialized
            else:
                if not serialized.get(property_name):
                    serialized[property_name] = []
                serialized[property_name].append(inflated_node.serialized)

        # Neo4j won't return back relationships it doesn't know about, so just make them empty
        # so that the keys are always consistent
        for property_name in null_properties:
            prop = getattr(self, property_name)
            if isinstance(prop, (One, ZeroOrOne)):
                serialized[property_name] = None
            else:
                serialized[property_name] = []

        return serialized
Exemple #3
0
    def update_neo4j(self, advisories):
        """
        Update Neo4j with Errata Tool advisories from Teiid.

        For every advisory this creates or updates the ``Advisory`` node, synchronizes the
        builds attached to it (disconnecting builds flagged with ``removed_index_id`` and
        connecting or refreshing the others), connects the ``assigned_to`` and ``reporter``
        users, and connects the attached Bugzilla bugs.

        :param list advisories: a list of dictionaries of advisories
        """
        total = len(advisories)
        for count, advisory in enumerate(advisories, 1):
            log.info('Processing advisory {0}/{1}'.format(count, total))
            # The content_types column is a string with YAML in it, so convert it to a list
            content_types = yaml.safe_load(advisory['content_types'])
            adv = Advisory.create_or_update({
                'actual_ship_date': advisory['actual_ship_date'],
                'advisory_name': advisory['advisory_name'],
                'content_types': content_types,
                'created_at': advisory['created_at'],
                'id_': advisory['id'],
                'issue_date': advisory['issue_date'],
                'product_name': advisory['product_name'],
                'product_short_name': advisory['product_short_name'],
                'release_date': advisory['release_date'],
                'security_impact': advisory['security_impact'],
                'security_sla': advisory['security_sla'],
                'state': advisory['state'],
                'status_time': advisory['status_time'],
                'synopsis': advisory['synopsis'],
                'update_date': advisory['update_date'],
            })[0]
            # Tracks whether the ContainerAdvisory label was already added for this advisory so
            # that ``add_label`` is issued at most once per advisory
            container_adv = False

            for associated_build in self.get_associated_builds(advisory['id']):
                build_id = associated_build['id_']
                # Even if a node has two labels in the database, Neo4j returns the node
                # only with the specific label you asked for. Hence we check for labels
                # ContainerKojiBuild and KojiBuild separately for the same node.
                build = ContainerKojiBuild.nodes.get_or_none(id_=build_id)
                if not build:
                    build = KojiBuild.nodes.get_or_none(id_=build_id)

                if build and not container_adv:
                    if build.__label__ == ContainerKojiBuild.__label__:
                        adv.add_label(ContainerAdvisory.__label__)
                        container_adv = True

                # If this is set, that means it was once part of the advisory but not anymore.
                # This relationship needs to be deleted if it exists.
                if associated_build['removed_index_id']:
                    if build:
                        adv.attached_builds.disconnect(build)
                    continue

                # Query Teiid and create the entry only if the build is not present in Neo4j
                if not build:
                    attached_build = self.get_koji_build(build_id)
                    if attached_build:
                        if self.is_container_build(attached_build):
                            build = ContainerKojiBuild.get_or_create({'id_': build_id})[0]
                        else:
                            build = KojiBuild.get_or_create({'id_': build_id})[0]

                # This will happen only if we do not find the build we are looking for in Teiid
                # which shouldn't usually happen under normal conditions
                if not build:
                    log.warning(
                        'The Koji build with ID {} was not found in Teiid!'
                        .format(build_id))
                    continue

                # Covers container builds that were just created from Teiid above; skipped when
                # the label was already added earlier for this advisory
                if not container_adv \
                        and adv.__label__ != ContainerAdvisory.__label__ \
                        and build.__label__ == ContainerKojiBuild.__label__:
                    adv.add_label(ContainerAdvisory.__label__)
                    container_adv = True

                attached_rel = adv.attached_builds.relationship(build)
                time_attached = associated_build['time_attached']
                if attached_rel:
                    # Only rewrite the relationship when the stored timestamp is out of date
                    if attached_rel.time_attached != time_attached:
                        adv.attached_builds.replace(
                            build, {'time_attached': time_attached})
                else:
                    adv.attached_builds.connect(
                        build, {'time_attached': time_attached})

            # Only the part before the '@' is used as the username
            assigned_to = User.get_or_create(
                {'username': advisory['assigned_to'].split('@')[0]})[0]
            adv.conditional_connect(adv.assigned_to, assigned_to)
            reporter = User.get_or_create(
                {'username': advisory['reporter'].split('@')[0]})[0]
            adv.conditional_connect(adv.reporter, reporter)

            for attached_bug in self.get_attached_bugs(advisory['id']):
                bug = BugzillaBug.get_or_create(attached_bug)[0]
                adv.attached_bugs.connect(bug)