Exemple #1
0
    def get_identifier_by_source_id(self, model_object, source_id):
        """Look up the ori identifier that belongs to a source identifier.

        A `Cold` node carrying the model's label is matched on its primary
        source property; the ori identifier is read from the `Hot` node that
        points at it.

        Raises MissingProperty when nothing matches and QueryResultError when
        more than one row is returned; otherwise the single ori identifier is
        returned.
        """
        formatter = AQuoteFormatter()

        node_label = self.serializer.label(model_object)

        # Label expressions for the Hot (n1) and Cold (n2) side of the match.
        substitutions = {
            'n1_labels': u':'.join([self.HOT, cypher_escape(node_label)]),
            'n2_labels': u':'.join([self.COLD, cypher_escape(node_label)]),
        }
        substitutions.update(self.default_params)

        statement = u'\n'.join((
            u'MATCH (n2 :«n2_labels» {«had_primary_source»: $had_primary_source})<--(n1 :«n1_labels»)',
            u'RETURN n1.«ori_identifier» AS ori_identifier',
        ))

        rows = self.query(formatter.format(statement, **substitutions),
                          had_primary_source=source_id)

        if not rows:
            raise MissingProperty('Does not exist')

        if len(rows) > 1:
            raise QueryResultError(
                'The number of results is greater than one!')

        only_row = rows[0]
        return only_row['ori_identifier']
Exemple #2
0
    def to_cypher_item(self):
        """Queue the Cypher statements that store this item in the graph.

        Appends to ``self.cypher_statements``, in this order:

        * one statement per entry of ``self.triples`` that MERGEs the source
          and target ``Entity`` nodes and CREATEs the relationship between
          them;
        * one statement per non-relational property that sets it on this
          item's ``Entity`` node;
        * one statement that sets ``english_label`` on that node.
        """
        def quoted(value):
            # Escape backslashes and double quotes so the text can sit inside
            # a double-quoted Cypher string literal.
            # NOTE(review): py2neo.cypher_escape is documented for
            # identifiers -- confirm that it is the intended treatment for
            # property values.
            return py2neo.cypher_escape(value).replace(
                u'\\', u'\\\\').replace('"', '\\"')

        # Nodes are merged; the relationship itself is created on every run.
        triple_template = (
            u"""MERGE (source:Entity {{name: "{source_id}"}}) WITH source """
            """MERGE (target:Entity {{name: "{target_id}"}}) WITH source, target """
            """CREATE (source)-[property:{property_id}]->(target) """
            """SET property.name = "{property_id}";""")
        for source_id, property_id, target_id in self.triples:
            self.cypher_statements.append(triple_template.format(
                source_id=source_id,
                property_id=property_id,
                target_id=target_id))

        # .items() instead of .iteritems(): the latter does not exist on
        # Python 3 dictionaries, while .items() works on both major versions.
        non_relational_properties = {
            property_name: value for property_name, value in
            self.non_relational_properties.items()}
        delisted_dictionary = delist_dictionary(non_relational_properties)

        property_template = (
            u"""MERGE (source:Entity {{name: "{source_id}"}}) """
            """SET source.{non_relational_property} = "{property_value}"; """)
        for non_relational_property, property_value in delisted_dictionary.items():
            self.cypher_statements.append(property_template.format(
                source_id=self.wikidata_id,
                non_relational_property=non_relational_property,
                property_value=quoted(property_value)))

        label_template = (
            u"""MERGE (source:Entity {{name: "{source_id}"}}) """
            """SET source.english_label = "{property_value}"; """)
        self.cypher_statements.append(label_template.format(
            source_id=self.wikidata_id,
            property_value=quoted(self.english_label)))
Exemple #3
0
    def copy_relations(self):
        """Mirror direct Cold->Cold relations onto the matching Hot nodes.

        For every pair of `Hot` nodes whose derived `Cold` nodes are directly
        related, and which are not yet connected to each other, a relation of
        the same type is merged from the first `Hot` node to the second.
        """
        formatter = AQuoteFormatter()

        substitutions = {
            'labels': self.COLD,
            'n1_labels': self.HOT,
            'n2_labels': self.COLD,
            'n3_labels': self.HOT,
        }
        substitutions.update(self.default_params)

        # Finds Hot pairs that still miss the relation their Cold nodes have.
        find_missing = u'\n'.join((
            u'MATCH (n1 :«n1_labels»)-[:«was_derived_from»]->(n2 :«n2_labels»)-[r]->(:«labels»)<-[:«was_derived_from»]-(n3 :«n3_labels»)',
            u'WHERE NOT (n1)--(n3)',
            u'RETURN id(n1) AS id1, id(n2) as id2, id(n3) AS id3, type(r) AS rel, id(startNode(r)) AS start',
        ))

        # Template for the per-row merge; only «rel» differs between rows.
        merge_relation = u'\n'.join((
            u'MATCH (n1), (n3)',
            u'WHERE id(n1) = $id1',
            u'AND id(n3) = $id3',
            u'MERGE (n1)-[:«rel»]->(n3)',
        ))

        for row in self.query(formatter.format(find_missing, **substitutions)):
            statement = formatter.format(merge_relation,
                                         rel=cypher_escape(row['rel']),
                                         **substitutions)
            self.query(statement, id1=row['id1'], id3=row['id3'])
Exemple #4
0
def escape_fields(payload, raw_values=False):
    """Return ``(escaped_field, value)`` pairs for every item of ``payload``.

    Field names always go through ``py2neo.cypher_escape``. Values go through
    ``py2neo.cypher_repr`` unless ``raw_values`` is true, in which case they
    are passed on untouched.
    """
    def _escaped_pair(name, raw):
        rendered = raw if raw_values else py2neo.cypher_repr(raw)
        return py2neo.cypher_escape(name), rendered

    return [_escaped_pair(name, raw) for name, raw in payload.items()]
Exemple #5
0
    def attach(self, this_object, that_object, rel_type):
        """Merge a `rel_type` relation from this_object to that_object.

        Both ends are matched as `Cold` nodes on their primary source
        property. When `that_object` is a `Relationship`, its `rel` supplies
        the relation properties and its `model` is the node that is attached;
        otherwise the relation properties are set to an empty dict.
        """
        from .model import Model, Relationship

        formatter = AQuoteFormatter()

        # Unwrap a Relationship into its properties and its target model.
        if isinstance(that_object, Relationship):
            relation_props, that_object = that_object.rel, that_object.model
        else:
            relation_props = dict()

        # Properties given as a model are deflated before being stored.
        if isinstance(relation_props, Model):
            relation_props = relation_props.serializer.deflate(props=True,
                                                               rels=True)

        source_label = self.serializer.label(this_object)
        target_label = self.serializer.label(that_object)

        substitutions = {
            'n2_labels': u':'.join([self.COLD, cypher_escape(source_label)]),
            'n3_labels': u':'.join([self.COLD, cypher_escape(target_label)]),
            'r1_labels': cypher_escape(rel_type),
        }
        substitutions.update(self.default_params)

        statement = u'\n'.join((
            u'MATCH (n2 :«n2_labels» {«had_primary_source»: $had_primary_source1})',
            u'MATCH (n3 :«n3_labels» {«had_primary_source»: $had_primary_source2})',
            u'MERGE (n2)-[r1 :«r1_labels»]->(n3)',
            u'SET r1 = $r1_props',
        ))

        self.query(formatter.format(statement, **substitutions),
                   had_primary_source1=this_object.had_primary_source,
                   had_primary_source2=that_object.had_primary_source,
                   r1_props=relation_props)
    def get_identifier(self, model_object, **kwargs):
        """Return the ori identifier of the node matching one property filter.

        Exactly one keyword argument must be given. Its name is resolved
        through ``model_object.definition(...)`` to the property that is
        matched, and its value is the value that property must have. Nodes
        are matched on the label of ``model_object``.

        Raises:
            TypeError: when not exactly one keyword argument is passed.
            MissingProperty: when no node matches.
            QueryResultError: when more than one row is returned.
        """
        if len(kwargs) != 1:
            raise TypeError(
                'get_identifier takes exactly 1 keyword-argument')

        # next(iter(...)) works on the list returned by items() on Python 2
        # and on the view returned on Python 3; indexing the view would fail.
        filter_key, filter_value = next(iter(kwargs.items()))

        label = self.serializer.label(model_object)
        definition = model_object.definition(filter_key)

        params = {
            'labels': cypher_escape(label),
            'filter_key': cypher_escape(definition.absolute_uri())
        }
        params.update(self.default_params)

        clauses = [
            u'MATCH (n :«labels» {«filter_key»: $filter_value})',
            u'RETURN n.«ori_identifier» AS ori_identifier',
        ]

        # NOTE(review): `fmt` is not defined in this method; presumably a
        # module-level formatter -- confirm.
        result = self.query(
            fmt.format(u'\n'.join(clauses), **params),
            filter_value=filter_value
        )

        if not result:
            raise MissingProperty(
                'Does not exist: %s with %s=%s' % (model_object.verbose_name(),
                                                   filter_key, filter_value)
            )

        if len(result) > 1:
            raise QueryResultError('The number of results is greater than one!')

        return result[0]['ori_identifier']
    def attach(self, this_object, that_object, rel_type):
        """Merge a `rel_type` relation from this_object to that_object.

        Both ends are matched on their label and ori identifier. When
        `that_object` is a `Relationship`, its `rel` supplies the relation
        properties and its `model` is the node that is attached; otherwise
        the relation properties are set to an empty dict.
        """
        from .model import Model, Relationship

        # Unwrap a Relationship into its properties and its target model.
        if isinstance(that_object, Relationship):
            relation_props, that_object = that_object.rel, that_object.model
        else:
            relation_props = dict()

        # Properties given as a model are deflated before being stored.
        if isinstance(relation_props, Model):
            relation_props = relation_props.serializer.deflate(props=True,
                                                               rels=True)

        source_label = self.serializer.label(this_object)
        target_label = self.serializer.label(that_object)

        substitutions = {
            'n2_labels': cypher_escape(source_label),
            'n3_labels': cypher_escape(target_label),
            'r1_labels': cypher_escape(rel_type),
        }
        substitutions.update(self.default_params)

        statement = u'\n'.join((
            u'MATCH (n2 :«n2_labels» {«ori_identifier»: $ori_identifier1})',
            u'MATCH (n3 :«n3_labels» {«ori_identifier»: $ori_identifier2})',
            u'MERGE (n2)-[r1 :«r1_labels»]->(n3)',
            u'SET r1 = $r1_props',
        ))

        self.query(
            fmt.format(statement, **substitutions),
            ori_identifier1=this_object.ori_identifier,
            ori_identifier2=that_object.ori_identifier,
            r1_props=relation_props
        )
Exemple #8
0
    def get_shortest_path_relationship(self, label_a, node_id_a, label_b,
                                       node_id_b):
        """Returns the first shortest path between two nodes identified by label and node_id

        If there are any results (there could be many shortest paths), it returns the first one (as per spec).
        The result is a dict with the relationships of the path ('path' and
        'path_abbrev') and its nodes ('nodes'), or an empty dict when no path
        is returned.

        Keyword arguments:
        label_a -- The label for node A (e.g. 'Address')
        node_id_a -- The node_id for entity A (e.g. 5124536)
        label_b -- The label for node B (e.g. 'Entity')
        node_id_b -- The node_id for entity B (e.g. 5124537)

        Raises ValueError when one of the labels is not present in the graph.
        """
        errors = [
            '{label} label not in the graph!'.format(label=label)
            for label in (label_a, label_b)
            if label not in self.graph.node_labels
        ]
        if errors:
            raise ValueError("\n".join(errors))

        # Labels cannot be sent as query parameters, so they are escaped and
        # interpolated. The node ids ARE sent as parameters: interpolating
        # them into the query text allowed a quote inside an id to break or
        # alter the statement.
        query = (
            "MATCH (a:{label_a} {{node_id: $node_id_a}}), "
            "(b:{label_b} {{node_id: $node_id_b}}), "
            "p = shortestPath((a)-[*]-(b)) RETURN p"
        ).format(
            label_a=cypher_escape(label_a),
            label_b=cypher_escape(label_b))

        # The ids are rendered to text first because the previous query
        # compared node_id against a quoted (string) literal.
        result = self.graph.evaluate(
            query,
            node_id_a="{0}".format(node_id_a),
            node_id_b="{0}".format(node_id_b))
        if not result:
            return {}

        relationships = list(result.relationships())
        return {
            'path': [write_relationship(i) for i in relationships],
            'path_abbrev': ["{i}".format(i=i) for i in relationships],
            'nodes': [{
                'id': i.__name__,
                'node': i
            } for i in result.nodes()]
        }
Exemple #9
0
 def to_cypher_property(self):
     """Queue one Cypher statement per non-relational property.

     Each statement MERGEs the ``Property`` node named after
     ``self.wikidata_id`` and sets a single property on it; the statements
     are appended to ``self.cypher_statements``. Nothing is appended when
     there are no non-relational properties.
     """
     # .items() instead of .iteritems(): the latter does not exist on
     # Python 3 dictionaries, while .items() works on both major versions.
     non_relational_properties = {
         property_name: value for property_name, value in
         self.non_relational_properties.items()}
     delisted_dictionary = delist_dictionary(non_relational_properties)

     statement_template = (
         u"""MERGE (source_property:Property """
         """{{name: "{source_property_id}"}}) """
         """SET source_property.{non_relational_property} = """
         """ "{non_relational_property_value}"; """)
     for non_relational_property, property_value in delisted_dictionary.items():
         # Backslashes and double quotes are escaped so the value can sit
         # inside a double-quoted Cypher string literal.
         # NOTE(review): py2neo.cypher_escape is documented for identifiers
         # -- confirm that it is the intended treatment for property values.
         escaped_value = py2neo.cypher_escape(
             property_value).replace('\\', '\\\\').replace(u'"', u'\\"')
         self.cypher_statements.append(statement_template.format(
             source_property_id=self.wikidata_id,
             non_relational_property=non_relational_property,
             non_relational_property_value=escaped_value))
    def _create_blank_node(self, model_object):
        """Create a new node for `model_object` with its deflated properties.

        An ori identifier is generated first when the model does not have a
        truthy 'ori_identifier' value yet.
        """
        has_identifier = model_object.values.get('ori_identifier')
        if not has_identifier:
            model_object.generate_ori_identifier()

        node_labels = self.serializer.label(model_object)
        node_props = self.serializer.deflate(model_object, props=True, rels=False)

        substitutions = {'labels': cypher_escape(node_labels)}
        substitutions.update(self.default_params)

        statement = u'\n'.join((
            u'CREATE (n :«labels»)',
            u'SET n += $props',
            u'RETURN n',
        ))

        outcome = self.session.run(
            fmt.format(statement, **substitutions),
            props=node_props,
        )
        outcome.summary()
    def _merge(self, model_object):
        """Merge `model_object` into the node identified by its ori identifier.

        The deflated properties (without the primary source property) are
        added to the node, the model's primary source is handled separately
        by the query, and the node's outgoing relationships are removed.
        """
        node_labels = self.serializer.label(model_object)
        node_props = self.serializer.deflate(model_object, props=True, rels=False)

        # todo this quickfix needs to be refactored
        del node_props[Uri(Prov, 'hadPrimarySource')]

        substitutions = {'labels': cypher_escape(node_labels)}
        substitutions.update(self.default_params)

        statement = u'\n'.join((
            u'MERGE (n :«labels» {«ori_identifier»: $ori_identifier})',
            u'SET n += $props',
            # Only add had_primary_source to array if doesn't exist
            u'SET(',
            u'  CASE WHEN NOT $had_primary_source IN n.«had_primary_source» THEN n END',
            u').«had_primary_source» = n.«had_primary_source» + [$had_primary_source]',
            u'WITH n',
            # Remove all directly related blank nodes
            u'OPTIONAL MATCH (n)-->(m)',
            u'WHERE NOT EXISTS(m.«had_primary_source»)',
            u'DETACH DELETE m',
            u'WITH n',
            # Remove all outgoing relationships
            u'OPTIONAL MATCH (n)-[r]->()',
            u'DELETE r',
            u'WITH n',
            u'RETURN n',
        ))

        outcome = self.session.run(
            fmt.format(statement, **substitutions),
            had_primary_source=model_object.had_primary_source,
            ori_identifier=model_object.ori_identifier,
            props=node_props,
        )
        outcome.summary()
Exemple #12
0
class Neo4jDatabase(object):
    """Database implementation for Neo4j graph database.

    Provides methods for model operations to process ETL data for new and
    existing nodes. When the class is initialized, it reuses the driver if it
    has been used before.
    """
    # Driver shared by all instances; it is created in `__init__` the first
    # time an instance is constructed.
    _driver = None

    # Node labels used by the queries in this class.
    HOT = 'Hot'
    COLD = 'Cold'
    ARCHIVE = 'Archive'

    # Escaped names that are merged into the substitutions of every query
    # template in this class (they fill the «name» placeholders).
    default_params = {
        'was_revision_of': cypher_escape(Uri(Prov, 'wasRevisionOf')),
        'was_derived_from': cypher_escape(Uri(Prov, 'wasDerivedFrom')),
        'had_primary_source': cypher_escape(Uri(Prov, 'hadPrimarySource')),
        'provided_by': cypher_escape(Uri(Pav, 'providedBy')),
        'ori_identifier': cypher_escape(Uri(Mapping, 'ori/identifier')),
    }

    def __init__(self, serializer):
        """Stores the serializer and opens a session on the shared driver."""
        self.serializer = serializer

        if not self._driver:
            # Set driver on the class so all instances use the same driver
            type(self)._driver = GraphDatabase.driver(NEO4J_URL,
                                                      auth=(
                                                          NEO4J_USER,
                                                          NEO4J_PASSWORD,
                                                      ))

        self.session = self._driver.session()
        # Open transaction used by `transaction_query`, None when there is
        # none.
        self.tx = None

    def query(self, query, **params):
        """Executes a query and returns the result"""
        cursor = self.session.run(query, **params)
        result = cursor.data()
        return result

    def transaction_query(self, query, **params):
        """Adds a query to be executed as a transaction. All queries called with
        this method will be in the same transaction until `transaction_commit`
        is called.
        """
        if not self.tx:
            self.tx = self.session.begin_transaction()

        self.tx.run(query, **params)

    def transaction_commit(self):
        """Commits all queries that are added by `transaction_query`.

        Returns the result of the commit, or None when no transaction is open.
        """
        if self.tx:
            result = self.tx.commit()
            self.tx = None  # Make sure the tx is reset
            return result

    def create_constraints(self):
        """Creates constraints on identifiers in Neo4j"""
        self.session.run('CREATE CONSTRAINT ON (x:Hot)'
                         'ASSERT x.`{}` IS UNIQUE'.format(
                             Uri(Mapping, 'ori/identifier')))

        self.session.run('CREATE CONSTRAINT ON (x:Live)'
                         'ASSERT x.`{}` IS UNIQUE'.format(
                             Uri(Mapping, 'ori/sourceLocator')))

    def get_identifier_by_source_id(self, model_object, source_id):
        """Returns the ori identifier based on the specified source identifier.

        The ori identifier on a `Hot` node is queried by looking for the source
        identifier on `Cold` nodes. Returns the identifier of the single
        result; raises MissingProperty when there is no result and
        QueryResultError when there is more than one."""
        fmt = AQuoteFormatter()

        label = self.serializer.label(model_object)

        params = {
            'n1_labels': u':'.join([self.HOT, cypher_escape(label)]),
            'n2_labels': u':'.join([self.COLD, cypher_escape(label)]),
        }
        params.update(self.default_params)

        clauses = [
            u'MATCH (n2 :«n2_labels» {«had_primary_source»: $had_primary_source})<--(n1 :«n1_labels»)',
            u'RETURN n1.«ori_identifier» AS ori_identifier',
        ]

        result = self.query(fmt.format(u'\n'.join(clauses), **params),
                            had_primary_source=source_id)

        if not result:
            raise MissingProperty('Does not exist')

        if len(result) > 1:
            raise QueryResultError(
                'The number of results is greater than one!')

        return result[0]['ori_identifier']

    def replace(self, model_object):
        """Replaces or creates nodes based on the model object.

        Existing nodes are replaced by the deflated model object and new ones
        are created when they do not exist. Three queries are run sequentially
        until one of them yields a result.

        The first will add a new version if an older version exists on a node,
        the second will add a new version when no older version exists, the
        third will create new nodes if the nodes do not yet exist. If the third
        query returns no rows, a QueryEmptyResult is raised.

        The first and second query will match the `Cold` node based on the
        source_id. They are only run when the model object has a `_source`
        attribute.
        """
        fmt = AQuoteFormatter()

        label = self.serializer.label(model_object)
        n2_props = self.serializer.deflate(model_object,
                                           props=True,
                                           rels=False)

        params = {
            'n1_labels':
            u':'.join([self.HOT, cypher_escape(label)]),
            'n2_labels':
            u':'.join([self.COLD, cypher_escape(label)]),
            'n3_labels':
            self.ARCHIVE,
            'n4_labels':
            self.ARCHIVE,
            'n5_labels':
            u':'.join(
                [self.ARCHIVE,
                 cypher_escape(Uri(Prov, 'SoftwareAgent'))]),
        }
        params.update(self.default_params)

        if hasattr(model_object, '_source'):
            # Keep it readable
            # Expand labels
            # Same name variables
            # Escaping some variables
            # Parameters

            # Add a new version if an older version already exists
            clauses = [
                u'MATCH (n1 :«n1_labels»)--(n2 :«n2_labels» {«had_primary_source»: $had_primary_source})-[r2 :«was_revision_of»]-(n3 :«n3_labels»)',
                u'MERGE (n2)-[:«was_revision_of»]->(n4 :«n4_labels»)-[:«was_revision_of»]->(n3)',
                u'MERGE (n2)-[:«provided_by»]->(n5 :«n5_labels» {name: $name})',
                u'SET n4 = n2',
                u'SET n2 = $n2_props',
                u'DELETE r2',
            ]

            cursor = self.session.run(
                fmt.format(u'\n'.join(clauses), **params),
                n2_props=n2_props,
                had_primary_source=model_object.had_primary_source,
                name=model_object._source,
            )
            summary = cursor.summary()
            # The query deletes r2, so a deleted relationship means it matched.
            if summary.counters.relationships_deleted > 0:
                return

            # Add a new version if no older version exists
            clauses = [
                u'MATCH (n1 :«n1_labels»)--(n2 :«n2_labels» {«had_primary_source»: $had_primary_source})',
                u'MERGE (n2)-[:«was_revision_of»]->(n4 :«n4_labels»)',
                u'MERGE (n2)-[:«provided_by»]->(n5 :«n5_labels» {name: $name})',
                u'SET n4 = n2',
                u'SET n2 = $n2_props',
            ]

            cursor = self.session.run(
                fmt.format(u'\n'.join(clauses), **params),
                n2_props=n2_props,
                had_primary_source=model_object.had_primary_source,
                name=model_object._source,
            )
            summary = cursor.summary()
            # Stop here when this query created at least one node.
            if summary.counters.nodes_created > 0:
                return

        # Check whether a Hot node with this primary source already exists.
        clauses = [
            u'MATCH (n1 :«n1_labels» {«had_primary_source»: $had_primary_source})',
            u'RETURN n1',
        ]

        cursor = self.session.run(
            fmt.format(u'\n'.join(clauses), **params),
            had_primary_source=model_object.had_primary_source)

        n1_props = copy(n2_props)
        # A new ori identifier is only generated when no such node was found.
        if len(cursor.data()) == 0:
            # n1_props = n2_props + ori_identifier
            n1_props[str(Uri(Mapping, 'ori/identifier'))] = \
                model_object.generate_ori_identifier()

        # Create a new entity when no matching node seems to exist
        clauses = [
            u'MERGE (n1 :«n1_labels» {«had_primary_source»: $had_primary_source})-[:«was_derived_from»]->(n2 :«n2_labels»)',
        ]
        # Extra query parameters that are only needed for some models.
        bound_params = {}

        if hasattr(model_object, '_source'):
            clauses.extend([
                u'MERGE (n5 :«n5_labels» {name: $name})',
                u'MERGE (n2)-[:«provided_by»]->(n5)',
            ])
            bound_params['name'] = model_object._source
        clauses.extend([
            u'SET n1 = $n1_props',
            u'SET n2 = $n2_props',
            u'RETURN n1.«ori_identifier» AS ori_identifier',
        ])

        cursor = self.session.run(
            fmt.format(u'\n'.join(clauses), **params),
            n1_props=n1_props,
            n2_props=n2_props,
            had_primary_source=model_object.had_primary_source,
            **bound_params)
        result = cursor.data()
        if len(result) > 0:
            # Store the identifier of the first returned row on the model.
            model_object.ori_identifier = result[0]['ori_identifier']
            return

        raise QueryEmptyResult('No ori_identifier was returned')

    def attach(self, this_object, that_object, rel_type):
        """Attaches this_object to that_object model.

        The query will match the `Cold` node based on the source_id of the
        models. If available it will set `r1_props` on the relation between the
        nodes.
        """
        from .model import Model, Relationship

        fmt = AQuoteFormatter()

        r1_props = dict()
        # A Relationship wraps the target model together with the relation
        # properties; unwrap both.
        if isinstance(that_object, Relationship):
            r1_props = that_object.rel
            that_object = that_object.model

        # Relation properties given as a Model are deflated first.
        if isinstance(r1_props, Model):
            r1_props = r1_props.serializer.deflate(props=True, rels=True)

        this_label = self.serializer.label(this_object)
        that_label = self.serializer.label(that_object)

        params = {
            'n2_labels': u':'.join([self.COLD,
                                    cypher_escape(this_label)]),
            'n3_labels': u':'.join([self.COLD,
                                    cypher_escape(that_label)]),
            'r1_labels': cypher_escape(rel_type),
        }
        params.update(self.default_params)

        clauses = [
            u'MATCH (n2 :«n2_labels» {«had_primary_source»: $had_primary_source1})',
            u'MATCH (n3 :«n3_labels» {«had_primary_source»: $had_primary_source2})',
            u'MERGE (n2)-[r1 :«r1_labels»]->(n3)',
            u'SET r1 = $r1_props',
        ]

        self.query(fmt.format(u'\n'.join(clauses), **params),
                   had_primary_source1=this_object.had_primary_source,
                   had_primary_source2=that_object.had_primary_source,
                   r1_props=r1_props)

    def copy_relations(self):
        """Copies the relations from Cold->Cold nodes to Hot->Hot nodes.

        All relations between these nodes that do not already exist are copied.
        Only direct relations between `Cold` nodes are matched.
        """
        fmt = AQuoteFormatter()

        params = {
            'labels': self.COLD,
            'n1_labels': self.HOT,
            'n2_labels': self.COLD,
            'n3_labels': self.HOT,
        }
        params.update(self.default_params)

        clauses = [
            u'MATCH (n1 :«n1_labels»)-[:«was_derived_from»]->(n2 :«n2_labels»)-[r]->(:«labels»)<-[:«was_derived_from»]-(n3 :«n3_labels»)',
            u'WHERE NOT (n1)--(n3)',
            u'RETURN id(n1) AS id1, id(n2) as id2, id(n3) AS id3, type(r) AS rel, id(startNode(r)) AS start',
        ]

        # One MERGE query is run per returned row; only id1, id3 and rel of
        # the row are used.
        for result in self.query(fmt.format(u'\n'.join(clauses), **params)):
            clauses = [
                u'MATCH (n1), (n3)', u'WHERE id(n1) = $id1',
                u'AND id(n3) = $id3', u'MERGE (n1)-[:«rel»]->(n3)'
            ]

            self.query(fmt.format(u'\n'.join(clauses),
                                  rel=cypher_escape(result['rel']),
                                  **params),
                       id1=result['id1'],
                       id3=result['id3'])
Exemple #13
0
    def replace(self, model_object):
        """Replaces or creates nodes based on the model object.

        Existing nodes are replaced by the deflated model object and new ones
        are created when they do not exist. Three queries are run sequentially
        until one of them yields a result.

        The first will add a new version if an older version exists on a node,
        the second will add a new version when no older version exists, the
        third will create new nodes if the nodes do not yet exist. If the third
        query returns no rows, a QueryEmptyResult is raised.

        The first and second query will match the `Cold` node based on the
        source_id. They are only run when the model object has a `_source`
        attribute.
        """
        fmt = AQuoteFormatter()

        label = self.serializer.label(model_object)
        n2_props = self.serializer.deflate(model_object,
                                           props=True,
                                           rels=False)

        # Label expressions for the «nX_labels» placeholders in the queries.
        params = {
            'n1_labels':
            u':'.join([self.HOT, cypher_escape(label)]),
            'n2_labels':
            u':'.join([self.COLD, cypher_escape(label)]),
            'n3_labels':
            self.ARCHIVE,
            'n4_labels':
            self.ARCHIVE,
            'n5_labels':
            u':'.join(
                [self.ARCHIVE,
                 cypher_escape(Uri(Prov, 'SoftwareAgent'))]),
        }
        params.update(self.default_params)

        if hasattr(model_object, '_source'):
            # Keep it readable
            # Expand labels
            # Same name variables
            # Escaping some variables
            # Parameters

            # Add a new version if an older version already exists
            clauses = [
                u'MATCH (n1 :«n1_labels»)--(n2 :«n2_labels» {«had_primary_source»: $had_primary_source})-[r2 :«was_revision_of»]-(n3 :«n3_labels»)',
                u'MERGE (n2)-[:«was_revision_of»]->(n4 :«n4_labels»)-[:«was_revision_of»]->(n3)',
                u'MERGE (n2)-[:«provided_by»]->(n5 :«n5_labels» {name: $name})',
                u'SET n4 = n2',
                u'SET n2 = $n2_props',
                u'DELETE r2',
            ]

            cursor = self.session.run(
                fmt.format(u'\n'.join(clauses), **params),
                n2_props=n2_props,
                had_primary_source=model_object.had_primary_source,
                name=model_object._source,
            )
            summary = cursor.summary()
            # The query deletes r2, so a deleted relationship means it matched.
            if summary.counters.relationships_deleted > 0:
                return

            # Add a new version if no older version exists
            clauses = [
                u'MATCH (n1 :«n1_labels»)--(n2 :«n2_labels» {«had_primary_source»: $had_primary_source})',
                u'MERGE (n2)-[:«was_revision_of»]->(n4 :«n4_labels»)',
                u'MERGE (n2)-[:«provided_by»]->(n5 :«n5_labels» {name: $name})',
                u'SET n4 = n2',
                u'SET n2 = $n2_props',
            ]

            cursor = self.session.run(
                fmt.format(u'\n'.join(clauses), **params),
                n2_props=n2_props,
                had_primary_source=model_object.had_primary_source,
                name=model_object._source,
            )
            summary = cursor.summary()
            # Stop here when this query created at least one node.
            if summary.counters.nodes_created > 0:
                return

        # Check whether a Hot node with this primary source already exists.
        clauses = [
            u'MATCH (n1 :«n1_labels» {«had_primary_source»: $had_primary_source})',
            u'RETURN n1',
        ]

        cursor = self.session.run(
            fmt.format(u'\n'.join(clauses), **params),
            had_primary_source=model_object.had_primary_source)

        n1_props = copy(n2_props)
        # A new ori identifier is only generated when no such node was found.
        if len(cursor.data()) == 0:
            # n1_props = n2_props + ori_identifier
            n1_props[str(Uri(Mapping, 'ori/identifier'))] = \
                model_object.generate_ori_identifier()

        # Create a new entity when no matching node seems to exist
        clauses = [
            u'MERGE (n1 :«n1_labels» {«had_primary_source»: $had_primary_source})-[:«was_derived_from»]->(n2 :«n2_labels»)',
        ]
        # Extra query parameters that are only needed for some models.
        bound_params = {}

        if hasattr(model_object, '_source'):
            clauses.extend([
                u'MERGE (n5 :«n5_labels» {name: $name})',
                u'MERGE (n2)-[:«provided_by»]->(n5)',
            ])
            bound_params['name'] = model_object._source
        clauses.extend([
            u'SET n1 = $n1_props',
            u'SET n2 = $n2_props',
            u'RETURN n1.«ori_identifier» AS ori_identifier',
        ])

        cursor = self.session.run(
            fmt.format(u'\n'.join(clauses), **params),
            n1_props=n1_props,
            n2_props=n2_props,
            had_primary_source=model_object.had_primary_source,
            **bound_params)
        result = cursor.data()
        if len(result) > 0:
            # Store the identifier of the first returned row on the model.
            model_object.ori_identifier = result[0]['ori_identifier']
            return

        raise QueryEmptyResult('No ori_identifier was returned')
    def replace(self, model_object):
        """Replaces or creates nodes based on the model object.

        Which path is taken depends on the values of the model object:

        * No 'had_primary_source' value and the model is an `Individual`:
          nodes are looked up by label. More than one result raises a
          QueryResultError, no result creates the node with a newly generated
          ori identifier, and a single result sets its ori identifier on the
          model.
        * No 'had_primary_source' value and not an `Individual`: the work is
          delegated to `_create_blank_node`.
        * A 'had_primary_source' value and a known 'ori_identifier': the work
          is delegated to `_merge`.
        * A 'had_primary_source' value without 'ori_identifier': nodes whose
          primary sources contain the value are looked up. When an ori
          identifier is found it is set on the model and `_merge` is called,
          otherwise `_create_node` is called.
        """
        labels = self.serializer.label(model_object)

        params = {
            'labels': cypher_escape(labels),
            'had_primary_source': cypher_escape(Uri(Prov, 'hadPrimarySource')),
        }
        params.update(self.default_params)

        # NOTE(review): `fmt` is not defined in this method; presumably a
        # module-level formatter -- confirm.
        if not model_object.values.get('had_primary_source'):

            from ocd_backend.models.model import Individual
            if isinstance(model_object, Individual):
                # Look up every node that carries the label of this model.
                clauses = [
                    u'MATCH (n :«labels»)',
                    u'RETURN n.«ori_identifier» AS ori_identifier',
                ]

                cursor = self.session.run(
                    fmt.format(u'\n'.join(clauses), **params),
                )
                result = cursor.data()

                if len(result) > 1:
                    raise QueryResultError('The number of results is greater than one!')

                elif len(result) < 1:
                    # No node yet: generate an identifier and create the node.
                    model_object.generate_ori_identifier()
                    props = self.serializer.deflate(model_object, props=True, rels=False)

                    clauses = [
                        u'MERGE (n :«labels»)',
                        u'SET n += $props',
                        u'RETURN n.«ori_identifier» AS ori_identifier',
                    ]

                    cursor = self.session.run(
                        fmt.format(u'\n'.join(clauses), **params),
                        props=props,
                    )
                    cursor.summary()

                else:
                    # Exactly one node: reuse its identifier.
                    try:
                        model_object.ori_identifier = result[0]['ori_identifier']
                    except Exception:
                        raise QueryResultError('No ori_identifier was returned')
            else:
                self._create_blank_node(model_object)
        else:
            # if ori_identifier is already known use that to identify instead
            if model_object.values.get('ori_identifier'):
                self._merge(model_object)
            else:
                # Look up nodes whose primary sources contain this source.
                clauses = [
                    u'MATCH (n :«labels»)',
                    u'WHERE $had_primary_source IN n.«had_primary_source»',
                    u'RETURN n.«ori_identifier» AS ori_identifier',
                ]

                cursor = self.session.run(
                    fmt.format(u'\n'.join(clauses), **params),
                    had_primary_source=model_object.had_primary_source,
                )
                result = cursor.data()

                if len(result) > 1:
                    # Todo don't fail yet until unique constraints are solved
                    # raise QueryResultError('The number of results is greater than one!')
                    pass

                # Use the first row; any failure to read it (for example an
                # empty result) is treated as "no identifier found".
                try:
                    ori_identifier = result[0]['ori_identifier']
                except Exception:
                    ori_identifier = None

                if ori_identifier:
                    model_object.ori_identifier = ori_identifier
                    self._merge(model_object)
                else:
                    # if ori_identifier do merge otherwise create
                    # NOTE(review): `_create_node` is not defined in the
                    # visible code -- confirm it exists on this class.
                    self._create_node(model_object)
Exemple #15
0
def escape(identifier):
    """Return ``identifier`` as escaped by ``py2neo.cypher_escape``."""
    escaped = py2neo.cypher_escape(identifier)
    return escaped
Exemple #16
0
 def raw_and_cypher(raw):
     """Pair ``raw`` with its ``py2neo.cypher_escape`` form.

     Returns ``(None, None)`` when ``raw`` is None.
     """
     if raw is None:
         return None, None
     return raw, py2neo.cypher_escape(raw)