Example #1
    def get_graph(self, request, **kwargs):
        self.method_check(request, allowed=['get'])
        self.throttle_check(request)

        depth = int(request.GET['depth']) if 'depth' in request.GET.keys() else 1
        aggregation_threshold = 10

        ###
        # First we retrieve every leaf in the graph
        query = """
            START root=node({root})
            MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(leaf.name)
            AND type.app_label = '{app_label}'
            AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
            RETURN leaf, ID(leaf) as id_leaf, type
        """.format(root=kwargs['pk'], depth=depth, app_label=get_model_topic(self.get_model()))
        rows = connection.cypher(query).to_dicts()

        leafs = {}

        # We need to retrieve the root in another request
        # TODO : enhance that
        query = """
            START root=node({root})
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            RETURN root as leaf, ID(root) as id_leaf, type
        """.format(root=kwargs['pk'])
        for row in connection.cypher(query).to_dicts():
            rows.append(row)

        for row in rows:
            row['leaf']['data']['_id'] = row['id_leaf']
            row['leaf']['data']['_type'] = row['type']['data']['model_name']
            leafs[row['id_leaf']] = row['leaf']['data']
        #
        ###

        ###
        # Then we retrieve all edges
        query = """
            START A=node({leafs})
            MATCH (A)-[rel]->(B)
            WHERE type(rel) <> "<<INSTANCE>>"
            RETURN ID(A) as head, type(rel) as relation, id(B) as tail
        """.format(leafs=','.join([str(id) for id in leafs.keys()]))
        rows = connection.cypher(query).to_dicts()

        edges = []
        for row in rows:
            try:
                if (leafs[row['head']] and leafs[row['tail']]):
                    edges.append([row['head'], row['relation'], row['tail']])
            except KeyError:
                pass
        #
        ###

        self.log_throttled_access(request)
        return self.create_response(request, {'leafs': leafs, 'edges' : edges})
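
The edge pass above keeps only relations whose two endpoints were both collected as leafs. A minimal sketch of that filtering step on its own (plain Python, no Neo4j connection; the row keys mirror the head/relation/tail aliases returned by the Cypher query above):

def filter_edges(rows, leafs):
    # Keep only edges whose head and tail are both known leafs
    edges = []
    for row in rows:
        if row['head'] in leafs and row['tail'] in leafs:
            edges.append([row['head'], row['relation'], row['tail']])
    return edges

# Toy data shaped like the query results above
leafs = {1: {'name': 'A'}, 2: {'name': 'B'}}
rows = [
    {'head': 1, 'relation': 'KNOWS', 'tail': 2},
    {'head': 1, 'relation': 'KNOWS', 'tail': 99},  # 99 is not a leaf, so it is dropped
]
assert filter_edges(rows, leafs) == [[1, 'KNOWS', 2]]
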
Example #2
 def query(self, root="*"):
     # Replace the query's tags
     # by their chosen values
     query = self.query_str.format(
         root=root,
         match=self.match.format(select=self.select, model=self.model),
         select=self.select,
     )
     # Execute the query and return the result as a list of dicts
     return self.transform(connection.cypher(query).to_dicts())
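
A hedged sketch of how the two-level str.format() templating above composes. QueryExample and its attribute values are invented for illustration; only the nesting of `match` inside `query_str` mirrors the method above.

class QueryExample(object):
    # Hypothetical placeholder values standing in for the real object's state
    query_str = "START root={root} MATCH {match} RETURN {select}"
    match = "({select})<-[:`<<INSTANCE>>`]-({model})"
    select = "leaf"
    model = "type"

    def query(self, root="*"):
        # Replace the query's tags by their chosen values
        return self.query_str.format(
            root=root,
            match=self.match.format(select=self.select, model=self.model),
            select=self.select,
        )

print(QueryExample().query(root="node(0)"))
# -> START root=node(0) MATCH (leaf)<-[:`<<INSTANCE>>`]-(type) RETURN leaf
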
Example #3
    def summary_mine(self, bundle):
        request = bundle.request
        self.method_check(request, allowed=['get'])
        self.throttle_check(request)
        if not request.user.id:
            raise UnauthorizedError('This method requires authentication')

        query = """
            START root=node(*)
            MATCH (type)-[`<<INSTANCE>>`]->(root)
            WHERE HAS(root.name)
            AND HAS(root._author)
            AND HAS(type.model_name)
            AND %s IN root._author
            RETURN DISTINCT ID(root) as id, root.name as name, type.name as model
        """ % int(request.user.id)

        matches      = connection.cypher(query).to_dicts()
        count        = len(matches)
        limit        = int(request.GET.get('limit', 20))
        paginator    = Paginator(matches, limit)

        try:
            p     = int(request.GET.get('page', 1))
            page  = paginator.page(p)
        except InvalidPage:
            raise Http404("Sorry, no results on that page.")

        objects = []
        for result in page.object_list:
            label = result.get("name", None)
            objects.append({
                'label': label,
                'subject': {
                    "name": result.get("id", None),
                    "label": label
                },
                'predicate': {
                    "label": "is instance of",
                    "name": "<<INSTANCE>>"
                },
                'object': result.get("model", None)
            })

        object_list = {
            'objects': objects,
            'meta': {
                'page': p,
                'limit': limit,
                'total_count': count
            }
        }

        return object_list
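
The pagination above relies on Django's standard Paginator. A minimal sketch of that API on a plain list, assuming Django is installed (Paginator works on any list-like object, no database needed):

from django.core.paginator import Paginator, InvalidPage

matches = [{'id': i, 'name': 'node-%d' % i} for i in range(45)]
paginator = Paginator(matches, 20)        # 20 results per page, like the default limit above

try:
    page = paginator.page(2)              # page numbers are 1-indexed
except InvalidPage:
    page = paginator.page(1)

print(paginator.count)                    # 45, used as total_count above
print([r['id'] for r in page.object_list][:3])  # [20, 21, 22]
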
Example #4
    def rdf_search_query(self, subject, predicate, obj):
        identifier = obj["id"] if "id" in obj else obj
        # retrieve all models in current topic
        all_models = dict(
            (model.__name__, model) for model in self.get_models())
        # If the received identifier describes a literal value
        if self.is_registered_literal(predicate["name"]):
            # Get the field name into the database
            field_name = predicate["name"]
            # Build the request
            query = """
                START root=node(*)
                MATCH (root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(root.{field})
                AND root.{field} = {value}
                AND type.model_name = {model}
                AND type.app_label = '{app}'
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(field=field_name,
                       value=identifier,
                       model=subject["name"],
                       app=self.app_label())

        # If the received identifier describes a relationship
        elif self.is_registered_relationship(predicate["name"]):
            fields = utils.iterate_model_fields(
                all_models[predicate["subject"]])
            # Get the field name into the database
            relationships = [
                field for field in fields if field["name"] == predicate["name"]
            ]
            # We didn't find the predicate
            if not len(relationships):
                return {'errors': 'Unknown predicate type'}
            relationship = relationships[0]["rel_type"]
            # Query to get every result
            query = u"""
                START st=node({id})
                MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(st.name)
                AND type.app_label = '{app}'
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(
                relationship=relationship,
                id=identifier,
                app=self.app_label(),
                is_out='<' if relationships[0]['direction'] == 'out' else '',
                is_in='>' if relationships[0]['direction'] == 'in' else '')
        else:
            return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
        return connection.cypher(query).to_dicts()
Example #5
 def search(self, query):
     match = str(query).lower()
     # Strip quotes, backticks, braces, parentheses and pipes from the search term
     match = re.sub(r"[\"'`;:{}()|]", '', match).strip()
     # Query to get every result
     query = """
         START root=node(*)
         MATCH (root)<-[r:`<<INSTANCE>>`]-(type)
         WHERE HAS(root.name)
         AND LOWER(root.name) =~ '.*(%s).*'
         RETURN ID(root) as id, root.name as name, type.name as model
     """ % match
     return connection.cypher(query).to_dicts()
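
A standalone sketch of the sanitisation step: strip characters that could break out of the quoted Cypher regular expression before the term is interpolated (pure string handling, no database needed; the exact character set is an assumption based on the substitution above):

import re

def sanitize_term(term):
    # Lower-case and drop quotes, backticks, braces, parentheses and pipes
    term = str(term).lower()
    return re.sub(r"[\"'`;:{}()|]", '', term).strip()

assert sanitize_term("  O'Neil; {test}  ") == "oneil test"
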
Example #6
 def rdf_search(self, subject, predicate, obj):
     # Query to get every result
     query = """
         START st=node(*)
         MATCH (st)<-[:`%s`]-(root)<-[:`<<INSTANCE>>`]-(type)
         WHERE HAS(root.name)
         AND HAS(st.name)
         AND type.name = "%s"
         AND st.name = "%s"
         RETURN DISTINCT ID(root) as id, root.name as name, type.name as model
     """ % ( predicate["name"], subject["name"], obj["name"], )
     return connection.cypher(query).to_dicts()
Example #7
 def rdf_search_query(self, subject, predicate, obj):
     identifier = obj["id"] if "id" in obj else obj
     # retrieve all models in current topic
     all_models = dict((model.__name__, model) for model in self.get_models())
     # If the received identifier describes a literal value
     if self.is_registered_literal(predicate["name"]):
         # Get the field name into the database
         field_name = predicate["name"]
         # Build the request
         query = """
             START root=node(*)
             MATCH (root)<-[:`<<INSTANCE>>`]-(type)
             WHERE HAS(root.name)
             AND HAS(root.{field})
             AND root.{field} = {value}
             AND type.model_name = {model}
             AND type.app_label = '{app}'
             RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
         """.format(
             field=field_name,
             value=identifier,
             model=subject["name"],
             app=self.app_label()
         )
         
     # If the received identifier describes a relationship
     elif self.is_registered_relationship(predicate["name"]):
         fields        = utils.iterate_model_fields( all_models[predicate["subject"]] )
         # Get the field name into the database
         relationships = [ field for field in fields if field["name"] == predicate["name"] ]
         # We didn't find the predicate
         if not len(relationships): return {'errors': 'Unknown predicate type'}
         relationship  = relationships[0]["rel_type"]
         # Query to get every result
         query = u"""
             START st=node({id})
             MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
             WHERE HAS(root.name)
             AND HAS(st.name)
             AND type.app_label = '{app}'
             RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
         """.format(
             relationship=relationship,
             id=identifier,
             app=self.app_label(),
             is_out='<' if relationships[0]['direction'] == 'out' else '',
             is_in='>' if relationships[0]['direction'] == 'in' else ''
         )
         print query
     else:
         return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
     return connection.cypher(query).to_dicts()
Example #8
 def query(self, root="*"):
     # Replace the query's tags
     # by their chosen values
     query = self.query_str.format(
         root=root, 
         match=self.match.format(
             select=self.select,
             model=self.model
         ),
         select=self.select,
     )
     # Execute the query and return the result as a list of dicts
     return self.transform(connection.cypher(query).to_dicts())
Example #9
    def rdf_search(self, subject, predicate, obj):
        obj = obj["name"] if "name" in obj else obj
        # retrieve all models in current topic
        all_models = dict((model.__name__, model) for model in self.topic.get_models())
        # If the received obj describes a literal value
        if self.is_registered_literal(predicate["name"]):
            # Get the field name into the database
            field_name = predicate["name"]
            # Build the request
            query = """
                START root=node(*)
                MATCH (root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(root.{field})
                AND root.{field} = {value}
                AND type.model_name = {model}
                AND type.app_label = {app}
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(
                field=field_name,
                value=adapt(obj),
                model=adapt(subject["name"]),
                app=adapt(self.topic.app_label())
            )
        # If the received obj describes a relationship
        elif self.is_registered_relationship(predicate["name"]):
            fields        = utils.get_model_fields( all_models[predicate["subject"]] )
            # Get the field name into the database
            relationships = [ field for field in fields if field["name"] == predicate["name"] ]
            # We didn't find the predicate
            if not len(relationships): return {'errors': 'Unknown predicate type'}
            relationship  = relationships[0]["rel_type"]
            # Query to get every result
            query = """
                START st=node(*)
                MATCH (st)<-[:`{relationship}`]-(root)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(root.name)
                AND HAS(st.name)
                AND st.name = {name}
                AND type.app_label = {app}
                RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
            """.format(
                relationship=relationship,
                name=adapt(obj),
                app=adapt(self.topic.app_label())
            )
        else:
            return {'errors': 'Unknown predicate type'}

        return connection.cypher(query).to_dicts()
Example #10
 def obj_get_list(self, request=None, **kwargs):
     request = kwargs["bundle"].request if request == None else request
     # Super user only
     if not request.user.is_superuser:
         # We force tastypie to render the response directly
         raise ImmediateHttpResponse(response=HttpResponse('Unauthorized', status=401))
     query = request.GET["q"];
     data  = connection.cypher(query).to_dicts()
     # Serialize content in json
     # @TODO implement better format support
     content  = self.serializer(data, "application/json")
     # Create an HTTP response
     response = HttpResponse(content=content, content_type="application/json")
     # We force tastypie to render the response directly
     raise ImmediateHttpResponse(response=response)
Example #11
def get_model_nodes():
    # Return buffer values
    if hasattr(get_model_nodes, "buffer"): 
        results = get_model_nodes.buffer
        # Refresh the buffer ~ 1/10 calls
        if randint(0,10) == 10: del get_model_nodes.buffer
        return results
    query = """
        START n=node(*)
        MATCH n-[r:`<<TYPE>>`]->t
        WHERE HAS(t.name)
        RETURN t.name as name, ID(t) as id
    """
    # Cache the result on the function object
    get_model_nodes.buffer = connection.cypher(query).to_dicts()    
    return get_model_nodes.buffer
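
The buffering above stores the query result as an attribute on the function object and occasionally drops it so it eventually refreshes. A generic sketch of that pattern, with expensive_call standing in for the Cypher round-trip:

from random import randint

def expensive_call():
    # Placeholder for the real work, e.g. a slow database query
    return [{'name': 'Person', 'id': 42}]

def get_cached_nodes():
    # Serve the buffered value if we already have one
    if hasattr(get_cached_nodes, "buffer"):
        results = get_cached_nodes.buffer
        # Drop the buffer roughly once every eleven calls so it gets recomputed
        if randint(0, 10) == 10:
            del get_cached_nodes.buffer
        return results
    # First call (or just after a refresh): compute and buffer the result
    get_cached_nodes.buffer = expensive_call()
    return get_cached_nodes.buffer

assert get_cached_nodes() == get_cached_nodes()
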
Example #12
 def obj_get_list(self, request=None, **kwargs):
     request = kwargs["bundle"].request if request == None else request
     # Super user only
     if not request.user.is_superuser:
         # We force tastypie to render the response directly
         raise ImmediateHttpResponse(
             response=HttpResponse('Unauthorized', status=401))
     query = request.GET["q"]
     data = connection.cypher(query).to_dicts()
     # Serialize content in json
     # @TODO implement better format support
     content = self.serializer(data, "application/json")
     # Create an HTTP response
     response = HttpResponse(content=content,
                             content_type="application/json")
     # We force tastypie to render the response directly
     raise ImmediateHttpResponse(response=response)
Example #13
def get_model_nodes():
    from neo4django.db import connection
    # Return buffer values
    if hasattr(get_model_nodes, "buffer"):
        results = get_model_nodes.buffer
        # Refresh the buffer ~ 1/10 calls
        if randint(0, 10) == 10: del get_model_nodes.buffer
        return results
    query = """
        START n=node(0)
        MATCH n-[r:`<<TYPE>>`]->t
        WHERE HAS(t.name)
        RETURN t.name as name, ID(t) as id
    """
    # Cache the result on the function object
    get_model_nodes.buffer = connection.cypher(query).to_dicts()
    return get_model_nodes.buffer
Example #14
 def summary_types(self, bundle):
     # Query to aggregate instance counts by model type
     query = """
         START n=node(*)
         MATCH (c)<-[r:`<<INSTANCE>>`]-(n)
         WHERE HAS(n.model_name)
         RETURN ID(n) as id, n.model_name as name, count(c) as count
     """
     # Get the data and convert it to dictionaries
     types = connection.cypher(query).to_dicts()
     obj   = {}
     for t in types:
         # Use name as identifier
         obj[ t["name"] ] = t
         # name is now useless
         del t["name"]
     return obj
Example #15
    def __get_topic_entities_count(topic):
        """

        Return the number of entities in the current topic.
        Used to inform administrator.
        Expensive request. Can be cached a long time.

        """
        query = """
            START root=node(*)
            MATCH p = (root)--(leaf)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(leaf.name)
            AND type.app_label = '{app_label}'
            AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
            RETURN count(leaf) AS count
        """.format(app_label=Migration.__get_topic_label(topic))
        response = connection.cypher(query).to_dicts()[0].get("count")
        return response
Example #16
    def __get_topic_entities_count(topic):
        """

        Return the number of entities in the current topic.
        Used to inform administrator.
        Expensive request. Can be cached a long time.

        """
        query = """
            START root=node(*)
            MATCH p = (root)--(leaf)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(leaf.name)
            AND type.app_label = '{app_label}'
            AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
            RETURN count(leaf) AS count
        """.format(app_label = Migration.__get_topic_label(topic))
        response = connection.cypher(query).to_dicts()[0].get("count")
        return response
Example #17
 def summary_types(self, bundle, request):
     app_label = self.topic.app_label()
     # Query to aggregate instance counts by model type
     query = """
         START n=node(*)
         MATCH (c)<-[r:`<<INSTANCE>>`]-(n)
         WHERE HAS(n.model_name)
         AND n.app_label = '%s'
         RETURN ID(n) as id, n.model_name as name, count(c) as count
     """ % app_label
     # Get the data and convert it to dictionaries
     types = connection.cypher(query).to_dicts()
     obj = {}
     for t in types:
         # Use name as identifier
         obj[t["name"].lower()] = t
         # name is now useless
         del t["name"]
     return obj
Example #18
 def summary_countries(self, bundle):
     model_id = get_model_node_id(Country)
     # The Country isn't set yet in neo4j
     if model_id is None: raise Http404()
     # Query to aggregate relationship counts by country
     query = """
         START n=node(%d)
         MATCH (i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
         WHERE HAS(country.isoa3)
         RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
     """ % int(model_id)
     # Get the data and convert it to dictionaries
     countries = connection.cypher(query).to_dicts()
     obj       = {}
     for country in countries:
         # Use isoa3 as identifier
         obj[ country["isoa3"] ] = country
         # ISOA3 is now useless
         del country["isoa3"]
     return obj
Example #19
 def search(self, terms):
     if type(terms) in [str, unicode]:
         terms = [terms]
     matches = []
     for term in terms:
         term = unicode(term).lower()
         # Strip quotes, backticks, braces, parentheses and pipes from the term
         term = re.sub(r"[\"'`;:{}()|]", '', term).strip()
         matches.append("LOWER(node.name) =~ '.*(%s).*'" % term)
     # Query to get every result
     query = """
         START root=node(0)
         MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
         WHERE HAS(node.name) """
     if matches:
         query += """
         AND (%s) """ % ( " OR ".join(matches))
     query += """
         AND type.app_label = '%s'
         RETURN ID(node) as id, node.name as name, type.model_name as model
     """ % (self.topic.app_label())
     return connection.cypher(query).to_dicts()
Example #20
 def get_most_related(self, rel):
     # Cache key to save the result of this function for each topic and rel
     cache_key = "most_related_%s" % rel
     # Get cache value
     most_related = topic_cache.get(self.topic, cache_key)
     # Return cache value
     if most_related is not None: return most_related
     # Build query
     query = """
         START root=node(0)
         MATCH target-[r:`%s`]->(edge)<-[`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
         WHERE type.app_label = "%s"
         AND HAS(edge.name)
         RETURN COUNT(target) as cnt, ID(edge) as id, edge.name as name, type.model_name as model
         ORDER BY cnt DESC
         LIMIT 5
     """ % ( rel, self.topic.app_label() )
     # Get data from neo4j
     most_related = connection.cypher(query).to_dicts()
     # Cache and return result
     topic_cache.set(self.topic, cache_key, most_related)
     return most_related
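
The cache-aside flow above (look up, compute on a miss, store) can be sketched without the project's topic_cache. Here a plain dict with a timestamp stands in for it, and compute is any callable such as the Cypher query above:

import time

_cache = {}

def get_most_related_cached(rel, compute, ttl=60 * 60):
    # One cache entry per relationship type
    cache_key = "most_related_%s" % rel
    hit = _cache.get(cache_key)
    if hit is not None and time.time() - hit[1] < ttl:
        return hit[0]
    # Cache miss or stale entry: run the expensive computation and store it
    value = compute(rel)
    _cache[cache_key] = (value, time.time())
    return value

# Usage sketch with a dummy compute function
print(get_most_related_cached("FANCIES", lambda rel: [{'name': 'A', 'cnt': 3}]))
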
Example #21
    def entities_count(self):
        """

        Return the number of entities in the current topic.
        Used to inform administrator.
        Expensive request. Cached a long time.

        """
        if not self.id: return 0
        cache_key = "entities_count"
        response = utils.topic_cache.get(self, cache_key)
        if response is None:
            query = """
                START a = node(0)
                MATCH a-[`<<TYPE>>`]->(b)--> c
                WHERE b.app_label = "{app_label}"
                AND not(has(c._relationship))
                RETURN count(c) as count;
            """.format(app_label=self.app_label())
            response = connection.cypher(query).to_dicts()[0].get("count")
            utils.topic_cache.set(self, cache_key, response, 60*60*12) # cached 12 hours
        return response
Example #22
 def summary_countries(self, bundle, request):
     app_label = self.topic.app_label()
     # Query to aggregate relationship counts by country
     query = """
         START n=node(*)
         MATCH (m)-[:`<<INSTANCE>>`]->(i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
         WHERE HAS(country.isoa3)
         AND HAS(n.model_name)
         AND n.model_name = 'Country'
         AND n.app_label = '%s'
         AND HAS(country.isoa3)
         RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
     """ % app_label
     # Get the data and convert it to dictionaries
     countries = connection.cypher(query).to_dicts()
     obj = {}
     for country in countries:
         # Use isoa3 as identifier
         obj[country["isoa3"]] = country
         # ISOA3 is now useless
         del country["isoa3"]
     return obj
Example #23
    def by_name(self, terms):
        if type(terms) in [str, unicode]:
            terms = [terms]
        matches = []
        for term in terms:
            term = unicode(term).lower()
            # Strip quotes, backticks, braces, parentheses and pipes from the term
            term = re.sub(r"[\"'`;:{}()|]", '', term).strip()
            matches.append("LOWER(node.name) =~ '.*(%s).*'" % term)
        # Query to get every result
        query = """
            START root=node(0)
            MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
            WHERE HAS(node.name) """
        if matches:
            query += """
            AND (%s) """ % ( " OR ".join(matches))
        query += """
            AND type.app_label = '%s'
            RETURN ID(node) as id, node.name as name, type.model_name as model
        """ % (self.topic.app_label())

        return connection.cypher(query).to_dicts()
Example #24
 def get_most_related(self, rel):
     # Cache key to save the result of this function for each topic and rel
     cache_key = "most_related_%s" % rel
     # Get cache value
     most_related = topic_cache.get(self.topic, cache_key)
     # Return cache value
     if most_related is not None: return most_related
     # Build query
     query = """
         START root=node(0)
         MATCH target-[r:`%s`]->(edge)<-[`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
         WHERE type.app_label = "%s"
         AND HAS(edge.name)
         RETURN COUNT(target) as cnt, ID(edge) as id, edge.name as name, type.model_name as model
         ORDER BY cnt DESC
         LIMIT 5
     """ % ( rel, self.topic.app_label() )
     # Get data from neo4j
     most_related = connection.cypher(query).to_dicts()
     # Cache and return result
     topic_cache.set(self.topic, cache_key, most_related)
     return most_related
Example #25
 def summary_countries(self, bundle, request):
     app_label = self.topic.app_label()
     # Query to aggregate relationship counts by country
     query = """
         START n=node(*)
         MATCH (m)-[:`<<INSTANCE>>`]->(i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
         WHERE HAS(country.isoa3)
         AND HAS(n.model_name)
         AND n.model_name = 'Country'
         AND n.app_label = '%s'
         AND HAS(country.isoa3)
         RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
     """ % app_label
     # Get the data and convert it to dictionaries
     countries = connection.cypher(query).to_dicts()
     obj       = {}
     for country in countries:
         # Use isoa3 as identifier
         obj[ country["isoa3"] ] = country
         # ISOA3 is now useless
         del country["isoa3"]
     return obj
Example #26
    def entities_count(self):
        """

        Return the number of entities in the current topic.
        Used to inform administrator.
        Expensive request. Cached a long time.

        """
        if not self.id: return 0
        cache_key = "entities_count"
        response = utils.topic_cache.get(self, cache_key)
        if response is None:
            query = """
                START a = node(0)
                MATCH a-[:`<<TYPE>>`]->(b)-[:`<<INSTANCE>>`]->(c)
                WHERE b.app_label = "{app_label}"
                AND not(has(c._relationship))
                RETURN count(c) as count;
            """.format(app_label=self.app_label())
            response = connection.cypher(query).to_dicts()[0].get("count")
            utils.topic_cache.set(self, cache_key, response,
                                  60 * 60 * 12)  # cached 12 hours
        return response
Example #27
    def _get_leafs_and_edges(topic, depth, root_node):
        from neo4django.db import connection
        leafs = {}
        edges = []
        leafs_related = []
        ###
        # First we retrieve every leaf in the graph
        if root_node == "0":
            query = """
                START root = node({root})
                MATCH root-[`<<TYPE>>`]->(type)--> leaf
                WHERE type.app_label = '{app_label}'
                AND not(has(leaf._relationship))
                RETURN leaf, ID(leaf) as id_leaf, type
            """.format(root=root_node, depth=depth, app_label=topic.app_label())
        else:
            query = """
                START root=node({root})
                MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(leaf.name)
                AND type.app_label = '{app_label}'
                AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
                RETURN leaf, ID(leaf) as id_leaf, type
            """.format(root=root_node, depth=depth, app_label=topic.app_label())
        rows = connection.cypher(query).to_dicts()

        if root_node != "0":
            # We need to retrieve the root in another request
            # TODO : enhance that
            query = """
                START root=node({root})
                MATCH (root)<-[:`<<INSTANCE>>`]-(type)
                RETURN root as leaf, ID(root) as id_leaf, type
            """.format(root=root_node)
            for row in connection.cypher(query).to_dicts():
                rows.append(row)
        # filter rows using the models in ontology
        # FIXME: should be in the cypher query
        models_in_ontology = map(lambda m: m.__name__.lower(), topic.get_models())
        rows = filter(lambda r: r['type']['data']['model_name'].lower() in models_in_ontology, rows)

        for row in rows:
            row['leaf']['data']['_id'] = row['id_leaf']
            row['leaf']['data']['_type'] = row['type']['data']['model_name']
            leafs[row['id_leaf']] = row['leaf']['data']
        if len(leafs) == 0:
            return ([], [])

        # Then we retrieve all edges
        query = """
            START A=node({leafs})
            MATCH (A)-[rel]->(B)
            WHERE type(rel) <> "<<INSTANCE>>"
            RETURN ID(A) as head, type(rel) as relation, id(B) as tail
        """.format(leafs=','.join([str(id) for id in leafs.keys()]))
        rows = connection.cypher(query).to_dicts()
        for row in rows:
            try:
                if (leafs[row['head']] and leafs[row['tail']]):
                    leafs_related.extend([row['head'], row['tail']])
                    edges.append([row['head'], row['relation'], row['tail']])
            except KeyError:
                pass
        # filter edges with relations in ontology
        models_fields         = itertools.chain(*map(iterate_model_fields, topic.get_models()))
        relations_in_ontology = set(map(lambda _: _.get("rel_type"), models_fields))
        edges                 = [e for e in edges if e[1] in relations_in_ontology]
        # filter leafs without relations
        # FIXME: should be in the cypher query
        leafs_related = set(leafs_related)
        leafs = dict((k, v) for k, v in leafs.iteritems() if k in leafs_related)
        return (leafs, edges)
Example #28
    def summary_mine(self, bundle, request):
        app_label = self.topic.app_label()
        self.method_check(request, allowed=['get'])

        limit = int(request.GET.get('limit', 20))
        offset = int(request.GET.get('offset', 0))

        if request.user.id is None:
            object_list = {
                'objects': [],
                'meta': {
                    'page': 1,
                    'limit': limit,
                    'total_count': 0
                }
            }
        else:
            query = """
                START root=node(0)
                MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
                WHERE HAS(node.name)
                AND HAS(node._author)
                AND HAS(type.model_name)
                AND %s IN node._author
                AND type.app_label = '%s'
                RETURN DISTINCT ID(root) as id, node.name as name, type.model_name as model
            """ % ( int(request.user.id), app_label )

            matches      = connection.cypher(query).to_dicts()
            paginator    = Paginator(matches, limit)

            try:
                p     = self.get_page_number(offset, limit)
                page  = paginator.page(p)
            except InvalidPage:
                raise Http404("Sorry, no results on that page.")

            objects = []
            for result in page.object_list:
                label = result.get("name", None)
                objects.append({
                    'label': label,
                    'subject': {
                        "name": result.get("id", None),
                        "label": label
                    },
                    'predicate': {
                        "label": "is instance of",
                        "name": "<<INSTANCE>>"
                    },
                    'object': result.get("model", None)
                })

            object_list = {
                'objects': objects,
                'meta': {
                    'page': p,
                    'limit': limit,
                    'total_count': paginator.count
                }
            }

        return object_list
Example #29
    def summary_mine(self, bundle, request):
        app_label = self.topic.app_label()
        self.method_check(request, allowed=['get'])

        limit = int(request.GET.get('limit', 20))
        offset = int(request.GET.get('offset', 0))

        if request.user.id is None:
            object_list = {
                'objects': [],
                'meta': {
                    'page': 1,
                    'limit': limit,
                    'total_count': 0
                }
            }
        else:
            query = """
                START root=node(0)
                MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
                WHERE HAS(node.name)
                AND HAS(node._author)
                AND HAS(type.model_name)
                AND %s IN node._author
                AND type.app_label = '%s'
                RETURN DISTINCT ID(root) as id, node.name as name, type.model_name as model
            """ % (int(request.user.id), app_label)

            matches = connection.cypher(query).to_dicts()
            paginator = Paginator(matches, limit)

            try:
                p = self.get_page_number(offset, limit)
                page = paginator.page(p)
            except InvalidPage:
                raise Http404("Sorry, no results on that page.")

            objects = []
            for result in page.object_list:
                label = result.get("name", None)
                objects.append({
                    'label': label,
                    'subject': {
                        "name": result.get("id", None),
                        "label": label
                    },
                    'predicate': {
                        "label": "is instance of",
                        "name": "<<INSTANCE>>"
                    },
                    'object': result.get("model", None)
                })

            object_list = {
                'objects': objects,
                'meta': {
                    'page': p,
                    'limit': limit,
                    'total_count': paginator.count
                }
            }

        return object_list
Example #30
    def get_graph(self, request, **kwargs):
        self.method_check(request, allowed=['get'])
        self.throttle_check(request)

        depth = int(request.GET['depth']) if 'depth' in request.GET.keys() else 1
        aggregation_threshold = 10

        def reduce_destination(outgoing_links, keep_id=None):
            # We count the popularity of each entering relationship by node
            counter = {}
            # Counter will have the following structure
            # {
            #   "<NAME_OF_A_RELATIONSHIP>" : {
            #       "<IDX_OF_A_DESTINATION>": set("<IDX_OF_AN_ORIGIN>", ...)
            #   }
            # }
            for origin in outgoing_links:
                for rel in outgoing_links[origin]:
                    for dest in outgoing_links[origin][rel]:
                        if int(origin) != int(keep_id):
                            counter[rel]       = counter.get(rel, {})
                            counter[rel][dest] = counter[rel].get(dest, set())
                            counter[rel][dest].add(origin)
            # List of entering link (aggregate outside 'outgoing_links')
            entering_links = {}
            # Now check which links must be moved to the entering links
            for rel in counter:
                for dest in counter[rel]:
                    # Too many entering links!
                    if len(counter[rel][dest]) > aggregation_threshold:
                        entering_links[dest] = entering_links.get(dest, {"_AGGREGATION_": set() })
                        entering_links[dest]["_AGGREGATION_"] = entering_links[dest]["_AGGREGATION_"].union(counter[rel][dest])
            # We remove elements from a copy to avoid changing the size of the
            # dict during iteration
            outgoing_links_copy = copy.deepcopy(outgoing_links)
            for i in entering_links:
                # Convert aggregation set to list for JSON serialization
                entering_links[i]["_AGGREGATION_"] = list( entering_links[i]["_AGGREGATION_"] )
                # Remove this destination from the outgoing links
                for j in outgoing_links:
                    if int(j) == int(keep_id): continue
                    for rel in outgoing_links[j]:
                        if i in outgoing_links[j][rel]:
                            # Remove the entering id
                            outgoing_links_copy[j][rel].remove(i)
                        # Remove the relationship
                        if rel in outgoing_links_copy[j] and len(outgoing_links_copy[j][rel]) == 0:
                            del outgoing_links_copy[j][rel]
                    # Remove the origin
                    if len(outgoing_links_copy[j]) == 0:
                        del outgoing_links_copy[j]

            return outgoing_links_copy, entering_links


        def reduce_origin(rows):
            # No nodes, no links
            if len(rows) == 0: return ([], [],)
            # Initialize structures
            all_nodes = dict()
            # Use defaultdict() to create somewhat of an auto-vivifying list
            # We want to build a structure of the form:
            # { source_id : { relation_name : [ target_ids ] } }
            # A set() instead of a list() would avoid duplicate checks, but it breaks json.dumps()
            all_links = defaultdict(lambda: dict(__count=0, __relations=defaultdict(list)))
            IDs = set(sum([row['nodes'] for row in rows], []))

            # Get all entities from their IDs
            query = """
                START root = node({0})
                MATCH (root)-[:`<<INSTANCE>>`]-(type)
                WHERE type.app_label = '{1}'
                AND HAS(root.name)
                RETURN ID(root) as ID, root, type
            """.format(','.join([str(ID) for ID in IDs]), get_model_topic(self.get_model()))
            all_raw_nodes = connection.cypher(query).to_dicts()
            for row in all_raw_nodes:
                # Twist some data in the entity
                for key in row['root']['data'].keys():
                    if key[0] == '_': del row['root']['data'][key]
                row['root']['data']['_type'] = row['type']['data']['model_name']
                row['root']['data']['_id'] = row['ID']

                all_nodes[row['ID']] = row['root']['data']

            for row in rows:
                nodes = row['nodes']
                i = 0
                for relation in row['relations']:
                    try:
                        if all_nodes[nodes[i]] is None or all_nodes[nodes[i + 1]] is None: continue
                        (a, b) = (nodes[i], nodes[i + 1])
                        if re.search('^'+to_underscores(all_nodes[nodes[i]]['_type']), relation) is None:
                            (a, b) = (nodes[i + 1], nodes[i])
                        if b not in all_links[a]['__relations'][relation]:
                            all_links[a]['__count'] += 1
                            all_links[a]['__relations'][relation].append(b)
                    except KeyError: pass
                    i += 1

            # Sort and aggregate nodes when we're over the threshold
            for node in all_links.keys():
                shortcut = all_links[node]['__relations']
                if all_links[node]['__count'] >= aggregation_threshold:
                    sorted_relations = sorted([(len(shortcut[rel]), rel) for rel in shortcut],
                                              key=lambda to_sort: to_sort[0])
                    shortcut = defaultdict(list)
                    i = 0
                    while i < aggregation_threshold:
                        for rel in sorted_relations:
                            try:
                                node_id = all_links[node]['__relations'][rel[1]].pop()
                                shortcut[rel[1]].append(node_id)
                                i += 1
                            except IndexError:
                                # Must except IndexError if we .pop() on an empty list
                                pass
                            if i >= aggregation_threshold: break
                    shortcut['_AGGREGATION_'] = sum(all_links[node]['__relations'].values(), [])
                all_links[node] = shortcut

            return (all_nodes, all_links)

        query = """
            START root=node({0})
            MATCH path = (root)-[*1..{1}]-(leaf)
            WITH extract(r in relationships(path)|type(r)) as relations, extract(n in nodes(path)|ID(n)) as nodes
            WHERE ALL(rel  in relations WHERE rel <> "<<INSTANCE>>")
            RETURN relations, nodes
        """.format(kwargs['pk'], depth)
        rows = connection.cypher(query).to_dicts()

        nodes, links                   = reduce_origin(rows)
        outgoing_links, entering_links = reduce_destination(links, keep_id=kwargs['pk'])

        self.log_throttled_access(request)
        return self.create_response(request, {'nodes':nodes,'outgoing_links': outgoing_links, 'entering_links': entering_links})
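
The heart of reduce_destination above is counting, for every (relationship, destination) pair, how many distinct origins point at it, then aggregating the pair once that count passes the threshold. A toy sketch of the counting step with defaultdict, using the same {origin: {relation: [destinations]}} shape:

from collections import defaultdict

def count_incoming(outgoing_links):
    # Map each (relation, destination) pair to the set of origins pointing at it
    counter = defaultdict(lambda: defaultdict(set))
    for origin, rels in outgoing_links.items():
        for rel, destinations in rels.items():
            for dest in destinations:
                counter[rel][dest].add(origin)
    return counter

links = {1: {'KNOWS': [3]}, 2: {'KNOWS': [3]}, 4: {'LIKES': [3]}}
counter = count_incoming(links)
assert counter['KNOWS'][3] == {1, 2}   # two distinct origins share the same target
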
Example #31
def render_csv_zip_file(topic, model_type=None, query=None, cache_key=None):
    def write_all_in_zip(objects, columns, zip_file, model_name=None):
        """
        Write the csv file from `objects` and `columns` and add it into the `zip_file` file.
        If given, `model_name` will be the name of the csv file.
        If `cache_key` is defined, will put the generated file name in the default cache with the given key.
        """
        # set a getattr function depending on the type of `objects`
        if isinstance(objects[0], dict):

            def _getattr(o, prop):
                return o.get(prop, "")
        else:

            def _getattr(o, prop):
                return getattr(o, prop)

        all_ids = []
        csv_file = StringIO()
        model_name = model_name or objects[0].__class__.__name__
        spamwriter = csv.writer(csv_file)
        spamwriter.writerow(["%s_id" % (model_name)] + columns)  # header
        for obj in objects:
            all_ids.append(_getattr(obj, 'id'))
            obj_columns = []
            for column in columns:
                val = _getattr(obj, column)
                if val:
                    val = unicode(val).encode('utf-8')
                obj_columns.append(val)
            spamwriter.writerow([_getattr(obj, 'id')] + obj_columns)
        zip_file.writestr("{0}.csv".format(model_name), csv_file.getvalue())
        csv_file.close()
        return all_ids

    def get_columns(model):
        edges = dict()
        columns = []
        for field in utils.iterate_model_fields(model):
            if field['type'] != 'Relationship':
                if field['name'] not in ['id']:
                    columns.append(field['name'])
            else:
                edges[field['rel_type']] = [
                    field['model'], field['name'], field['related_model']
                ]
        return (columns, edges)

    buffer = StringIO()
    zip_file = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)
    models = topic.get_models()
    if not query:
        export_edges = not model_type
        for model in models:
            if model_type and model.__name__.lower() != model_type:
                continue
            (columns, edges) = get_columns(model)

            objects = model.objects.all()
            if objects.count() > 0:
                all_ids = write_all_in_zip(objects, columns, zip_file)
                if export_edges:
                    for key in edges.keys():
                        rows = connection.cypher("""
                            START root=node({nodes})
                            MATCH (root)-[r:`{rel}`]->(leaf)
                            RETURN id(root) as id_from, id(leaf) as id_to
                        """.format(nodes=','.join([str(id) for id in all_ids]),
                                   rel=key)).to_dicts()
                        csv_file = StringIO()
                        spamwriter = csv.writer(csv_file)
                        spamwriter.writerow([
                            "%s_id" % (edges[key][0]), edges[key][1],
                            "%s_id" % (edges[key][2])
                        ])  # header
                        for row in rows:
                            spamwriter.writerow(
                                [row['id_from'], None, row['id_to']])
                        zip_file.writestr(
                            "{0}_{1}.csv".format(edges[key][0], edges[key][1]),
                            csv_file.getvalue())
                        csv_file.close()
    else:
        page = 1
        limit = 1
        objects = []
        total = -1
        while len(objects) != total:
            try:
                result = topic.rdf_search(query=query,
                                          offset=(page - 1) * limit)
                objects += result['objects']
                total = result['meta']['total_count']
                page += 1
            except KeyError:
                break
            except InvalidPage:
                break
        for model in models:
            if model.__name__ == objects[0]['model']:
                break
        (columns, _) = get_columns(model)
        write_all_in_zip(objects, columns, zip_file, model.__name__)
    zip_file.close()
    buffer.flush()
    # save the zip in `base_dir`
    base_dir = "csv-exports"
    file_name = "%s/d.io-export-%s.zip" % (base_dir, topic.slug)
    # name can be changed by default storage if previous exists
    file_name = default_storage.save(file_name, ContentFile(buffer.getvalue()))
    buffer.close()
    file_name = "%s%s" % (settings.MEDIA_URL, file_name)
    # save in cache if cache_key is defined
    if cache_key:
        utils.topic_cache.set(topic, cache_key, file_name, 60 * 60 * 24)
    return dict(file_name=file_name)
Example #32
        def reduce_origin(rows):
            # No nodes, no links
            if len(rows) == 0: return ([], [],)
            # Initialize structures
            all_nodes = dict()
            # Use defaultdict() to create somewhat of an auto-vivifying list
            # We want to build a structure of the form:
            # { source_id : { relation_name : [ target_ids ] } }
            # A set() instead of a list() would avoid duplicate checks, but it breaks json.dumps()
            all_links = defaultdict(lambda: dict(__count=0, __relations=defaultdict(list)))
            IDs = set(sum([row['nodes'] for row in rows], []))

            # Get all entities from their IDs
            query = """
                START root = node({0})
                MATCH (root)-[:`<<INSTANCE>>`]-(type)
                WHERE type.app_label = '{1}'
                AND HAS(root.name)
                RETURN ID(root) as ID, root, type
            """.format(','.join([str(ID) for ID in IDs]), get_model_topic(self.get_model()))
            all_raw_nodes = connection.cypher(query).to_dicts()
            for row in all_raw_nodes:
                # Twist some data in the entity
                for key in row['root']['data'].keys():
                    if key[0] == '_': del row['root']['data'][key]
                row['root']['data']['_type'] = row['type']['data']['model_name']
                row['root']['data']['_id'] = row['ID']

                all_nodes[row['ID']] = row['root']['data']

            for row in rows:
                nodes = row['nodes']
                i = 0
                for relation in row['relations']:
                    try:
                        if all_nodes[nodes[i]] is None or all_nodes[nodes[i + 1]] is None: continue
                        (a, b) = (nodes[i], nodes[i + 1])
                        if re.search('^'+to_underscores(all_nodes[nodes[i]]['_type']), relation) is None:
                            (a, b) = (nodes[i + 1], nodes[i])
                        if b not in all_links[a]['__relations'][relation]:
                            all_links[a]['__count'] += 1
                            all_links[a]['__relations'][relation].append(b)
                    except KeyError: pass
                    i += 1

            # Sort and aggregate nodes when we're over the threshold
            for node in all_links.keys():
                shortcut = all_links[node]['__relations']
                if all_links[node]['__count'] >= aggregation_threshold:
                    sorted_relations = sorted([(len(shortcut[rel]), rel) for rel in shortcut],
                                              key=lambda to_sort: to_sort[0])
                    shortcut = defaultdict(list)
                    i = 0
                    while i < aggregation_threshold:
                        for rel in sorted_relations:
                            try:
                                node_id = all_links[node]['__relations'][rel[1]].pop()
                                shortcut[rel[1]].append(node_id)
                                i += 1
                            except IndexError:
                                # Must except IndexError if we .pop() on an empty list
                                pass
                            if i >= aggregation_threshold: break
                    shortcut['_AGGREGATION_'] = sum(all_links[node]['__relations'].values(), [])
                all_links[node] = shortcut

            return (all_nodes, all_links)
Example #33
    def _get_leafs_and_edges(topic, depth, root_node):
        from neo4django.db import connection
        leafs = {}
        edges = []
        leafs_related = []
        ###
        # First we retrieve every leaf in the graph
        if root_node == "0":
            query = """
                START root = node({root})
                MATCH root-[`<<TYPE>>`]->(type)--> leaf
                WHERE type.app_label = '{app_label}'
                AND not(has(leaf._relationship))
                RETURN leaf, ID(leaf) as id_leaf, type
            """.format(root=root_node,
                       depth=depth,
                       app_label=topic.app_label())
        else:
            query = """
                START root=node({root})
                MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
                WHERE HAS(leaf.name)
                AND type.app_label = '{app_label}'
                AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
                RETURN leaf, ID(leaf) as id_leaf, type
            """.format(root=root_node,
                       depth=depth,
                       app_label=topic.app_label())
        rows = connection.cypher(query).to_dicts()

        if root_node != "0":
            # We need to retrieve the root in another request
            # TODO : enhance that
            query = """
                START root=node({root})
                MATCH (root)<-[:`<<INSTANCE>>`]-(type)
                RETURN root as leaf, ID(root) as id_leaf, type
            """.format(root=root_node)
            for row in connection.cypher(query).to_dicts():
                rows.append(row)
        # filter rows using the models in ontology
        # FIXME: should be in the cypher query
        models_in_ontology = map(lambda m: m.__name__.lower(),
                                 topic.get_models())
        rows = filter(
            lambda r: r['type']['data']['model_name'].lower() in
            models_in_ontology, rows)

        for row in rows:
            row['leaf']['data']['_id'] = row['id_leaf']
            row['leaf']['data']['_type'] = row['type']['data']['model_name']
            leafs[row['id_leaf']] = row['leaf']['data']
        if len(leafs) == 0:
            return ([], [])

        # Then we retrieve all edges
        query = """
            START A=node({leafs})
            MATCH (A)-[rel]->(B)
            WHERE type(rel) <> "<<INSTANCE>>"
            RETURN ID(A) as head, type(rel) as relation, id(B) as tail
        """.format(leafs=','.join([str(id) for id in leafs.keys()]))
        rows = connection.cypher(query).to_dicts()
        for row in rows:
            try:
                if (leafs[row['head']] and leafs[row['tail']]):
                    leafs_related.extend([row['head'], row['tail']])
                    edges.append([row['head'], row['relation'], row['tail']])
            except KeyError:
                pass
        # filter edges with relations in ontology
        models_fields = itertools.chain(
            *map(iterate_model_fields, topic.get_models()))
        relations_in_ontology = set(
            map(lambda _: _.get("rel_type"), models_fields))
        edges = [e for e in edges if e[1] in relations_in_ontology]
        # filter leafs without relations
        # FIXME: should be in the cypher query
        leafs_related = set(leafs_related)
        leafs = dict(
            (k, v) for k, v in leafs.iteritems() if k in leafs_related)
        return (leafs, edges)
Example #34
def render_csv_zip_file(topic, model_type=None, query=None, cache_key=None):

    def write_all_in_zip(objects, columns, zip_file, model_name=None):
        """
        Write the csv file from `objects` and `columns` and add it into the `zip_file` file.
        If given, `model_name` will be the name of the csv file.
        If `cache_key` is defined, will put the generated file name in the default cache with the given key.
        """
        # set a getattr function depending on the type of `objects`
        if isinstance(objects[0], dict):
            def _getattr(o, prop): return o.get(prop, "")
        else:
            def _getattr(o, prop): return getattr(o, prop)
        all_ids    = []
        csv_file   = StringIO()
        model_name = model_name or objects[0].__class__.__name__
        spamwriter = csv.writer(csv_file)
        spamwriter.writerow(["%s_id" % (model_name)] + columns) # header
        for obj in objects:
            all_ids.append(_getattr(obj, 'id'))
            obj_columns = []
            for column in columns:
                val = _getattr(obj, column)
                if val:
                    val = unicode(val).encode('utf-8')
                obj_columns.append(val)
            spamwriter.writerow([_getattr(obj, 'id')] + obj_columns)
        zip_file.writestr("{0}.csv".format(model_name), csv_file.getvalue())
        csv_file.close()
        return all_ids

    def get_columns(model):
        edges   = dict()
        columns = []
        for field in utils.iterate_model_fields(model):
            if field['type'] != 'Relationship':
                if field['name'] not in ['id']:
                    columns.append(field['name'])
            else:
                edges[field['rel_type']] = [field['model'], field['name'], field['related_model']]
        return (columns, edges)

    buffer   = StringIO()
    zip_file = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)
    models   = topic.get_models()
    if not query:
        export_edges = not model_type
        for model in models:
            if model_type and model.__name__.lower() != model_type:
                continue
            (columns, edges) = get_columns(model)
            objects = model.objects.all()
            if objects.count() > 0:
                all_ids = write_all_in_zip(objects, columns, zip_file)
                if export_edges:
                    for key in edges.keys():
                        rows = connection.cypher("""
                            START root=node({nodes})
                            MATCH (root)-[r:`{rel}`]->(leaf)
                            RETURN id(root) as id_from, id(leaf) as id_to
                        """.format(nodes=','.join([str(id) for id in all_ids]), rel=key)).to_dicts()
                        csv_file = StringIO()
                        spamwriter = csv.writer(csv_file)
                        spamwriter.writerow(["%s_id" % (edges[key][0]), edges[key][1], "%s_id" % (edges[key][2])]) # header
                        for row in rows:
                            spamwriter.writerow([row['id_from'], None, row['id_to']])
                        zip_file.writestr("{0}_{1}.csv".format(edges[key][0], edges[key][1]), csv_file.getvalue())
                        csv_file.close()
    else:
        page        = 1
        limit       = 1
        objects     = []
        total       = -1
        while len(objects) != total:
            try:
                result   = topic.rdf_search(query=query, offset=(page - 1) * limit)
                objects += result['objects']
                total    = result['meta']['total_count']
                page    += 1
            except KeyError:
                break
            except InvalidPage:
                break
        for model in models:
            if model.__name__ == objects[0]['model']:
                break
        (columns, _) = get_columns(model)
        write_all_in_zip(objects, columns, zip_file, model.__name__)
    zip_file.close()
    buffer.flush()
    # save the zip in `base_dir`
    base_dir  = "csv-exports"
    file_name = "%s/d.io-export-%s.zip" % (base_dir, topic.slug)
    # name can be changed by default storage if previous exists
    file_name = default_storage.save(file_name, ContentFile(buffer.getvalue()))
    buffer.close()
    file_name = "%s%s" % (settings.MEDIA_URL, file_name)
    # save in cache if cache_key is defined
    if cache_key:
        utils.topic_cache.set(topic, cache_key, file_name, 60*60*24)
    return dict(file_name=file_name)
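
The export above builds the whole ZIP in memory with StringIO and zipfile.writestr. A minimal standalone version of that step, using the same Python 2 idioms as the listing (on Python 3 the ZIP buffer would be io.BytesIO and the CSV buffer io.StringIO):

import csv
import zipfile
from StringIO import StringIO  # Python 2, matching the code above

buffer = StringIO()
zip_file = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)

# Write one CSV file into the archive without touching the filesystem
csv_file = StringIO()
writer = csv.writer(csv_file)
writer.writerow(["Person_id", "name"])  # header, mirroring the "<model>_id" convention above
writer.writerow([1, "Ada"])
zip_file.writestr("Person.csv", csv_file.getvalue())
csv_file.close()

zip_file.close()
# buffer.getvalue() now holds the complete archive, ready to hand to a storage backend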