def get_graph(self, request, **kwargs):
    self.method_check(request, allowed=['get'])
    self.throttle_check(request)
    depth = int(request.GET['depth']) if 'depth' in request.GET.keys() else 1
    aggregation_threshold = 10
    ###
    # First we retrieve every leaf in the graph
    query = """
        START root=node({root})
        MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
        WHERE HAS(leaf.name)
        AND type.app_label = '{app_label}'
        AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
        RETURN leaf, ID(leaf) as id_leaf, type
    """.format(root=kwargs['pk'], depth=depth,
               app_label=get_model_topic(self.get_model()))
    rows = connection.cypher(query).to_dicts()
    leafs = {}
    # We need to retrieve the root in another request
    # TODO: enhance that
    query = """
        START root=node({root})
        MATCH (root)<-[:`<<INSTANCE>>`]-(type)
        RETURN root as leaf, ID(root) as id_leaf, type
    """.format(root=kwargs['pk'])
    for row in connection.cypher(query).to_dicts():
        rows.append(row)
    for row in rows:
        row['leaf']['data']['_id'] = row['id_leaf']
        row['leaf']['data']['_type'] = row['type']['data']['model_name']
        leafs[row['id_leaf']] = row['leaf']['data']
    # ###
    ###
    # Then we retrieve all edges
    query = """
        START A=node({leafs})
        MATCH (A)-[rel]->(B)
        WHERE type(rel) <> "<<INSTANCE>>"
        RETURN ID(A) as head, type(rel) as relation, id(B) as tail
    """.format(leafs=','.join([str(id) for id in leafs.keys()]))
    rows = connection.cypher(query).to_dicts()
    edges = []
    for row in rows:
        try:
            if leafs[row['head']] and leafs[row['tail']]:
                edges.append([row['head'], row['relation'], row['tail']])
        except KeyError:
            pass
    # ###
    self.log_throttled_access(request)
    return self.create_response(request, {'leafs': leafs, 'edges': edges})
def query(self, root="*"):
    # Replace the query's tags by their chosen values
    query = self.query_str.format(
        root=root,
        match=self.match.format(select=self.select, model=self.model),
        select=self.select,
    )
    # Execute the query and return the result as a dictionary
    return self.transform(connection.cypher(query).to_dicts())
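# A minimal, self-contained sketch of the two-pass template composition used
# by query() above. The class name and template values are illustrative
# assumptions, not taken from the source:
class QueryTemplateSketch(object):
    query_str = "START root=node({root}) {match} RETURN {select}"
    match = "MATCH (root)-[:`{model}`]->({select})"
    select = "leaf"
    model = "COMES_FROM"

    def build(self, root="*"):
        # The inner `match` template is formatted first, then injected
        # into the outer query string
        return self.query_str.format(
            root=root,
            match=self.match.format(select=self.select, model=self.model),
            select=self.select,
        )

# QueryTemplateSketch().build(root="0") yields:
# "START root=node(0) MATCH (root)-[:`COMES_FROM`]->(leaf) RETURN leaf"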
def summary_mine(self, bundle):
    request = bundle.request
    self.method_check(request, allowed=['get'])
    self.throttle_check(request)
    if not request.user.id:
        raise UnauthorizedError('This method requires authentication')
    query = """
        START root=node(*)
        MATCH (type)-[`<<INSTANCE>>`]->(root)
        WHERE HAS(root.name)
        AND HAS(root._author)
        AND HAS(type.model_name)
        AND %s IN root._author
        RETURN DISTINCT ID(root) as id, root.name as name, type.name as model
    """ % int(request.user.id)
    matches = connection.cypher(query).to_dicts()
    count = len(matches)
    limit = int(request.GET.get('limit', 20))
    paginator = Paginator(matches, limit)
    try:
        p = int(request.GET.get('page', 1))
        page = paginator.page(p)
    except InvalidPage:
        raise Http404("Sorry, no results on that page.")
    objects = []
    for result in page.object_list:
        label = result.get("name", None)
        objects.append({
            'label': label,
            'subject': {
                "name": result.get("id", None),
                "label": label
            },
            'predicate': {
                "label": "is instance of",
                "name": "<<INSTANCE>>"
            },
            'object': result.get("model", None)
        })
    object_list = {
        'objects': objects,
        'meta': {
            'page': p,
            'limit': limit,
            'total_count': count
        }
    }
    return object_list
def rdf_search_query(self, subject, predicate, obj):
    identifier = obj["id"] if "id" in obj else obj
    # Retrieve all models in the current topic
    all_models = dict((model.__name__, model) for model in self.get_models())
    # If the received identifier describes a literal value
    if self.is_registered_literal(predicate["name"]):
        # Get the field name in the database
        field_name = predicate["name"]
        # Build the request
        query = """
            START root=node(*)
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(root.{field})
            AND root.{field} = {value}
            AND type.model_name = {model}
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(field=field_name,
                   value=identifier,
                   model=subject["name"],
                   app=self.app_label())
    # If the received identifier describes a relationship
    elif self.is_registered_relationship(predicate["name"]):
        fields = utils.iterate_model_fields(
            all_models[predicate["subject"]])
        # Get the field name in the database
        relationships = [
            field for field in fields
            if field["name"] == predicate["name"]
        ]
        # We didn't find the predicate
        if not len(relationships):
            return {'errors': 'Unknown predicate type'}
        relationship = relationships[0]["rel_type"]
        # Query to get every result
        query = u"""
            START st=node({id})
            MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(st.name)
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            relationship=relationship,
            id=identifier,
            app=self.app_label(),
            is_out='<' if relationships[0]['direction'] == 'out' else '',
            is_in='>' if relationships[0]['direction'] == 'in' else '')
    else:
        return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
    return connection.cypher(query).to_dicts()
def search(self, query):
    match = str(query).lower()
    # Strip characters that could break the Cypher regular expression
    match = re.sub(r"[\"'`;:{}|()]", '', match).strip()
    # Query to get every result
    query = """
        START root=node(*)
        MATCH (root)<-[r:`<<INSTANCE>>`]-(type)
        WHERE HAS(root.name)
        AND LOWER(root.name) =~ '.*(%s).*'
        RETURN ID(root) as id, root.name as name, type.name as model
    """ % match
    return connection.cypher(query).to_dicts()
def rdf_search(self, subject, predicate, obj):
    # Query to get every result
    query = """
        START st=node(*)
        MATCH (st)<-[:`%s`]-(root)<-[:`<<INSTANCE>>`]-(type)
        WHERE HAS(root.name)
        AND HAS(st.name)
        AND type.name = "%s"
        AND st.name = "%s"
        RETURN DISTINCT ID(root) as id, root.name as name, type.name as model
    """ % (
        predicate["name"],
        subject["name"],
        obj["name"],
    )
    return connection.cypher(query).to_dicts()
def rdf_search_query(self, subject, predicate, obj):
    identifier = obj["id"] if "id" in obj else obj
    # Retrieve all models in the current topic
    all_models = dict((model.__name__, model) for model in self.get_models())
    # If the received identifier describes a literal value
    if self.is_registered_literal(predicate["name"]):
        # Get the field name in the database
        field_name = predicate["name"]
        # Build the request
        query = """
            START root=node(*)
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(root.{field})
            AND root.{field} = {value}
            AND type.model_name = {model}
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            field=field_name,
            value=identifier,
            model=subject["name"],
            app=self.app_label()
        )
    # If the received identifier describes a relationship
    elif self.is_registered_relationship(predicate["name"]):
        fields = utils.iterate_model_fields(
            all_models[predicate["subject"]]
        )
        # Get the field name in the database
        relationships = [
            field for field in fields
            if field["name"] == predicate["name"]
        ]
        # We didn't find the predicate
        if not len(relationships):
            return {'errors': 'Unknown predicate type'}
        relationship = relationships[0]["rel_type"]
        # Query to get every result
        query = u"""
            START st=node({id})
            MATCH (st){is_out}-[:`{relationship}`]-{is_in}(root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(st.name)
            AND type.app_label = '{app}'
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            relationship=relationship,
            id=identifier,
            app=self.app_label(),
            is_out='<' if relationships[0]['direction'] == 'out' else '',
            is_in='>' if relationships[0]['direction'] == 'in' else ''
        )
    else:
        return {'errors': 'Unknown predicate type: %s' % predicate["name"]}
    return connection.cypher(query).to_dicts()
def query(self, root="*"):
    # Replace the query's tags by their chosen values
    query = self.query_str.format(
        root=root,
        match=self.match.format(
            select=self.select,
            model=self.model
        ),
        select=self.select,
    )
    # Execute the query and return the result as a dictionary
    return self.transform(connection.cypher(query).to_dicts())
def rdf_search(self, subject, predicate, obj):
    obj = obj["name"] if "name" in obj else obj
    # Retrieve all models in the current topic
    all_models = dict((model.__name__, model)
                      for model in self.topic.get_models())
    # If the received obj describes a literal value
    if self.is_registered_literal(predicate["name"]):
        # Get the field name in the database
        field_name = predicate["name"]
        # Build the request
        query = """
            START root=node(*)
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(root.{field})
            AND root.{field} = {value}
            AND type.model_name = {model}
            AND type.app_label = {app}
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            field=field_name,
            value=adapt(obj),
            model=adapt(subject["name"]),
            app=adapt(self.topic.app_label())
        )
    # If the received obj describes a relationship
    elif self.is_registered_relationship(predicate["name"]):
        fields = utils.get_model_fields(
            all_models[predicate["subject"]]
        )
        # Get the field name in the database
        relationships = [
            field for field in fields
            if field["name"] == predicate["name"]
        ]
        # We didn't find the predicate
        if not len(relationships):
            return {'errors': 'Unknown predicate type'}
        relationship = relationships[0]["rel_type"]
        # Query to get every result
        query = """
            START st=node(*)
            MATCH (st)<-[:`{relationship}`]-(root)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(root.name)
            AND HAS(st.name)
            AND st.name = {name}
            AND type.app_label = {app}
            RETURN DISTINCT ID(root) as id, root.name as name, type.model_name as model
        """.format(
            relationship=relationship,
            name=adapt(obj),
            app=adapt(self.topic.app_label())
        )
    else:
        return {'errors': 'Unknown predicate type'}
    return connection.cypher(query).to_dicts()
def obj_get_list(self, request=None, **kwargs):
    request = kwargs["bundle"].request if request is None else request
    # Super user only
    if not request.user.is_superuser:
        # We force tastypie to render the response directly
        raise ImmediateHttpResponse(
            response=HttpResponse('Unauthorized', status=401))
    query = request.GET["q"]
    data = connection.cypher(query).to_dicts()
    # Serialize content in JSON
    # @TODO implement better format support
    content = self.serializer(data, "application/json")
    # Create an HTTP response
    response = HttpResponse(content=content, content_type="application/json")
    # We force tastypie to render the response directly
    raise ImmediateHttpResponse(response=response)
def get_model_nodes():
    # Return buffered values
    if hasattr(get_model_nodes, "buffer"):
        results = get_model_nodes.buffer
        # Refresh the buffer on roughly 1 call out of 10
        if randint(0, 10) == 10:
            del get_model_nodes.buffer
        return results
    query = """
        START n=node(*)
        MATCH n-[r:`<<TYPE>>`]->t
        WHERE HAS(t.name)
        RETURN t.name as name, ID(t) as id
    """
    # Buffer the result
    get_model_nodes.buffer = connection.cypher(query).to_dicts()
    return get_model_nodes.buffer
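# The function above memoizes its result on a function attribute and evicts
# it at random so the buffer is eventually refreshed. A self-contained sketch
# of the same pattern (names and the stub value are illustrative, not from
# the source):
from random import randint

def cached_lookup():
    if hasattr(cached_lookup, "buffer"):
        results = cached_lookup.buffer
        # Drop the buffer on roughly one call out of eleven
        # (randint(0, 10) draws from 11 values)
        if randint(0, 10) == 10:
            del cached_lookup.buffer
        return results
    # Stand-in for the expensive Cypher round-trip
    cached_lookup.buffer = [{"name": "Person", "id": 1}]
    return cached_lookup.buffer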
def obj_get_list(self, request=None, **kwargs):
    request = kwargs["bundle"].request if request is None else request
    # Super user only
    if not request.user.is_superuser:
        # We force tastypie to render the response directly
        raise ImmediateHttpResponse(
            response=HttpResponse('Unauthorized', status=401))
    query = request.GET["q"]
    data = connection.cypher(query).to_dicts()
    # Serialize content in JSON
    # @TODO implement better format support
    content = self.serializer(data, "application/json")
    # Create an HTTP response
    response = HttpResponse(content=content, content_type="application/json")
    # We force tastypie to render the response directly
    raise ImmediateHttpResponse(response=response)
def get_model_nodes():
    from neo4django.db import connection
    # Return buffered values
    if hasattr(get_model_nodes, "buffer"):
        results = get_model_nodes.buffer
        # Refresh the buffer on roughly 1 call out of 10
        if randint(0, 10) == 10:
            del get_model_nodes.buffer
        return results
    query = """
        START n=node(0)
        MATCH n-[r:`<<TYPE>>`]->t
        WHERE HAS(t.name)
        RETURN t.name as name, ID(t) as id
    """
    # Buffer the result
    get_model_nodes.buffer = connection.cypher(query).to_dicts()
    return get_model_nodes.buffer
def summary_types(self, bundle):
    # Query to aggregate instance counts by type
    query = """
        START n=node(*)
        MATCH (c)<-[r:`<<INSTANCE>>`]-(n)
        WHERE HAS(n.model_name)
        RETURN ID(n) as id, n.model_name as name, count(c) as count
    """
    # Get the data and convert it to a dictionary
    types = connection.cypher(query).to_dicts()
    obj = {}
    for t in types:
        # Use the name as identifier
        obj[t["name"]] = t
        # name is now redundant
        del t["name"]
    return obj
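# Illustration of the reshaping done by summary_types() above, with made-up
# rows: the list returned by Cypher is keyed by type name, and the redundant
# "name" field is dropped from each value.
#
#   [{"id": 1, "name": "Person", "count": 12},
#    {"id": 2, "name": "Country", "count": 3}]
# becomes
#   {"Person": {"id": 1, "count": 12},
#    "Country": {"id": 2, "count": 3}}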
def __get_topic_entities_count(topic):
    """
    Return the number of entities in the current topic.
    Used to inform the administrator.
    Expensive request. Can be cached a long time.
    """
    query = """
        START root=node(*)
        MATCH p = (root)--(leaf)<-[:`<<INSTANCE>>`]-(type)
        WHERE HAS(leaf.name)
        AND type.app_label = '{app_label}'
        AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
        RETURN count(leaf) AS count
    """.format(app_label=Migration.__get_topic_label(topic))
    response = connection.cypher(query).to_dicts()[0].get("count")
    return response
def __get_topic_entities_count(topic):
    """
    Return the number of entities in the current topic.
    Used to inform the administrator.
    Expensive request. Can be cached a long time.
    """
    query = """
        START root=node(*)
        MATCH p = (root)--(leaf)<-[:`<<INSTANCE>>`]-(type)
        WHERE HAS(leaf.name)
        AND type.app_label = '{app_label}'
        AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
        RETURN count(leaf) AS count
    """.format(app_label=Migration.__get_topic_label(topic))
    response = connection.cypher(query).to_dicts()[0].get("count")
    return response
def summary_types(self, bundle, request):
    app_label = self.topic.app_label()
    # Query to aggregate instance counts by type
    query = """
        START n=node(*)
        MATCH (c)<-[r:`<<INSTANCE>>`]-(n)
        WHERE HAS(n.model_name)
        AND n.app_label = '%s'
        RETURN ID(n) as id, n.model_name as name, count(c) as count
    """ % app_label
    # Get the data and convert it to a dictionary
    types = connection.cypher(query).to_dicts()
    obj = {}
    for t in types:
        # Use the lowercased name as identifier
        obj[t["name"].lower()] = t
        # name is now redundant
        del t["name"]
    return obj
def summary_countries(self, bundle):
    model_id = get_model_node_id(Country)
    # The Country model isn't set yet in Neo4j
    if model_id is None:
        raise Http404()
    # Query to aggregate relationship counts by country
    query = """
        START n=node(%d)
        MATCH (i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
        WHERE HAS(country.isoa3)
        RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
    """ % int(model_id)
    # Get the data and convert it to a dictionary
    countries = connection.cypher(query).to_dicts()
    obj = {}
    for country in countries:
        # Use isoa3 as identifier
        obj[country["isoa3"]] = country
        # isoa3 is now redundant
        del country["isoa3"]
    return obj
def search(self, terms):
    if isinstance(terms, basestring):
        terms = [terms]
    matches = []
    for term in terms:
        term = unicode(term).lower()
        # Strip characters that could break the Cypher regular expression
        term = re.sub(r"[\"'`;:{}|()]", '', term).strip()
        matches.append("LOWER(node.name) =~ '.*(%s).*'" % term)
    # Query to get every result
    query = """
        START root=node(0)
        MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
        WHERE HAS(node.name)
    """
    if matches:
        query += """
        AND (%s)
        """ % " OR ".join(matches)
    query += """
        AND type.app_label = '%s'
        RETURN ID(node) as id, node.name as name, type.model_name as model
    """ % self.topic.app_label()
    return connection.cypher(query).to_dicts()
def get_most_related(self, rel):
    # Cache key to save the result of this function for each topic and rel
    cache_key = "most_related_%s" % rel
    # Get the cached value
    most_related = topic_cache.get(self.topic, cache_key)
    # Return the cached value if present
    if most_related is not None:
        return most_related
    # Build the query
    query = """
        START root=node(0)
        MATCH target-[r:`%s`]->(edge)<-[`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
        WHERE type.app_label = "%s"
        AND HAS(edge.name)
        RETURN COUNT(target) as cnt, ID(edge) as id, edge.name as name, type.model_name as model
        ORDER BY cnt DESC
        LIMIT 5
    """ % (rel, self.topic.app_label())
    # Get data from Neo4j
    most_related = connection.cypher(query).to_dicts()
    # Cache and return the result
    topic_cache.set(self.topic, cache_key, most_related)
    return most_related
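# get_most_related() follows a cache-aside pattern: read the cache, fall back
# to Neo4j, then write the result back. A hypothetical call and result shape
# (the relation name and values are invented for illustration):
#
#   get_most_related("person_lives_in_country")
#   # => [{"cnt": 42, "id": 7, "name": "France", "model": "Country"}, ...]
#
# Note that topic_cache.set() is called here without a TTL, so the entry
# presumably lives for the cache backend's default timeout.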
def entities_count(self):
    """
    Return the number of entities in the current topic.
    Used to inform the administrator.
    Expensive request. Cached a long time.
    """
    if not self.id:
        return 0
    cache_key = "entities_count"
    response = utils.topic_cache.get(self, cache_key)
    if response is None:
        query = """
            START a = node(0)
            MATCH a-[`<<TYPE>>`]->(b)-->c
            WHERE b.app_label = "{app_label}"
            AND not(has(c._relationship))
            RETURN count(c) as count;
        """.format(app_label=self.app_label())
        response = connection.cypher(query).to_dicts()[0].get("count")
        # Cached 12 hours
        utils.topic_cache.set(self, cache_key, response, 60 * 60 * 12)
    return response
def summary_countries(self, bundle, request):
    app_label = self.topic.app_label()
    # Query to aggregate relationship counts by country
    query = """
        START n=node(*)
        MATCH (m)-[:`<<INSTANCE>>`]->(i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
        WHERE HAS(country.isoa3)
        AND HAS(n.model_name)
        AND n.model_name = 'Country'
        AND n.app_label = '%s'
        RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
    """ % app_label
    # Get the data and convert it to a dictionary
    countries = connection.cypher(query).to_dicts()
    obj = {}
    for country in countries:
        # Use isoa3 as identifier
        obj[country["isoa3"]] = country
        # isoa3 is now redundant
        del country["isoa3"]
    return obj
def by_name(self, terms):
    if isinstance(terms, basestring):
        terms = [terms]
    matches = []
    for term in terms:
        term = unicode(term).lower()
        # Strip characters that could break the Cypher regular expression
        term = re.sub(r"[\"'`;:{}|()]", '', term).strip()
        matches.append("LOWER(node.name) =~ '.*(%s).*'" % term)
    # Query to get every result
    query = """
        START root=node(0)
        MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
        WHERE HAS(node.name)
    """
    if matches:
        query += """
        AND (%s)
        """ % " OR ".join(matches)
    query += """
        AND type.app_label = '%s'
        RETURN ID(node) as id, node.name as name, type.model_name as model
    """ % self.topic.app_label()
    return connection.cypher(query).to_dicts()
def summary_countries(self, bundle, request):
    app_label = self.topic.app_label()
    # Query to aggregate relationship counts by country
    query = """
        START n=node(*)
        MATCH (m)-[:`<<INSTANCE>>`]->(i)<-[*0..1]->(country)<-[r:`<<INSTANCE>>`]-(n)
        WHERE HAS(country.isoa3)
        AND HAS(n.model_name)
        AND n.model_name = 'Country'
        AND n.app_label = '%s'
        RETURN country.isoa3 as isoa3, ID(country) as id, count(i)-1 as count
    """ % app_label
    # Get the data and convert it to a dictionary
    countries = connection.cypher(query).to_dicts()
    obj = {}
    for country in countries:
        # Use isoa3 as identifier
        obj[country["isoa3"]] = country
        # isoa3 is now redundant
        del country["isoa3"]
    return obj
def entities_count(self):
    """
    Return the number of entities in the current topic.
    Used to inform the administrator.
    Expensive request. Cached a long time.
    """
    if not self.id:
        return 0
    cache_key = "entities_count"
    response = utils.topic_cache.get(self, cache_key)
    if response is None:
        query = """
            START a = node(0)
            MATCH a-[:`<<TYPE>>`]->(b)-[:`<<INSTANCE>>`]->(c)
            WHERE b.app_label = "{app_label}"
            AND not(has(c._relationship))
            RETURN count(c) as count;
        """.format(app_label=self.app_label())
        response = connection.cypher(query).to_dicts()[0].get("count")
        # Cached 12 hours
        utils.topic_cache.set(self, cache_key, response, 60 * 60 * 12)
    return response
def _get_leafs_and_edges(topic, depth, root_node):
    from neo4django.db import connection
    leafs = {}
    edges = []
    leafs_related = []
    ###
    # First we retrieve every leaf in the graph
    if root_node == "0":
        query = """
            START root = node({root})
            MATCH root-[`<<TYPE>>`]->(type)-->leaf
            WHERE type.app_label = '{app_label}'
            AND not(has(leaf._relationship))
            RETURN leaf, ID(leaf) as id_leaf, type
        """.format(root=root_node, depth=depth, app_label=topic.app_label())
    else:
        query = """
            START root=node({root})
            MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(leaf.name)
            AND type.app_label = '{app_label}'
            AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
            RETURN leaf, ID(leaf) as id_leaf, type
        """.format(root=root_node, depth=depth, app_label=topic.app_label())
    rows = connection.cypher(query).to_dicts()
    if root_node != "0":
        # We need to retrieve the root in another request
        # TODO: enhance that
        query = """
            START root=node({root})
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            RETURN root as leaf, ID(root) as id_leaf, type
        """.format(root=root_node)
        for row in connection.cypher(query).to_dicts():
            rows.append(row)
    # Filter rows using the models in the ontology
    # FIXME: should be in the cypher query
    models_in_ontology = map(lambda m: m.__name__.lower(), topic.get_models())
    rows = filter(lambda r: r['type']['data']['model_name'].lower() in models_in_ontology, rows)
    for row in rows:
        row['leaf']['data']['_id'] = row['id_leaf']
        row['leaf']['data']['_type'] = row['type']['data']['model_name']
        leafs[row['id_leaf']] = row['leaf']['data']
    if len(leafs) == 0:
        return ([], [])
    # Then we retrieve all edges
    query = """
        START A=node({leafs})
        MATCH (A)-[rel]->(B)
        WHERE type(rel) <> "<<INSTANCE>>"
        RETURN ID(A) as head, type(rel) as relation, id(B) as tail
    """.format(leafs=','.join([str(id) for id in leafs.keys()]))
    rows = connection.cypher(query).to_dicts()
    for row in rows:
        try:
            if leafs[row['head']] and leafs[row['tail']]:
                leafs_related.extend([row['head'], row['tail']])
                edges.append([row['head'], row['relation'], row['tail']])
        except KeyError:
            pass
    # Filter edges with relations in the ontology
    models_fields = itertools.chain(*map(iterate_model_fields, topic.get_models()))
    relations_in_ontology = set(map(lambda _: _.get("rel_type"), models_fields))
    edges = [e for e in edges if e[1] in relations_in_ontology]
    # Filter leafs without relations
    # FIXME: should be in the cypher query
    leafs_related = set(leafs_related)
    leafs = dict((k, v) for k, v in leafs.iteritems() if k in leafs_related)
    return (leafs, edges)
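# Hypothetical shapes returned by _get_leafs_and_edges() above, assuming two
# connected entities with Neo4j ids 12 and 34 (names and the relation type
# are invented):
#
#   leafs == {12: {"name": "Acme", "_id": 12, "_type": "Organization"},
#             34: {"name": "Ada",  "_id": 34, "_type": "Person"}}
#   edges == [[34, "person_works_for_organization", 12]]
#
# Heads and tails in `edges` are keys of `leafs`, so the caller can resolve
# each edge endpoint locally.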
def summary_mine(self, bundle, request):
    app_label = self.topic.app_label()
    self.method_check(request, allowed=['get'])
    limit = int(request.GET.get('limit', 20))
    offset = int(request.GET.get('offset', 0))
    if request.user.id is None:
        object_list = {
            'objects': [],
            'meta': {
                'page': 1,
                'limit': limit,
                'total_count': 0
            }
        }
    else:
        # Return the matched entity's id (not the fixed root at node(0))
        query = """
            START root=node(0)
            MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
            WHERE HAS(node.name)
            AND HAS(node._author)
            AND HAS(type.model_name)
            AND %s IN node._author
            AND type.app_label = '%s'
            RETURN DISTINCT ID(node) as id, node.name as name, type.model_name as model
        """ % (int(request.user.id), app_label)
        matches = connection.cypher(query).to_dicts()
        paginator = Paginator(matches, limit)
        try:
            p = self.get_page_number(offset, limit)
            page = paginator.page(p)
        except InvalidPage:
            raise Http404("Sorry, no results on that page.")
        objects = []
        for result in page.object_list:
            label = result.get("name", None)
            objects.append({
                'label': label,
                'subject': {
                    "name": result.get("id", None),
                    "label": label
                },
                'predicate': {
                    "label": "is instance of",
                    "name": "<<INSTANCE>>"
                },
                'object': result.get("model", None)
            })
        object_list = {
            'objects': objects,
            'meta': {
                'page': p,
                'limit': limit,
                'total_count': paginator.count
            }
        }
    return object_list
def summary_mine(self, bundle, request):
    app_label = self.topic.app_label()
    self.method_check(request, allowed=['get'])
    limit = int(request.GET.get('limit', 20))
    offset = int(request.GET.get('offset', 0))
    if request.user.id is None:
        object_list = {
            'objects': [],
            'meta': {
                'page': 1,
                'limit': limit,
                'total_count': 0
            }
        }
    else:
        # Return the matched entity's id (not the fixed root at node(0))
        query = """
            START root=node(0)
            MATCH (node)<-[r:`<<INSTANCE>>`]-(type)<-[`<<TYPE>>`]-(root)
            WHERE HAS(node.name)
            AND HAS(node._author)
            AND HAS(type.model_name)
            AND %s IN node._author
            AND type.app_label = '%s'
            RETURN DISTINCT ID(node) as id, node.name as name, type.model_name as model
        """ % (int(request.user.id), app_label)
        matches = connection.cypher(query).to_dicts()
        paginator = Paginator(matches, limit)
        try:
            p = self.get_page_number(offset, limit)
            page = paginator.page(p)
        except InvalidPage:
            raise Http404("Sorry, no results on that page.")
        objects = []
        for result in page.object_list:
            label = result.get("name", None)
            objects.append({
                'label': label,
                'subject': {
                    "name": result.get("id", None),
                    "label": label
                },
                'predicate': {
                    "label": "is instance of",
                    "name": "<<INSTANCE>>"
                },
                'object': result.get("model", None)
            })
        object_list = {
            'objects': objects,
            'meta': {
                'page': p,
                'limit': limit,
                'total_count': paginator.count
            }
        }
    return object_list
def get_graph(self, request, **kwargs):
    self.method_check(request, allowed=['get'])
    self.throttle_check(request)
    depth = int(request.GET['depth']) if 'depth' in request.GET.keys() else 1
    aggregation_threshold = 10

    def reduce_destination(outgoing_links, keep_id=None):
        # We count the popularity of each entering relationship by node
        counter = {}
        # Counter will have the following structure
        # {
        #     "<NAME_OF_A_RELATIONSHIP>": {
        #         "<IDX_OF_A_DESTINATION>": set("<IDX_OF_AN_ORIGIN>", ...)
        #     }
        # }
        for origin in outgoing_links:
            for rel in outgoing_links[origin]:
                for dest in outgoing_links[origin][rel]:
                    if int(origin) != int(keep_id):
                        counter[rel] = counter.get(rel, {})
                        counter[rel][dest] = counter[rel].get(dest, set())
                        counter[rel][dest].add(origin)
        # List of entering links (aggregated outside 'outgoing_links')
        entering_links = {}
        # Check now which links must be moved to entering links
        for rel in counter:
            for dest in counter[rel]:
                # Too many entering links!
                if len(counter[rel][dest]) > aggregation_threshold:
                    entering_links[dest] = entering_links.get(dest, {"_AGGREGATION_": set()})
                    entering_links[dest]["_AGGREGATION_"] = entering_links[dest]["_AGGREGATION_"].union(counter[rel][dest])
        # We remove elements within a copy to avoid changing the size of the
        # dict during an iteration
        outgoing_links_copy = copy.deepcopy(outgoing_links)
        for i in entering_links:
            # Convert the aggregation set to a list for JSON serialization
            entering_links[i]["_AGGREGATION_"] = list(entering_links[i]["_AGGREGATION_"])
            # Remove entering links from the outgoing links
            for j in outgoing_links:
                if int(j) == int(keep_id):
                    continue
                for rel in outgoing_links[j]:
                    if i in outgoing_links[j][rel]:
                        # Remove the entering id
                        outgoing_links_copy[j][rel].remove(i)
                        # Remove the relationship
                        if rel in outgoing_links_copy[j] and len(outgoing_links_copy[j][rel]) == 0:
                            del outgoing_links_copy[j][rel]
                        # Remove the origin
                        if len(outgoing_links_copy[j]) == 0:
                            del outgoing_links_copy[j]
        return outgoing_links_copy, entering_links

    def reduce_origin(rows):
        # No nodes, no links
        if len(rows) == 0:
            return ([], [],)
        # Initialize structures
        all_nodes = dict()
        # Use defaultdict() to create somewhat of an autovivifying list.
        # We want to build a structure of the form:
        #     { source_id : { relation_name : [ target_ids ] } }
        # A set() instead of a list() would avoid checking for duplicates,
        # but it screws up json.dumps()
        all_links = defaultdict(lambda: dict(__count=0, __relations=defaultdict(list)))
        IDs = set(sum([row['nodes'] for row in rows], []))
        # Get all entities from their IDs
        query = """
            START root = node({0})
            MATCH (root)-[:`<<INSTANCE>>`]-(type)
            WHERE type.app_label = '{1}'
            AND HAS(root.name)
            RETURN ID(root) as ID, root, type
        """.format(','.join([str(ID) for ID in IDs]),
                   get_model_topic(self.get_model()))
        all_raw_nodes = connection.cypher(query).to_dicts()
        for row in all_raw_nodes:
            # Twist some data in the entity
            for key in row['root']['data'].keys():
                if key[0] == '_':
                    del row['root']['data'][key]
            row['root']['data']['_type'] = row['type']['data']['model_name']
            row['root']['data']['_id'] = row['ID']
            all_nodes[row['ID']] = row['root']['data']
        for row in rows:
            nodes = row['nodes']
            i = 0
            for relation in row['relations']:
                try:
                    if all_nodes[nodes[i]] is None or all_nodes[nodes[i + 1]] is None:
                        continue
                    (a, b) = (nodes[i], nodes[i + 1])
                    if re.search('^' + to_underscores(all_nodes[nodes[i]]['_type']), relation) is None:
                        (a, b) = (nodes[i + 1], nodes[i])
                    if b not in all_links[a]['__relations'][relation]:
                        all_links[a]['__count'] += 1
                        all_links[a]['__relations'][relation].append(b)
                except KeyError:
                    pass
                i += 1
        # Sort and aggregate nodes when we're over the threshold
        for node in all_links.keys():
            shortcut = all_links[node]['__relations']
            if all_links[node]['__count'] >= aggregation_threshold:
                sorted_relations = sorted([(len(shortcut[rel]), rel) for rel in shortcut],
                                          key=lambda to_sort: to_sort[0])
                shortcut = defaultdict(list)
                i = 0
                while i < aggregation_threshold:
                    for rel in sorted_relations:
                        try:
                            node_id = all_links[node]['__relations'][rel[1]].pop()
                            shortcut[rel[1]].append(node_id)
                            i += 1
                        except IndexError:
                            # Must catch IndexError if we .pop() on an empty list
                            pass
                        if i >= aggregation_threshold:
                            break
                shortcut['_AGGREGATION_'] = sum(all_links[node]['__relations'].values(), [])
            all_links[node] = shortcut
        return (all_nodes, all_links)

    query = """
        START root=node({0})
        MATCH path = (root)-[*1..{1}]-(leaf)
        WITH extract(r in relationships(path)|type(r)) as relations,
             extract(n in nodes(path)|ID(n)) as nodes
        WHERE ALL(rel in relations WHERE rel <> "<<INSTANCE>>")
        RETURN relations, nodes
    """.format(kwargs['pk'], depth)
    rows = connection.cypher(query).to_dicts()
    nodes, links = reduce_origin(rows)
    outgoing_links, entering_links = reduce_destination(links, keep_id=kwargs['pk'])
    self.log_throttled_access(request)
    return self.create_response(request, {
        'nodes': nodes,
        'outgoing_links': outgoing_links,
        'entering_links': entering_links
    })
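# A worked example of reduce_destination() above: if aggregation_threshold
# were 2, a destination fed by three distinct origins through the same
# relation would be folded into `entering_links` (the data is invented):
#
#   outgoing = {"1": {"KNOWS": [9]},
#               "2": {"KNOWS": [9]},
#               "3": {"KNOWS": [9]}}
#   reduce_destination(outgoing, keep_id=0)
#   # => ({}, {9: {"_AGGREGATION_": ["1", "2", "3"]}})  # origin order may vary
#
# The pruned copy is empty because every outgoing link was moved into the
# aggregation bucket.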
def render_csv_zip_file(topic, model_type=None, query=None, cache_key=None):
    def write_all_in_zip(objects, columns, zip_file, model_name=None):
        """
        Write the csv file from `objects` and `columns` and add it into the
        `zip_file` file. If given, `model_name` will be the name of the csv
        file. If `cache_key` is defined, the generated file name will be put
        in the default cache with the given key.
        """
        # Set a getattr function depending on the type of `objects`
        if isinstance(objects[0], dict):
            def _getattr(o, prop):
                return o.get(prop, "")
        else:
            def _getattr(o, prop):
                return getattr(o, prop)
        all_ids = []
        csv_file = StringIO()
        model_name = model_name or objects[0].__class__.__name__
        spamwriter = csv.writer(csv_file)
        spamwriter.writerow(["%s_id" % (model_name)] + columns)  # header
        for obj in objects:
            all_ids.append(_getattr(obj, 'id'))
            obj_columns = []
            for column in columns:
                val = _getattr(obj, column)
                if val:
                    val = unicode(val).encode('utf-8')
                obj_columns.append(val)
            spamwriter.writerow([_getattr(obj, 'id')] + obj_columns)
        zip_file.writestr("{0}.csv".format(model_name), csv_file.getvalue())
        csv_file.close()
        return all_ids

    def get_columns(model):
        edges = dict()
        columns = []
        for field in utils.iterate_model_fields(model):
            if field['type'] != 'Relationship':
                if field['name'] not in ['id']:
                    columns.append(field['name'])
            else:
                edges[field['rel_type']] = [
                    field['model'],
                    field['name'],
                    field['related_model']
                ]
        return (columns, edges)

    buffer = StringIO()
    zip_file = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)
    models = topic.get_models()
    if not query:
        export_edges = not model_type
        for model in models:
            if model_type and model.__name__.lower() != model_type:
                continue
            (columns, edges) = get_columns(model)
            objects = model.objects.all()
            if objects.count() > 0:
                all_ids = write_all_in_zip(objects, columns, zip_file)
                if export_edges:
                    for key in edges.keys():
                        rows = connection.cypher("""
                            START root=node({nodes})
                            MATCH (root)-[r:`{rel}`]->(leaf)
                            RETURN id(root) as id_from, id(leaf) as id_to
                        """.format(nodes=','.join([str(id) for id in all_ids]),
                                   rel=key)).to_dicts()
                        csv_file = StringIO()
                        spamwriter = csv.writer(csv_file)
                        spamwriter.writerow([
                            "%s_id" % (edges[key][0]),
                            edges[key][1],
                            "%s_id" % (edges[key][2])
                        ])  # header
                        for row in rows:
                            spamwriter.writerow(
                                [row['id_from'], None, row['id_to']])
                        zip_file.writestr(
                            "{0}_{1}.csv".format(edges[key][0], edges[key][1]),
                            csv_file.getvalue())
                        csv_file.close()
    else:
        page = 1
        limit = 1
        objects = []
        total = -1
        while len(objects) != total:
            try:
                result = topic.rdf_search(query=query, offset=(page - 1) * limit)
                objects += result['objects']
                total = result['meta']['total_count']
                page += 1
            except KeyError:
                break
            except InvalidPage:
                break
        for model in models:
            if model.__name__ == objects[0]['model']:
                break
        (columns, _) = get_columns(model)
        write_all_in_zip(objects, columns, zip_file, model.__name__)
    zip_file.close()
    buffer.flush()
    # Save the zip in `base_dir`
    base_dir = "csv-exports"
    file_name = "%s/d.io-export-%s.zip" % (base_dir, topic.slug)
    # The name can be changed by default storage if a previous file exists
    file_name = default_storage.save(file_name, ContentFile(buffer.getvalue()))
    buffer.close()
    file_name = "%s%s" % (settings.MEDIA_URL, file_name)
    # Save in cache if cache_key is defined
    if cache_key:
        utils.topic_cache.set(topic, cache_key, file_name, 60 * 60 * 24)
    return dict(file_name=file_name)
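# Hypothetical usage of render_csv_zip_file() above. The zip ends up under
# MEDIA_URL/csv-exports/ and the returned dict carries its public path
# (the topic slug and path below are invented for illustration):
#
#   result = render_csv_zip_file(topic, model_type="person")
#   result["file_name"]
#   # e.g. "/media/csv-exports/d.io-export-my-topic.zip"
#
# When `query` is given instead, results are fetched page by page through
# topic.rdf_search() until `total_count` entries have been accumulated.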
def reduce_origin(rows):
    # No nodes, no links
    if len(rows) == 0:
        return ([], [],)
    # Initialize structures
    all_nodes = dict()
    # Use defaultdict() to create somewhat of an autovivifying list.
    # We want to build a structure of the form:
    #     { source_id : { relation_name : [ target_ids ] } }
    # A set() instead of a list() would avoid checking for duplicates,
    # but it screws up json.dumps()
    all_links = defaultdict(lambda: dict(__count=0, __relations=defaultdict(list)))
    IDs = set(sum([row['nodes'] for row in rows], []))
    # Get all entities from their IDs
    query = """
        START root = node({0})
        MATCH (root)-[:`<<INSTANCE>>`]-(type)
        WHERE type.app_label = '{1}'
        AND HAS(root.name)
        RETURN ID(root) as ID, root, type
    """.format(','.join([str(ID) for ID in IDs]),
               get_model_topic(self.get_model()))
    all_raw_nodes = connection.cypher(query).to_dicts()
    for row in all_raw_nodes:
        # Twist some data in the entity
        for key in row['root']['data'].keys():
            if key[0] == '_':
                del row['root']['data'][key]
        row['root']['data']['_type'] = row['type']['data']['model_name']
        row['root']['data']['_id'] = row['ID']
        all_nodes[row['ID']] = row['root']['data']
    for row in rows:
        nodes = row['nodes']
        i = 0
        for relation in row['relations']:
            try:
                if all_nodes[nodes[i]] is None or all_nodes[nodes[i + 1]] is None:
                    continue
                (a, b) = (nodes[i], nodes[i + 1])
                if re.search('^' + to_underscores(all_nodes[nodes[i]]['_type']), relation) is None:
                    (a, b) = (nodes[i + 1], nodes[i])
                if b not in all_links[a]['__relations'][relation]:
                    all_links[a]['__count'] += 1
                    all_links[a]['__relations'][relation].append(b)
            except KeyError:
                pass
            i += 1
    # Sort and aggregate nodes when we're over the threshold
    for node in all_links.keys():
        shortcut = all_links[node]['__relations']
        if all_links[node]['__count'] >= aggregation_threshold:
            sorted_relations = sorted([(len(shortcut[rel]), rel) for rel in shortcut],
                                      key=lambda to_sort: to_sort[0])
            shortcut = defaultdict(list)
            i = 0
            while i < aggregation_threshold:
                for rel in sorted_relations:
                    try:
                        node_id = all_links[node]['__relations'][rel[1]].pop()
                        shortcut[rel[1]].append(node_id)
                        i += 1
                    except IndexError:
                        # Must catch IndexError if we .pop() on an empty list
                        pass
                    if i >= aggregation_threshold:
                        break
            shortcut['_AGGREGATION_'] = sum(all_links[node]['__relations'].values(), [])
        all_links[node] = shortcut
    return (all_nodes, all_links)
def _get_leafs_and_edges(topic, depth, root_node):
    from neo4django.db import connection
    leafs = {}
    edges = []
    leafs_related = []
    ###
    # First we retrieve every leaf in the graph
    if root_node == "0":
        query = """
            START root = node({root})
            MATCH root-[`<<TYPE>>`]->(type)-->leaf
            WHERE type.app_label = '{app_label}'
            AND not(has(leaf._relationship))
            RETURN leaf, ID(leaf) as id_leaf, type
        """.format(root=root_node, depth=depth, app_label=topic.app_label())
    else:
        query = """
            START root=node({root})
            MATCH p = (root)-[*1..{depth}]-(leaf)<-[:`<<INSTANCE>>`]-(type)
            WHERE HAS(leaf.name)
            AND type.app_label = '{app_label}'
            AND length(filter(r in relationships(p) : type(r) = "<<INSTANCE>>")) = 1
            RETURN leaf, ID(leaf) as id_leaf, type
        """.format(root=root_node, depth=depth, app_label=topic.app_label())
    rows = connection.cypher(query).to_dicts()
    if root_node != "0":
        # We need to retrieve the root in another request
        # TODO: enhance that
        query = """
            START root=node({root})
            MATCH (root)<-[:`<<INSTANCE>>`]-(type)
            RETURN root as leaf, ID(root) as id_leaf, type
        """.format(root=root_node)
        for row in connection.cypher(query).to_dicts():
            rows.append(row)
    # Filter rows using the models in the ontology
    # FIXME: should be in the cypher query
    models_in_ontology = map(lambda m: m.__name__.lower(), topic.get_models())
    rows = filter(
        lambda r: r['type']['data']['model_name'].lower() in models_in_ontology,
        rows)
    for row in rows:
        row['leaf']['data']['_id'] = row['id_leaf']
        row['leaf']['data']['_type'] = row['type']['data']['model_name']
        leafs[row['id_leaf']] = row['leaf']['data']
    if len(leafs) == 0:
        return ([], [])
    # Then we retrieve all edges
    query = """
        START A=node({leafs})
        MATCH (A)-[rel]->(B)
        WHERE type(rel) <> "<<INSTANCE>>"
        RETURN ID(A) as head, type(rel) as relation, id(B) as tail
    """.format(leafs=','.join([str(id) for id in leafs.keys()]))
    rows = connection.cypher(query).to_dicts()
    for row in rows:
        try:
            if leafs[row['head']] and leafs[row['tail']]:
                leafs_related.extend([row['head'], row['tail']])
                edges.append([row['head'], row['relation'], row['tail']])
        except KeyError:
            pass
    # Filter edges with relations in the ontology
    models_fields = itertools.chain(
        *map(iterate_model_fields, topic.get_models()))
    relations_in_ontology = set(
        map(lambda _: _.get("rel_type"), models_fields))
    edges = [e for e in edges if e[1] in relations_in_ontology]
    # Filter leafs without relations
    # FIXME: should be in the cypher query
    leafs_related = set(leafs_related)
    leafs = dict(
        (k, v) for k, v in leafs.iteritems() if k in leafs_related)
    return (leafs, edges)
def render_csv_zip_file(topic, model_type=None, query=None, cache_key=None):
    def write_all_in_zip(objects, columns, zip_file, model_name=None):
        """
        Write the csv file from `objects` and `columns` and add it into the
        `zip_file` file. If given, `model_name` will be the name of the csv
        file. If `cache_key` is defined, the generated file name will be put
        in the default cache with the given key.
        """
        # Set a getattr function depending on the type of `objects`
        if isinstance(objects[0], dict):
            def _getattr(o, prop):
                return o.get(prop, "")
        else:
            def _getattr(o, prop):
                return getattr(o, prop)
        all_ids = []
        csv_file = StringIO()
        model_name = model_name or objects[0].__class__.__name__
        spamwriter = csv.writer(csv_file)
        spamwriter.writerow(["%s_id" % (model_name)] + columns)  # header
        for obj in objects:
            all_ids.append(_getattr(obj, 'id'))
            obj_columns = []
            for column in columns:
                val = _getattr(obj, column)
                if val:
                    val = unicode(val).encode('utf-8')
                obj_columns.append(val)
            spamwriter.writerow([_getattr(obj, 'id')] + obj_columns)
        zip_file.writestr("{0}.csv".format(model_name), csv_file.getvalue())
        csv_file.close()
        return all_ids

    def get_columns(model):
        edges = dict()
        columns = []
        for field in utils.iterate_model_fields(model):
            if field['type'] != 'Relationship':
                if field['name'] not in ['id']:
                    columns.append(field['name'])
            else:
                edges[field['rel_type']] = [field['model'],
                                            field['name'],
                                            field['related_model']]
        return (columns, edges)

    buffer = StringIO()
    zip_file = zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED)
    models = topic.get_models()
    if not query:
        export_edges = not model_type
        for model in models:
            if model_type and model.__name__.lower() != model_type:
                continue
            (columns, edges) = get_columns(model)
            objects = model.objects.all()
            if objects.count() > 0:
                all_ids = write_all_in_zip(objects, columns, zip_file)
                if export_edges:
                    for key in edges.keys():
                        rows = connection.cypher("""
                            START root=node({nodes})
                            MATCH (root)-[r:`{rel}`]->(leaf)
                            RETURN id(root) as id_from, id(leaf) as id_to
                        """.format(nodes=','.join([str(id) for id in all_ids]),
                                   rel=key)).to_dicts()
                        csv_file = StringIO()
                        spamwriter = csv.writer(csv_file)
                        spamwriter.writerow(["%s_id" % (edges[key][0]),
                                             edges[key][1],
                                             "%s_id" % (edges[key][2])])  # header
                        for row in rows:
                            spamwriter.writerow([row['id_from'], None, row['id_to']])
                        zip_file.writestr("{0}_{1}.csv".format(edges[key][0], edges[key][1]),
                                          csv_file.getvalue())
                        csv_file.close()
    else:
        page = 1
        limit = 1
        objects = []
        total = -1
        while len(objects) != total:
            try:
                result = topic.rdf_search(query=query, offset=(page - 1) * limit)
                objects += result['objects']
                total = result['meta']['total_count']
                page += 1
            except KeyError:
                break
            except InvalidPage:
                break
        for model in models:
            if model.__name__ == objects[0]['model']:
                break
        (columns, _) = get_columns(model)
        write_all_in_zip(objects, columns, zip_file, model.__name__)
    zip_file.close()
    buffer.flush()
    # Save the zip in `base_dir`
    base_dir = "csv-exports"
    file_name = "%s/d.io-export-%s.zip" % (base_dir, topic.slug)
    # The name can be changed by default storage if a previous file exists
    file_name = default_storage.save(file_name, ContentFile(buffer.getvalue()))
    buffer.close()
    file_name = "%s%s" % (settings.MEDIA_URL, file_name)
    # Save in cache if cache_key is defined
    if cache_key:
        utils.topic_cache.set(topic, cache_key, file_name, 60 * 60 * 24)
    return dict(file_name=file_name)