class Neo4jOperate(object):
    """Thin CRUD helper around a py2neo ``Graph``.

    Node "locators" accepted throughout are either a py2neo ``Node`` or a
    dict of the form ``{"label": "Teacher", "search": {"id": 123}}``.
    """

    def __init__(self, host, user, password):
        self.graph = Graph(host, auth=(user, password))
        self.relationship_matcher = RelationshipMatcher(self.graph)

    def create_one_relationship(self, start_node=None, target_node=None,
                                relationship=None, **prop):
        """Create a relationship between two nodes, or update it if present.

        :param start_node: Node or locator dict
        :param target_node: Node or locator dict
        :param relationship: relationship type name (str)
        :param prop: relationship properties
        :return: True on success, False otherwise
        """
        s_node = self.search_node(start_node)
        t_node = self.search_node(target_node)
        if s_node is None or t_node is None:
            return False
        relation = self.search_relationship(s_node, t_node, relationship)
        if relation:
            # avoid duplicate edges: update the existing one instead
            return self.update_one_relationship(s_node, t_node, relationship, **prop)
        return self.create_relation(start_node=s_node, relationship=relationship,
                                    target_node=t_node, **prop)

    def update_one_relationship(self, start_node=None, target_node=None,
                                relation=None, cover=True, **prop):
        """Update properties on an existing relationship.

        :param relation: relationship type name (str) or a Relationship object
        :param cover: if True (default) new values overwrite old ones,
            otherwise existing values are accumulated with ``+=``
        :param prop: relationship properties
        :return: True on success, False otherwise
        """
        if not isinstance(relation, Relationship):
            relation = self.search_relationship(start_node, target_node, relation)
        if relation is None:
            return False
        for key, value in dict(prop).items():
            if key not in relation or cover is True:
                relation[key] = value
            else:
                # cover=False: accumulate instead of overwrite
                relation[key] += value
        try:
            self.graph.push(relation)
            return True
        except Exception as e:
            print(e)
            return False

    def delete_one_relationship(self, start_node=None, target_node=None,
                                relationship=None, properety=None):
        """Delete one relationship between two nodes.

        :param properety: optional set of property names; when given, the
            values found on the deleted relationship are returned as a dict
        :return: dict of requested properties, True when there was nothing to
            delete or no properties were requested, False on failure
        """
        relation = self.search_relationship(start_node, target_node, relationship)
        if relation is None:
            # nothing to delete
            return True
        try:
            back = None
            if properety:
                # Capture the requested property values BEFORE deleting.
                # (The original code returned here without ever calling
                # separate(), so the relationship was never removed.)
                back = {key: relation[key]
                        for key in set(properety) if key in relation}
            self.graph.separate(relation)
            return back if back is not None else True
        except Exception:
            return False

    def migrate_relationship(self, source_node=None, self_node=None,
                             target_node=None, r_type=None, **property):
        """Move a relationship A-B to C-B, keeping selected properties.

        :param source_node: old start node (A), Node or locator dict
        :param self_node: shared end node (B)
        :param target_node: new start node (C)
        :param r_type: relationship type name
        :param property: forwarded to :meth:`delete_one_relationship`
            (e.g. ``properety={...}`` to carry properties over)
        :return: True on success, False otherwise
        """
        relation = self.delete_one_relationship(source_node, self_node,
                                                r_type, **property)
        if relation is False:
            return False
        if relation is True:
            # nothing to carry over
            relation = {}
        # Re-create the edge under the SAME type on the new start node; the
        # original call omitted r_type, creating an untyped relationship.
        return self.create_one_relationship(target_node, self_node,
                                            r_type, **relation)

    def search_relationship(self, start_node=None, target_node=None,
                            relationship=None):
        """Return the first matching relationship between two nodes, or None.

        :param relationship: relationship type name (str)
        """
        start_node = self.search_node(start_node)
        if start_node is None:
            return None
        target_node = self.search_node(target_node)
        if target_node is None:
            return None
        return self.relationship_matcher.match(
            nodes=[start_node, target_node], r_type=relationship).first()

    def search_node(self, search_dict):
        """Resolve a locator to a Node.

        :param search_dict: Node (returned unchanged) or locator dict
        :return: Node or None
        """
        if isinstance(search_dict, Node):
            return search_dict
        if isinstance(search_dict, dict) and \
                "label" in search_dict and "search" in search_dict:
            return self.graph.nodes.match(
                search_dict["label"]).where(**search_dict["search"]).first()
        return None

    def update_node(self, search_dict, **prop):
        """Update properties on an existing node.

        :return: True on success, False if the node is missing or push fails
        """
        node = self.get_node(search_dict=search_dict, create_if_not_exist=False)
        if node is None:
            return False
        for key, value in prop.items():
            node[key] = value
        try:
            self.graph.push(node)
            return True
        except Exception:
            return False

    def get_node(self, search_dict, create_if_not_exist=True, **prop):
        """Fetch a node, optionally creating it when absent.

        :param create_if_not_exist: create the node if the lookup fails
        :return: Node, or None when creation failed / was disabled
        """
        node = self.search_node(search_dict)
        if node is None and create_if_not_exist:
            return self.create_node(search_dict=search_dict, **prop)
        return node

    def create_node(self, search_dict, **prop):
        """Create a node and return it (re-fetched from the database).

        :return: Node or None on failure
        """
        # Include the search keys in the node's properties so the follow-up
        # search_node() can actually find it; the original built the node
        # from **prop only, so the re-fetch below could never match.
        props = dict(search_dict.get("search", {}))
        props.update(prop)
        node = Node(search_dict["label"], **props)
        try:
            self.graph.create(node)
            return self.search_node(search_dict)
        except Exception:
            return None

    def create_relation(self, start_node, relationship, target_node, **prop):
        """Create a typed relationship with the given properties.

        :return: True on success, False otherwise
        """
        relation = Relationship(start_node, relationship, target_node)
        for key, value in prop.items():
            relation[key] = value
        try:
            self.graph.create(relation)
            return True
        except Exception:
            return False

    def run(self, cql):
        """Run a Cypher statement and return its records as a list of dicts."""
        return self.graph.run(cql).data()

    def run_without_data(self, cql):
        """Run a Cypher statement and return the raw cursor."""
        return self.graph.run(cql)
class Network():
    """Web-page link graph stored in Neo4j.

    Nodes are pages (labelled by their link), ``links_to`` edges are
    hyperlinks.  Provides pagerank computation and crawl prioritisation.
    """

    def __init__(self):
        self.graph_instance = Graph()
        # update_time() stores the value on self; the original assigned its
        # (None) return value back to self.time, wiping the timestamp.
        self.update_time(str(datetime.datetime.now()))

    def update_time(self, time):
        """Record the current system time string on the instance."""
        self.time = time

    def add_node(self, link, date_last_updated, frequency):
        """Create the node for *link*, or update its fields if it exists."""
        calculated_frequency = convert_frequency_to_hours(frequency)
        if not self.check_node_exist(link):
            # New webpage node with an initial calculated frequency
            n = Node(link,
                     date_last_updated=date_last_updated,
                     frequency=frequency,
                     calculated_frequency=calculated_frequency,
                     link=link)
            self.graph_instance.create(n)
        else:
            # Refresh the existing node's fields
            n = self.graph_instance.find_one(link)
            if n["date_last_updated"] != "":
                calculated_frequency = self._update_calculated_frequency(
                    n["date_last_updated"], date_last_updated)
            n["date_last_updated"] = date_last_updated
            n["calculated_frequency"] = calculated_frequency
            n["frequency"] = frequency
            n.push()
        return n

    def _update_calculated_frequency(self, prev_date_updated, new_date_updated):
        """Hours elapsed between two ``YYYY-MM-DD`` date strings, -1 on error."""
        try:
            prev_date = datetime.datetime.strptime(prev_date_updated, "%Y-%m-%d")
            new_date = datetime.datetime.strptime(new_date_updated, "%Y-%m-%d")
            td = new_date - prev_date
            return td.total_seconds() // 3600
        except (ValueError, TypeError):
            # unparsable / non-string dates
            return -1

    def add_edge(self, node_u, node_v_link, relationship):
        """Create a tagged ``links_to`` edge; update the tag if it exists.

        The original always created a new edge, duplicating relationships
        and contradicting its own documented intent.
        """
        self.add_node(node_v_link, "", "")
        node_v = self.get_node(node_v_link)
        if not self.check_relationship_exist(node_u, node_v):
            self.graph_instance.create(
                Relationship(node_u, "links_to", node_v, tag=relationship))
        else:
            rel = next(iter(self.graph_instance.match(
                start_node=node_u, end_node=node_v, rel_type="links_to")))
            rel["tag"] = relationship
            self.graph_instance.push(rel)

    def check_node_exist(self, link):
        """True when a node labelled *link* exists."""
        return len(list(self.graph_instance.find(link))) != 0

    def check_relationship_exist(self, node_u, node_v):
        """True when a ``links_to`` edge exists from node_u to node_v."""
        return len(list(self.graph_instance.match(
            start_node=node_u, end_node=node_v, rel_type="links_to"))) > 0

    def delete_failed_webpages(self, link):
        """Remove the node for *link* together with all its edges."""
        if not self.check_node_exist(link):
            return
        node = self.get_node(link)
        self.delete_relationship(node)
        self.delete_incoming_relationship(node)
        self.graph_instance.delete(node)

    def delete_relationship(self, node_u):
        """Delete all outgoing ``links_to`` edges of node_u."""
        rels = list(self.graph_instance.match(
            rel_type="links_to", start_node=node_u, end_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def delete_incoming_relationship(self, node_u):
        """Delete all incoming ``links_to`` edges of node_u."""
        rels = list(self.graph_instance.match(
            rel_type="links_to", end_node=node_u, start_node=None))
        for r in rels:
            self.graph_instance.separate(r)

    def get_node(self, link):
        """Return the node labelled *link* (or None)."""
        return self.graph_instance.find_one(link)

    def get_node_information(self, link):
        """Return a dict of stored fields plus in/out links for *link*.

        NOTE(review): the Cypher here is built by string concatenation; if
        *link* can contain quotes this is injection-prone — parameterize.
        """
        check_node = self.graph_instance.data(
            "MATCH (n {link: '" + link + "'} ) RETURN n")
        if len(check_node) == 0:
            return {}
        n = self.get_node(link)
        node_data = {}
        node_data["date_last_updated"] = n["date_last_updated"]
        node_data["calculated_frequency"] = n["calculated_frequency"]
        node_data["frequency"] = n["frequency"]
        node_data["outlinks"] = self.get_outlinks(link)
        node_data["inlinks"] = self.get_inlinks(link)
        return node_data

    def get_outlinks(self, link):
        """Links this page points to."""
        outlink_data = self.graph_instance.data(
            "MATCH (n {link: '" + link + "'} )-->(node) RETURN node")
        return [o["node"]["link"] for o in outlink_data]

    def get_inlinks(self, link):
        """Links pointing to this page."""
        inlink_data = self.graph_instance.data(
            "MATCH (n {link: '" + link + "'} )<--(node) RETURN node")
        return [o["node"]["link"] for o in inlink_data]

    def _to_matrix(self):
        """Dense adjacency matrix over all nodes (O(N^2) existence queries)."""
        nodes = list(self.graph_instance.node_selector.select())
        N = len(nodes)
        mat = np.zeros((N, N))
        for i, a in enumerate(nodes):
            for j, b in enumerate(nodes):
                mat[i, j] = self.check_relationship_exist(a, b)
        return mat

    def update_pagerank(self):
        """Compute pagerank and write it back onto every node."""
        nodes = self.graph_instance.node_selector.select()
        for pr, node in zip(self._pagerank(), nodes):
            node.update(page_rank=pr)
            self.graph_instance.push(node)

    def show_pagerank(self, selector=None, link=None):
        """Print the page_rank of nodes, optionally filtered by label(s)."""
        nodes = list(self.graph_instance.node_selector.select())
        for node in nodes:
            if isinstance(link, str):
                if not list(node.labels())[0] == link:
                    continue
            elif isinstance(link, (list, tuple)):
                if not list(node.labels())[0] in link:
                    continue
            # the original loop filtered nodes but never displayed anything
            print(list(node.labels())[0], node.get('page_rank'))

    def get_pagerank_dict(self, links=[]):
        """Map node label -> page_rank for the given links (str/list/tuple)."""
        nodes = list(self.graph_instance.node_selector.select())
        dct = {}
        for node in nodes:
            if isinstance(links, str):
                if not list(node.labels())[0] == links:
                    continue
            elif isinstance(links, (list, tuple)):
                if not list(node.labels())[0] in links:
                    continue
            dct[list(node.labels())[0]] = node.get('page_rank')
        return dct

    def get_ranking_data(self, links):
        """Build the ranking-API payload (pagerank + node fields) per link."""
        page_ranks = self.get_pagerank_dict(links)
        data = {"webpages": []}
        for l in page_ranks.keys():
            webpage_data = {}
            if page_ranks[l] is not None:
                n = self.get_node(l)
                webpage_data["pageRankValue"] = page_ranks[l]
                webpage_data["dateLastUpdated"] = n["date_last_updated"]
                webpage_data["frequency"] = n["frequency"]
                webpage_data["webpage"] = l
            else:
                # node has no pagerank yet — emit placeholder entry
                webpage_data["pageRankValue"] = "NULL"
                webpage_data["dateLastUpdated"] = ""
                webpage_data["frequency"] = ""
                webpage_data["webpage"] = ""
            data["webpages"].append(webpage_data)
        return data

    def _pagerank(self, alpha=0.85, max_iter=100, tol=1.0e-6):
        """Power-method pagerank over the adjacency matrix.

        :param alpha: damping factor
        :param max_iter: iteration cap (raise if convergence fails)
        :param tol: L1 convergence tolerance per node
        :raises Exception: when the iteration fails to converge
        """
        mat = scipy.sparse.csr_matrix(self._to_matrix())
        n, m = mat.shape
        # Row sums; normalise non-zero rows into probability rows.
        S = np.asarray(mat.sum(axis=1)).flatten()
        # '<>' in the original was Python-2-only syntax.
        index = np.where(S != 0)[0]
        for i in index:
            mat[i, :] *= 1.0 / S[i]
        # Uniform initial guess — this line was commented out in the
        # original, making the loop below raise NameError on 'pr'.
        pr = np.ones(n) / n
        # Dangling nodes get uniform teleport mass.
        dangling = np.where(
            np.asarray(mat.sum(axis=1)).flatten() == 0, 1.0 / n, 0)
        for i in range(max_iter):
            prlast = pr
            pr = alpha * (pr * mat + np.dot(dangling, prlast)) + \
                (1 - alpha) * prlast.sum() / n
            err = np.absolute(pr - prlast).sum()
            if err < n * tol:
                return pr
        raise Exception("pagerank failed to converge [%d iterations]" % (i + 1))

    def prioritizer(self, outlinks):
        """Order *outlinks* by crawl urgency and stamp last_crawled_time.

        Sorts by (time_remaining asc, inlink count desc — encoded as a
        negative count by number_of_inlinks).
        """
        # Filter in one pass: the original called outlinks.remove() while
        # iterating the same list, silently skipping elements.
        outlinks[:] = [ol for ol in outlinks if self.check_node_exist(ol)]
        for ol in outlinks:
            self.remaining_time(ol)
        self.sort_node(outlinks)
        new_links = sorted(
            outlinks,
            key=lambda k: (self.get_node(k)["time_remaining"],
                           self.number_of_inlinks(k)))
        for ol in new_links:
            # Record that the page is being handed to the crawler now.
            current = str(datetime.datetime.now())
            node = self.get_node(ol)
            node["last_crawled_time"] = current
            node.push()
        return new_links

    def number_of_inlinks(self, outlink):
        """Negative inlink count (negated so sorted() ranks popular first)."""
        node = self.get_node(outlink)
        return -len(list(self.graph_instance.match(
            rel_type="links_to", end_node=node, start_node=None)))

    def remaining_time(self, outlink):
        """Store hours remaining until *outlink* is due for a re-crawl."""
        node = self.get_node(outlink)
        last_crawled_time = node["last_crawled_time"]
        if last_crawled_time is None:
            # never crawled: due immediately
            node["time_remaining"] = 0
            node.push()
        else:
            fmt = '%Y-%m-%d %H:%M:%S'
            current = str(datetime.datetime.now())
            start = datetime.datetime.strptime(current[:19], fmt)
            end = datetime.datetime.strptime(last_crawled_time[:19], fmt)
            # hours since the last crawl
            diff = (start - end).total_seconds() / 60.000 / 60.000
            diff = float(node["calculated_frequency"]) - diff
            node["time_remaining"] = diff
            node.push()

    def sort_node(self, outlinks):
        """Refresh time_remaining DB-wide and top *outlinks* up to ~100."""
        num = len(outlinks)
        count = 0
        nodes = self.graph_instance.data("MATCH (n) RETURN n")
        for n in nodes:
            if not n["n"]["link"] in outlinks:
                self.remaining_time(n["n"]["link"])
        nodes = self.graph_instance.data(
            "MATCH (n) RETURN n ORDER BY (n.time_remaining) DESC")
        for n in nodes:
            link = n["n"]["link"]
            if not link in outlinks:
                outlinks.append(link)
                count = count + 1
            if count + num > 100:
                break

    def prioritize_dic(self, outlinks):
        """Return ``{"prioritizedLinks": [...]}`` with priority values."""
        new_links = self.prioritizer(outlinks)
        data = {"prioritizedLinks": []}
        p_value = 1
        for l in new_links:
            l_data = {}
            l_data["link"] = l
            l_data["priority_value"] = p_value * 10
            data["prioritizedLinks"].append(l_data)
            p_value = p_value + 1
        return data
class DBConc(object):
    """Disease/symptom knowledge-graph access layer over py2neo.

    NOTE(review): the connection URL and credentials are hard-coded below —
    move them to configuration / environment variables.
    """

    def __init__(self):
        self.graph = Graph("http://liublack.cn:7474", auth=("neo4j", "200001"))

    def search_one(self, label, name):
        """Return a Disease/Symptom wrapper for the first node matching
        *name* under *label*, or None when absent / label unknown."""
        node = self.graph.nodes.match(label, name=name).first()
        if node is None:
            return None
        if label == 'disease':
            # The original passed the Node *class* (Disease(Node)) instead
            # of the matched node object.
            return Disease(node)
        elif label == 'symptom':
            return Symptom(node)
        return None

    def search(self, label, **keys):
        pass

    def exist(self, label, name):
        """True when a node with this label and name exists."""
        node = self.graph.nodes.match(label, name=name).first()
        return node is not None

    def insertDisease(self, disease):
        """Insert a Disease entity unless one with the same name exists.

        :raises Exception: when *disease* is not a Disease instance
        """
        if self.exist('disease', disease['name']):
            return
        if type(disease) != Disease:
            raise Exception('type(disease) not equals Disease')
        diseaseNode = Node('disease', **disease.data)
        self.graph.create(diseaseNode)

    def insertSymptom(self, symptom):
        """Insert a Symptom entity unless one with the same name exists.

        :raises Exception: when *symptom* is not a Symptom instance
        """
        if self.exist('symptom', symptom['name']):
            return
        if type(symptom) != Symptom:
            raise Exception('type(symptom) not equals Symptom')
        symptomNode = Node('symptom', **symptom.data)
        self.graph.create(symptomNode)

    def deleteRelationships(self, rtype):
        """Delete every relationship of type *rtype*."""
        try:
            subG = Subgraph(relationships=self.graph.relationships.match(
                r_type=rtype))
            self.graph.separate(subG)
        except ValueError as e:
            # Subgraph() raises ValueError when the match is empty
            print(e)

    def establishRelationship(self, left, right, rtype, pname,
                              correlation=None):
        """Rebuild all *rtype* edges from *left*-labelled nodes to the
        *right*-labelled nodes named in each left node's *pname* list.

        :param correlation: optional similarity model; edge 'value' is the
            similarity mapped from [-1, 1] to [0, 1], defaulting to 1
        """
        # start from a clean slate for this relation type
        self.deleteRelationships(rtype)
        nodes = self.graph.nodes.match(left)
        for lnode in nodes:
            names = lnode[pname]
            for name in names:
                rnode = self.graph.nodes.match(right, name=name).first()
                if rnode is None:
                    continue
                if self.graph.match_one((lnode, rnode), r_type=rtype) is None:
                    if correlation is not None:
                        # similarity in [-1, 1] -> [0, 1]
                        try:
                            value = correlation.similarity(
                                lnode['name'], rnode['name'])
                            value = (value + 1) / 2
                        except KeyError as e:
                            # out-of-vocabulary name: fall back to 1
                            print(e)
                            value = 1
                    else:
                        value = 1
                    r = Relationship(lnode, rtype, rnode, value=value)
                    self.graph.create(r)

    def establishAllRelationship(self, correlation=None):
        """Rebuild d-d, d-s and s-s relationship sets."""
        self.establishRelationship('disease', 'disease', 'd-d',
                                   'relatedDiseases', correlation=correlation)
        self.establishRelationship('disease', 'symptom', 'd-s',
                                   'typicalSymptoms', correlation=correlation)
        self.establishRelationship('symptom', 'symptom', 's-s',
                                   'relatedSymptoms', correlation=correlation)

    def getDSCorrelation(self, label='correlate', alpha=0.3, maxDepth=5,
                         wvmodel=None):
        """Compute disease-symptom correlation scores and dump them to file.

        For each (disease, symptom) pair, tries a shortest-path score first,
        then the word-vector model, then a 0.1 fallback.

        :param alpha: path-decay factor for the shortest-path score
        :param maxDepth: maximum path length considered
        :param wvmodel: optional word2vec-style model with .similarity()
        :return: {disease: {symptom: (value, method)}}
        """
        symptomSet = set()
        diseaseSet = set()
        result = {}
        with open(Configurer.SYMPTOM_DICT_PATH, 'r') as f:
            for line in f.readlines():
                symptomSet.add(line.split(' ')[0])
        logging.info('症状集加载完毕')
        with open(Configurer.DISEASE_DICT_PATH, 'r') as f:
            for line in f.readlines():
                diseaseSet.add(line.split(' ')[0])
        logging.info('疾病集合加载完毕')
        # context manager instead of a bare open(): the original leaked
        # this file handle
        with open(Configurer.DS_CORRELATION_PATH, 'w') as f:
            for disease in diseaseSet:
                result[disease] = {}
                for symptom in symptomSet:
                    result[disease][symptom] = ''
                    try:
                        statement = 'match (p1:disease {name: "%s"}), (p2:symptom {name:"%s"}), p = shortestpath((p1)-[*..%d]-(p2)) return p' % (
                            disease, symptom, maxDepth)
                        cursor = self.graph.run(statement)
                        path = cursor.current['p'] if cursor.forward() else None
                    except Exception as e:
                        path = None
                    if path:
                        # Decayed sum of edge values along the shortest path
                        value, frac, n = (0, 0, 0)
                        for entity in walk(path):
                            if isinstance(entity, Relationship):
                                value += entity['value']
                                frac += (1 + alpha)**n
                                n += 1
                        value /= frac
                        result[disease][symptom] = (value, 'shortest path')
                    elif wvmodel:
                        try:
                            value = wvmodel.similarity(disease, symptom)
                            # NOTE(review): /4 maps [-1,1] to [0,0.5] while
                            # establishRelationship uses /2 — presumably a
                            # deliberate down-weighting; confirm.
                            value = (value + 1) / 4
                            result[disease][symptom] = (value, 'w2vModel')
                        except KeyError as e:
                            logging.warning(str(e))
                    if result[disease][symptom] == '':
                        # no path and no model score: fixed fallback
                        result[disease][symptom] = (0.1, 'cannot compute')
                    logging.info('%s - %s - %s by %s\n' %
                                 (disease, result[disease][symptom][0],
                                  symptom, result[disease][symptom][1]))
                f.write(str(result[disease]) + '\n')
                f.flush()
            f.write('\n\n\n\n' + str(result))
        return result

    def clearDB(self):
        """Delete every node and relationship in the database."""
        self.graph.delete_all()

    def getSymptomsFromDisease(self, diseaseList):
        """Names of symptoms reachable via 'have' edges from the diseases."""
        symptomSet = set()
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            rels = self.graph.match((node, ), r_type='have')
            for r in rels:
                symptomSet.add(r.end_node['name'])
        return symptomSet

    def getDiseaseFromSymptoms(self, symptomList):
        """Names of diseases that 'have' any of the given symptoms."""
        diseaseSet = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            rels = self.graph.match((None, node), r_type='have')
            for r in rels:
                diseaseSet.add(r.start_node['name'])
        return diseaseSet

    def existHaveRelationship(self, disease, symptom):
        """True when a 'have' edge links the named disease and symptom."""
        ndisease = self.graph.nodes.match('disease', name=disease).first()
        nsymptom = self.graph.nodes.match('symptom', name=symptom).first()
        if ndisease is None or nsymptom is None:
            return False
        rel = self.graph.match_one((ndisease, nsymptom), r_type='have')
        return rel is not None

    def getDiseaseDetails(self, diseaseList):
        """List of property dicts for each named disease.

        NOTE(review): self.diseaseItemProerties (sic) is not defined in this
        class — presumably assigned elsewhere; verify the attribute name.
        """
        details = []
        for disease in diseaseList:
            node = self.graph.nodes.match('disease', name=disease).first()
            diseaseItem = dict()
            for pname in self.diseaseItemProerties:
                diseaseItem[pname] = node[pname]
            details.append(diseaseItem)
        return details

    def getRelatedSymptoms(self, symptomList):
        """Unique names of symptoms related ('relate') to the given ones."""
        symptoms = set()
        for symptom in symptomList:
            node = self.graph.nodes.match('symptom', name=symptom).first()
            rels = self.graph.match((node, None), r_type='relate')
            relatedSymptoms = [rel.end_node['name'] for rel in rels]
            symptoms.update(relatedSymptoms)
        return list(symptoms)
class GraphdbClient(object):
    """Generic Neo4j client with merge-style upserts and Cypher query
    builders (conditions are pre-formatted by ``condition_clause_format``).
    """

    @LazyProperty
    def __db_host__(self):
        return self.db_host

    @LazyProperty
    def __db_port__(self):
        return self.db_port

    @LazyProperty
    def __db_user__(self):
        return self.db_user

    @LazyProperty
    def __db_password__(self):
        return self.db_password

    def get_db(self):
        """Return the underlying py2neo Graph."""
        return self.graph

    def __init__(self, db_host='localhost', db_port=27017, db_user='',
                 db_password=''):
        # NOTE(review): 27017 is MongoDB's default port, not Neo4j's —
        # callers appear to always pass db_port explicitly; confirm.
        self._setDatabase(db_host, db_port, db_user, db_password)

    def _setDatabase(self, db_host, db_port, db_user, db_password):
        """Store connection settings and open the Graph handle."""
        self.db_host = db_host
        self.db_port = db_port
        self.db_user = db_user
        self.db_password = db_password
        # NOTE(review): passing the port as 'http' while bolt=True looks
        # odd but matches the py2neo v3 keyword scheme this code targets.
        self.graph = Graph(host=self.db_host, bolt=True, http=self.db_port,
                           user=self.db_user, password=self.db_password)
        self.log = LogHandler("graphdb_client", level=20)

    def insert_or_update_node(self, label, key, vals=None):
        """Merge the node identified by neo_id '<label>_<key>', then set
        its properties.

        Lists of dicts are JSON-encoded element-wise, since Neo4j
        properties cannot hold maps.

        Example:
            insert_or_update_node("Test", "yexuliang",
                                  {"Test": "123123", "has_crawled": 1})
        """
        nd = Node(label, neo_id="{}_{}".format(label.lower(), key))
        self.graph.merge(nd)
        # .items() — the original used Python-2-only iteritems()
        for k, v in (vals or {}).items():
            if isinstance(v, list) and v and isinstance(v[0], dict):
                nd[k] = [json.dumps(elem) for elem in v]
            else:
                # Scalars and plain lists are stored as-is; the original
                # silently replaced every non-list value with an empty
                # list and raised IndexError on empty lists.
                nd[k] = v
        self.graph.push(nd)
        return nd

    def insert_or_update_relation(self, rel_type, st_nd, end_nd, vals=None):
        """Merge a relationship of *rel_type* between two nodes and set
        its properties.

        Example:
            insert_or_update_relation("T_Rel",
                                      Node("Test", uid="yexuliang"),
                                      Node("Test", uid="3310858"),
                                      {"clue": ["id", "screen_name"]})
        """
        rel = Relationship(st_nd, rel_type, end_nd)
        self.graph.merge(rel)
        for k, v in (vals or {}).items():
            rel[k] = v
        self.graph.push(rel)

    def find_node_by_id(self, label, key):
        """Find the node with the given neo_id, or None.

        Example: find_node_by_id("Douban", "Weibo_3513921")
        """
        nds = self.graph.data(
            "MATCH (nd:{} {{neo_id: '{}'}}) RETURN nd".format(label, key))
        if nds:
            return nds[0]['nd']
        return None

    def find_node_by_rel(self, nd_label, nd_info, rel_type, rel_info=None,
                         is_count=False, limit=None, skip=None):
        """Find nodes related to nodes matching *nd_info* via *rel_type*.

        :param nd_info: {property_name: formatted condition value}
        :param rel_info: filter on the relationship's properties
        :param is_count: when True, return the raw count cursor
        :param limit: optional LIMIT
        :param skip: optional SKIP (for paging)
        Example: find_node_by_rel("Douban", {"name": "=~'ta'"},
                                  'ALIGN', {'ID': '>.9'})
        """
        rel_info = rel_info or {}
        cql = "MATCH (nd:{})-[a:{}]-(res)".format(nd_label, rel_type)
        # join only the non-empty clauses; the original produced a leading
        # " AND " when the node clause was empty
        clauses = [c for c in (condition_clause_format('nd', nd_info),
                               condition_clause_format('a', rel_info)) if c]
        condition_clause = " AND ".join(clauses)
        if is_count:
            ret_clause = " RETURN count(nd) as num"
        else:
            ret_clause = " RETURN res"
        if condition_clause:
            cql += " WHERE " + condition_clause
        cql += ret_clause
        if skip and isinstance(skip, int):
            cql += " SKIP {}".format(skip)
        if limit and isinstance(limit, int):
            cql += " LIMIT {}".format(limit)
        self.log.debug(u"Query: %s" % cql)
        nds = self.graph.run(cql)
        if is_count:
            return nds
        graph_nds = [nd['res'] for nd in nds]
        return graph_nds or None

    def find_node_by_property(self, label, nd_info=None, is_count=False,
                              limit=None, skip=None):
        """Find nodes of *label* matching the formatted conditions.

        :param nd_info: {property_name: formatted condition value}
        :param is_count: when True, return the count query result
        :param limit: optional LIMIT
        :param skip: optional SKIP (for paging)
        Example: find_node_by_property("Douban", {"name": "=~'ta'"}, False)
        """
        nd_info = nd_info if nd_info is not None else dict()
        cql = "MATCH (nd:{})".format(label)
        condition_clause = condition_clause_format('nd', nd_info)
        if is_count:
            ret_clause = " RETURN count(nd) as num"
        else:
            ret_clause = " RETURN nd"
        if condition_clause:
            cql += " WHERE " + condition_clause
        cql += ret_clause
        if skip and isinstance(skip, int):
            cql += " SKIP {}".format(skip)
        if limit and isinstance(limit, int):
            cql += " LIMIT {}".format(limit)
        self.log.debug(u"Query: %s" % cql)
        nds = self.graph.data(cql)
        if is_count:
            return nds
        graph_nds = [nd['nd'] for nd in nds]
        return graph_nds or None

    def find_rel_by_property(self, st_nd, end_nd, rel_type='ALIGN',
                             rel_info=None, is_count=False, limit=None,
                             skip=None):
        """Find relationships whose endpoints and properties match.

        :param st_nd: start-node info dict; must contain 'label'
        :param end_nd: end-node info dict; must contain 'label'
        :param rel_info: filter on the relationship's properties
        :param is_count: when True, return the count query result
        :param limit: optional LIMIT
        :param skip: optional SKIP (for paging)
        Example: find_rel_by_property({'label':'Douban',
                                       'name':"=~'.*ta.*'"},
                                      {'label':'Weibo'})
        """
        rel_info = rel_info if rel_info is not None else dict()
        if 'label' not in st_nd or 'label' not in end_nd:
            self.log.warning(
                u"No specific relation type in 'find_rel_by_property'")
            return None
        cql = "MATCH (st_nd:{})-[rel:{}]-(end_nd:{})".format(
            st_nd['label'], rel_type, end_nd['label'])
        # BUG FIX: the original concatenated the rel_info *dict* itself
        # into the WHERE clause (TypeError whenever rel_info was given)
        # instead of its formatted condition string.
        clauses = [c for c in (condition_clause_format('st_nd', st_nd),
                               condition_clause_format('end_nd', end_nd),
                               condition_clause_format('rel', rel_info)) if c]
        condition_clause = " AND ".join(clauses)
        if is_count:
            ret_clause = " RETURN count(rel) as num"
        else:
            ret_clause = " RETURN st_nd, rel, end_nd"
        if condition_clause:
            cql += " WHERE " + condition_clause
        cql += ret_clause
        if skip and isinstance(skip, int):
            cql += " SKIP {}".format(skip)
        if limit and isinstance(limit, int):
            cql += " LIMIT {}".format(limit)
        self.log.debug(u"Query: %s" % cql)
        rels = self.graph.data(cql)
        graph_rels = [rel['rel'] for rel in rels]
        return graph_rels or None

    def delete(self, subgraph):
        """Delete a relationship (separate) or any other subgraph (delete)."""
        if isinstance(subgraph, Relationship):
            self.graph.separate(subgraph)
        else:
            self.graph.delete(subgraph)

    def clear(self):
        """Delete everything in the database."""
        self.graph.delete_all()