if __name__ == '__main__': data = ProcessXML('Ara2009.xml') graph = Graph('bolt://palsson.di.uminho.pt:6092', auth=('neo4j', '123')) compartments = data.getCompartments() species = data.getSpecies() reactions = data.getReactions() dicReact = data.getReactionDict() ''' nodeC = Node("Compartment", id='C', name='Cytosol', model='Arabidopsis') graph.create(nodeC) nodeM = Node("Compartment", id='M', name='Mitochondrion', model='Arabidopsis') graph.create(nodeM) nodeE = Node("Compartment", id='E', name='Extracellular', model='Arabidopsis') graph.create(nodeE)''' matcher = NodeMatcher(graph) nodeModel = Node('Model', id='Arabidopsis', name='Arabidopsis') for spec in species.keys(): try: if '_mit' in species[spec][0]: id = species[spec][0].replace('_mit', '') nodeSpec = matcher.match('Species').where( '_.id="' + id + '"').where('_.compartment="' + 'M' + '"') relation = Relationship(nodeModel, 'CONTAINS', nodeSpec.first()) graph.create(relation) elif 'x_' in species[spec][0]: id = species[spec][0].replace('x_', '') nodeSpec = matcher.match('Species').where( '_.id="' + id + '"').where('_.compartment="' + 'E' + '"')
import pymysql from mysqlservice import * from bookService import * from py2neo import Graph,Relationship,NodeMatcher,Node from keywordService import * graph1 = Graph(host='http://112.74.160.185', http_port=7474, user='******', password='******') graph2 = Graph('http://112.74.160.185:7474/browser/', user='******', password='******') graph3 = Graph('https://localhost:7473/browser/', user='******', password='******') graph4 = Graph(password='******') graph = graph2.begin() # 打开图数据库,未打开时不能进行操作 matcher = NodeMatcher(graph) #使用NodeMatcher来查找数据 def insertAuthorFromFile(): authorFile = open('D:/book_infor/data/nr_nouns1.txt', 'r', encoding='UTF-8-sig') root = getUsernameAndPassword() conn = pymysql.connect( host=root['host'], port=3306, user=root['username'], password=root['password'], database='test', charset='utf8' ) cursor =conn.cursor() for author in authorFile.readlines(): if len(author.split(':'))>1: nation = author.split(':')[0] authorname = author.split(':')[1] print(nation + ";" + authorname) sqlStr = "INSERT INTO author(author_name ,nation) VALUES(%s,%s)"
def delete_node_by_id(node_id: str, graph: Graph) -> bool:
    """Delete the node with the given internal identity from *graph*.

    Args:
        node_id: Node identity passed to ``NodeMatcher.get``.
        graph: Connected py2neo ``Graph`` instance.

    Returns:
        True if a matching node was found and deleted, False otherwise.
        (The original declared ``-> bool`` but returned None, and would
        crash on ``tx.delete(None)`` when no node matched.)
    """
    node = NodeMatcher(graph).get(node_id)
    if node is None:
        # Nothing to delete; don't open a transaction just to abandon it.
        return False
    tx = graph.begin()
    tx.delete(node)
    tx.commit()
    return True
app = Flask(__name__) cors = CORS(app) api = Api(app) # Allow POST requests with JSON app.config['CORS_HEADERS'] = 'Content-Type' config = SafeConfigParser() assert os.path.exists(os.getcwd() + '/conf/settings.cfg') config.read(os.getcwd() + '/conf/settings.cfg') graph_adapter = GraphAdapter() graph_dao = Graph(GRAPH_URI, auth=(config.get('neo4j', 'username'), (config.get('neo4j', 'password')))) graph_repository = GraphRepository(graph_adapter, NodeMatcher(graph_dao), RelationshipMatcher(graph_dao)) # GraphAdapter(GRAPH_URI,config.get('neo4j','username'),config.get('neo4j','password')) class PathEncoder(JSONEncoder): def default(self, o): return o.__dict__ app.json_encoder = PathEncoder artist_fields = {'name': fields.String, 'id': fields.String} class ArtistsIdApi(Resource): @marshal_with(artist_fields)
agent = Node("http://www.w3.org/ns/prov/Agent", name=agent_name, email=agent_email, git=agent_git) tx.create(agent) script_repo = input( "please enter the repository url for the script used to generate this collection: " ) script_version = input("please enter the commit id: ") script = Node("http://underlay.org/ns/Script", repository=script_repo, version=script_version) tx.create(script) generated_relation = Relationship( collection, "http://www.w3.org/ns/prov/WasGeneratedBy", script) attribution_relation = Relationship( collection, "http://www.w3.org/ns/prov/WasAttributedTo", agent) tx.create(generated_relation) tx.create(attribution_relation) tx.commit() if __name__ == "__main__": graph_db = Graph(uri, auth=(user, password)) graph_db.delete_all() matcher = NodeMatcher(graph_db) fill_schema()
@author: 宋小兰 """ import json from py2neo import Graph, Node, Relationship, NodeMatcher graph = Graph(host='localhost', auth=('neo4j', 'neo4j')) lines = [] # 第一步:定义一个列表, 打开文件 with open("清热药完整属性_end.txt", 'rt', encoding="utf-8") as f: for row in f.readlines(): # 第二步:读取文件内容 if row.strip().startswith("//"): # 第三步:对每一行进行过滤 continue lines.append(row) # 第四步:将过滤后的行添加到列表中. count = 0 for i in range(2063, 2919): matcher = NodeMatcher(graph) n = matcher.match("中药", id=i) for node in n: #print(node) #print(count) dict = json.loads( lines[count], strict=False ) #将列表中的每个字符串用某一个符号拼接为一整个字符串,用json.loads()函数加载,这样就大功告成啦!! #print(dict.keys()) node['other_name'] = dict['【别名】'] node['pinyin_name'] = dict['【汉语拼音】'] node['chengfen'] = dict['【成份】'] node['jiagongcaiji'] = dict['【加工采集】'] node['yaocaijianbie'] = dict['【药材鉴别】']
class GeneralFlow(BaseFlow):
    """Config-driven flow: pages through a source, extracts fields per the
    config section for this flow, and compares each record against the
    matching Neo4j node, recording mismatches.

    NOTE(review): this block was reconstructed from collapsed formatting;
    indentation inside ``getId``'s ``'--'`` branch was ambiguous — verify
    against the original file.
    """

    def __init__(self, cf, name):
        # cf: full config mapping; name: key selecting this flow's section.
        self.name = name
        self.cf = cf
        self._cf = self.cf[name]
        self.graph = neoClient(cf)
        self.nodeMatcher = NodeMatcher(self.graph)

    @property
    def pageSize(self):
        # Configured page size, defaulting to 200 when absent or falsy.
        return self._cf.get('pagesize') if self._cf.get('pagesize', None) else 200

    def paging(self):
        """Yield page indices covering the total record count."""
        total = self.count()
        self.log(self.name, 'count', total)
        return range(ceil(total / self.pageSize))

    def getId(self, data, ids, sign):
        """Build a record id from one or more fields, combined per *sign*.

        ',' joins all values with '_'; '||' returns the first truthy value;
        '--' subtracts the sum of the tail fields from the head field.
        """
        if ',' == sign:
            idlist = []
            for id in ids:
                idlist.append(data.get(id, ''))
            return ('_'.join(idlist)).strip('_')
        elif '||' == sign:
            for id in ids:
                value = data.get(id, '')
                if value:
                    self.log(self.name, id, value)
                    return value
        elif '--' == sign:
            for id in ids[1:]:
                # NOTE(review): idlist is re-created on every iteration, so
                # only the last tail field survives into sum(); this looks
                # like a bug — confirm intended behavior before relying on it.
                idlist = []
                idlist.append(data.get(id, 0.0))
            return data.get(ids[0], 0.0) - sum(idlist)

    def getValues(self, data):
        """Extract configured fields from a raw record into a dict."""
        dataDict = {}
        for field in self._cf['fields']:
            if field[0] == 'id':
                # Composite ids use the configured sign; otherwise fall back
                # to a plain path lookup like any other field.
                dataDict[field[0]] = self.getId(
                    data, field[1].split(self._cf.get('sign', '$$')),
                    self._cf.get('sign', '$$')) if self._cf.get(
                        'sign', '$$') in field[1] else getValueFromPath(
                            self.name, field[0], field[1], data, field[2])
            else:
                dataDict[field[0]] = getValueFromPath(self.name, field[0],
                                                      field[1], data, field[2])
        # Normalize list-valued fields so comparisons are order-insensitive.
        for field in self._cf['orderfields']:
            if len(dataDict[field]) > 1:
                dataDict[field] = sorted(dataDict[field], reverse=True)
        logger.warning('[source dataDict={}]'.format(dataDict))
        return dataDict

    def fetch(self):
        """Generator over all source records, page by page."""
        if isinstance(self, MongoGeneralFlow):
            # Mongo-backed flows need an explicit connection first.
            self.connect()
        messages = []
        for i in self.paging():
            self.delay()
            messages = self.pagingFind(self.pageSize, i * self.pageSize)
            for message in messages:
                yield self.getValues(message)

    @retry(stop=stop_after_attempt(2))
    def nodeMatch(self, mData):
        """Fetch the graph node with this record's id as a plain dict
        (empty dict when no node exists); drops the '__v' key if present."""
        data = self.nodeMatcher.match(self._cf['nodeName'],
                                      id=mData['id']).first()
        data = dict(data) if data else {}
        try:
            data.pop('__v')
        except:  # NOTE(review): bare except silently ignores any failure
            pass
        return data

    def matcher(self, mData):
        """Compare a source record with its graph counterpart and record
        the pair when they differ (JSON-serialized comparison)."""
        gDate = self.nodeMatch(mData)
        gDate = self.order(gDate)
        logger.warning('[source gDate={}]'.format(gDate))
        if not gDate or (json.dumps(mData, ensure_ascii=False) != json.dumps(
                gDate, ensure_ascii=False)):
            self.record(mData, gDate)
        #else:
        #    logger.error('[success ={}]'.format(gDate['id']))

    def check(self):
        # Intentionally a no-op hook for subclasses.
        pass

    def order(self, gDate):
        """Project the graph node onto the configured field set, filling
        missing values via format_kongoutput and sorting list fields."""
        dataDict = {}
        for field in self._cf['fields']:
            dataDict[field[0]] = gDate.get(field[0]) if gDate.get(
                field[0], None) is not None else format_kongoutput(
                    field[2], gDate.get(field[0]))
        for field in self._cf['orderfields']:
            if len(dataDict[field]) > 1:
                dataDict[field] = sorted(dataDict[field], reverse=True)
        return dataDict

    def work(self):
        """Entry point: iterate all records, compare each, then log 'end'."""
        for message in self.fetch():
            self.log(self.name, 'message', message, level=self._cf['level'])
            self.matcher(message)
        self.log(self.name, 'end', 'ok')
from py2neo import Graph, Node, Relationship, NodeMatcher import csv graph = Graph("http://localhost:7474", username="******", password='******') graph.delete_all() with open("XXX.csv", "r", encoding="utf-8") as csvfile: #读取csv文件,返回的是迭代类型 reader = csv.reader(csvfile) for line in reader: #node = graph.data('MATCH (p:Person) return p') flag = 0 matcher = NodeMatcher(graph) a = matcher.match("Director", name=line[0]).first() b = matcher.match("Actor", name=line[1]).first() if (a == None): flag = 1 a = Node('Director', name=line[0]) if (b == None): flag = 1 b = Node('Actor', name=line[1]) if (flag == 1): r = Relationship(a, 'Cooperate', b) r['times'] = 1 s = a | b | r print(s) graph.create(s) if (flag == 0): relation = graph.match_one([a, b]) print(a) print(b)
def match_node(self, node_model: NodeModel):
    """Find every node labeled with the model's class name whose
    properties equal the model's properties.

    Returns the matches as a list.
    """
    label = type(node_model).__name__
    props = node_model.get_properties()
    return list(NodeMatcher(self).match(label, **props))
dir = 'IOT_' + str(year) + '_ixi/' print(year) params = json.load(open(__location__ + dir + 'file_parameters.json')) Afile = params['files']['A'] A = pd.read_csv(__location__ + dir + Afile['name'], sep='\t', memory_map=True, skiprows=int(Afile['nr_header']), index_col=list(range(int(Afile['nr_index_col'])))) Aindex = np.array(list(r for r in A.index)) A = sp.csr_matrix(A) #sudo kill -9 $(pgrep neo4j ) selector = NodeMatcher(graph) countries = selector.match("Country") industry = selector.match("Industry") ## rm all relationships ''' relation = RelationshipMatcher(graph) for i in list(relation.match()): graph.delete(i) sudo kill -9 $(pgrep neo4j ) sudo kill -9 $(pgrep python ) ## remove all industries for i in list(list(industry)):
def __init__(self):
    # Bolt connection to the local Neo4j instance.
    # NOTE(review): py2neo 4+ expects `user=`/`auth=` rather than
    # `username=` — confirm against the py2neo version pinned here.
    self.graph = Graph("bolt://localhost:7687", username="******", password='******')
    # Matchers reused by the query methods of this class.
    self.node_matcher = NodeMatcher(self.graph)
    self.rel_matcher = RelationshipMatcher(self.graph)
    # Query state filled in later by callers.
    self.search_type = ""
    self.name = ""
# Demo of py2neo NodeMatcher / RelationshipMatcher usage.
from py2neo import Graph, NodeMatcher, RelationshipMatcher

graph = Graph(password='******')
matcher = NodeMatcher(graph)

# First teacher node with the given name.
first_teacher = matcher.match("老师", name="冯小刚").first()
print(first_teacher)

# All student nodes whose name matches the regex.
students = matcher.match("学生").where("_.name =~ '二.*'")
print(list(students))

# Same filter, ordered by age and capped at three results.
ordered = matcher.match("学生").where("_.name =~ '二.*'").order_by("_.age")
print(list(ordered.limit(3)))

# Relationships of the given type.
rMatch = RelationshipMatcher(graph)
print(list(rMatch.match(r_type='师生乱情')))
# Assign a UUID 'id' property to every non-Placeholder node that lacks one.
from py2neo import Graph, NodeMatcher
import uuid

graph = Graph(password="******")
matcher = NodeMatcher(graph)
# NOTE(review): this transaction is opened but the pushes below go through
# graph.push (autocommit) — txn appears to be dead weight; confirm.
txn = graph.begin()
# Count all nodes except Placeholder ones.
nodes = graph.run(
    "MATCH (n) WHERE NOT n:Placeholder WITH COUNT(n) AS nodes RETURN nodes"
).data()[0]['nodes']
for i in range(nodes):
    # NOTE(review): assumes node identities are contiguous 0..count-1,
    # which Neo4j does not guarantee after deletions — verify.
    node = matcher.get(i)
    if node is not None and node['id'] is None:
        node['id'] = str(uuid.uuid4())
        graph.push(node)
txn.commit()
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher import re graph = Graph("http://*****:*****@")
__author__ = "ZhouLiao"
from pyzil.account import Account
from pyzil.crypto.zilkey import is_bech32_address
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher

# Add the corresponding bech32-format address for fromAddr and toAddr of
# every transaction.
uri, user, pwd = "http://localhost:7474", "neo4j", "liaozhou1998"
graph = Graph(uri, username=user, password=pwd)
matcher_node = NodeMatcher(graph)


def getBech32(address):
    """Return *address* unchanged if it is already bech32, otherwise
    convert it via pyzil's Account."""
    if is_bech32_address(address):
        return address
    account = Account(address=address)
    return account.bech32_address


def printf(message):
    """Print *message* and append it to the update log file.

    NOTE(review): lines are terminated with '\r' (carriage return only) —
    confirm this is the intended log line ending.
    """
    print(message)
    with open("../log/update.log", "a") as file:
        file.write(message + '\r')


# txs = matcher_node.match("Tx")
accounts = matcher_node.match("Account")
# for tx in txs:
#     fromAddr, toAddr = tx["fromAddr"], tx["toAddr"]
#     fromAddrNew, toAddrNew = getBech32(fromAddr), getBech32(toAddr)
#     tx.update({"fromAddrNew": fromAddrNew, "toAddrNew": toAddrNew})
#     printf(fromAddr + "," + fromAddrNew + "," + toAddr + "," + toAddrNew)
import requests from lxml import etree from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher import time import random graph = Graph('http://10.102.24.46:9292', username='******', password='******') matcher = NodeMatcher(graph) rela_matcher = RelationshipMatcher(graph) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36', # 'cookie': "my_mbcookie=9381740248; gr_user_id=aadb4e55-834c-4b15-9629-0ad5b46be83f; grwng_uid=8afb463c-2554-482a-b175-584868f76b76; _ga=GA1.2.1853337866.1539237768; deny_ip=UWMHa1ViACtWYFBoVGAHLgBvAzJReAg0VmUGMw%3D%3D; g_step_tmp=1; _pk_ref.2.9731=%5B%22%22%2C%22%22%2C1541396804%2C%22http%3A%2F%2Fwww.molbase.com%2Fen%2F488-93-7-moldata-179475.html%22%5D; _pk_ses.2.9731=*; ad747e1972c640de_gr_session_id=7e1a2bb0-a122-4b3f-a364-e2ecd53690c0; _gid=GA1.2.1927091434.1541396804; ad747e1972c640de_gr_session_id_7e1a2bb0-a122-4b3f-a364-e2ecd53690c0=true; current_user_key=689615e92a91e9b63ff65108985e2782; count_views_key=113c494d84d1f9713d661963f783d079; ECM_ID=rf5ko34u1lcn6vn2vd67s0t061; ECM_ID=rf5ko34u1lcn6vn2vd67s0t061; Hm_lvt_16ee3e47bd5e54a79fa2659fe457ff1e=1539237692,1539323463,1541127887,1541396811; _pk_id.2.9731=b06a2f06be918374.1539237692.7.1541399717.1541396804.; Hm_lpvt_16ee3e47bd5e54a79fa2659fe457ff1e=1541399717; lighting=eyJpdiI6IjFJNnJQUTNuUjh0TzQ3WFZcL1ZlOG13PT0iLCJ2YWx1ZSI6IlhYK1UyVW50ekx6SzVnTWlScXkxbzUwTEJCOW1Eb1BtdEIxTXRaWnE1SzR6RTNrM1JJMXRcL0tpRCtKSmgxaHptNTB2VTdTTnl5OFZqOTZ6V05INDJSZz09IiwibWFjIjoiMTIxYTRhZWJiZjJlYjAyYzg1MmFjNzUzZmZiODg1OTJlOTE1NGI2YzZkMzYxNmY1MGRlMTU4NTg5Y2ViZmQwZiJ9", 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 'Accept-Language': 'zh-CN,zh;q=0.9', # 'Host': 'data.huaxuejia.cn', 'Accept-Encoding': 'gzip, deflate' } def set_imgs(cas): num = cas.split('-')[0] num = num[-3:] if len(num) > 3 else num return 'http://data.huaxuejia.cn/casimg/%s/%s.png' % (num, 
cas), 'http://data.huaxuejia.cn/casimg/%s/%s.gif' % ( num, cas) def loading_updownstream(content): eles = etree.HTML(content) updowns = eles.xpath('//ul[@class="list-inline mb20 udStream_list"]') up = []
directors = RelatedFrom('Person', 'DIRECTED') productors = RelatedFrom('Person', 'PRODUCED') list = [(a.name, a.born) for a in Person.match(graph).limit(3)] graph = Graph(password='******') a = Node('Person', name='Alice', age=21, location='广州') b = Node('Person', name='Bob', age=22, location='上海') c = Node('Person', name='Mike', age=21, location='北京') r1 = Relationship(a, 'KNOWS', b) r2 = Relationship(b, 'KNOWS', c) graph.create(a) graph.create(r1) graph.create(r2) matcher = NodeMatcher(graph) # 等价于 matcher = graph.nodes persons = matcher.match("Person", age='21') person = persons.first() matcher.match("Person").where("_.name =~ 'K.*").order_by("_.name").limit(3) # OGM 查询 对象和 Node 的映射 person = Person.match(graph).where(age=21).first() print(person.__ogm__.node) person.age = 22 print(person.__ogm__.node) graph.push(person)
class UserService:
    """CRUD helpers for User nodes and their taste profiles in Neo4j.

    Uses the module-level ``db`` graph connection. All create_* methods
    return True on success and False when the precondition fails
    (user already exists / user not found).

    Fix over the original: ``== None`` / ``!= None`` comparisons replaced
    with the idiomatic ``is None`` / ``is not None``; behavior unchanged.
    """

    def __init__(self):
        # Matcher bound to the shared db graph.
        self.matcher = NodeMatcher(db)

    def get_user(self, username):
        """Return the User node with this username, or None."""
        return self.matcher.match('User', username=username).first()

    def create_user_properties(self, username, sexo, limpo, organizado,
                               comportamento, responsavel, gosta_animais,
                               fuma, bebe):
        """Create a User node; False if the username is already taken."""
        if self.get_user(username) is not None:
            return False
        node = Node("User",
                    username=username,
                    sexo=sexo,
                    limpo=limpo,
                    organizado=organizado,
                    comportamento=comportamento,
                    responsavel=responsavel,
                    gostaDeAnimais=gosta_animais,
                    fuma=fuma,
                    bebe=bebe)
        db.create(node)
        return True

    def create_musical_taste(self, username, pop, rock, brega_recife, funk,
                             mpb, indie, eletronica, hip_hop, rap, metal,
                             jazz, folk, r_b, classica):
        """Attach a Musical taste node to the user via 'temGosto'.

        Returns False when the user does not exist.
        """
        user = self.get_user(username)
        if user is None:
            return False
        musical_taste = Node('Musical',
                             usernameDono=username,
                             pop=pop,
                             rock=rock,
                             bregaRecife=brega_recife,
                             funk=funk,
                             mpb=mpb,
                             indie=indie,
                             eletronica=eletronica,
                             hipHop=hip_hop,
                             rap=rap,
                             metal=metal,
                             jazz=jazz,
                             folk=folk,
                             rb=r_b,
                             classica=classica)
        db.create(Relationship(user, "temGosto", musical_taste))
        return True

    def get_musical_taste(self, username):
        """Return the user's Musical taste node, or None."""
        return self.matcher.match('Musical', usernameDono=username).first()

    def create_movie_taste(self, username, terror, suspense, comedia,
                           comedia_romantica, romantico, ficcao_cientifica,
                           acao, anime, documentario, drama, policiais,
                           besteirol_americano):
        """Attach a Cinefilo taste node to the user via 'temGosto'.

        Returns False when the user does not exist.
        """
        user = self.get_user(username)
        if user is None:
            return False
        movie_taste = Node('Cinefilo',
                           usernameDono=username,
                           terror=terror,
                           suspense=suspense,
                           comedia=comedia,
                           comediaRomantica=comedia_romantica,
                           romantico=romantico,
                           ficcaoCientifica=ficcao_cientifica,
                           acao=acao,
                           anime=anime,
                           documentario=documentario,
                           drama=drama,
                           policiais=policiais,
                           besteirolAmericano=besteirol_americano)
        db.create(Relationship(user, "temGosto", movie_taste))
        return True

    def get_movie_taste(self, username):
        """Return the user's Cinefilo taste node, or None."""
        return self.matcher.match('Cinefilo',
                                  usernameDono=username).first()
from handian_tcm import neo_graph from py2neo import NodeMatcher from django.conf import settings import math matcher = NodeMatcher(neo_graph) def get_info_from_node(instance, model_type, page=1): """ :Neo4j中获取两级节点信息 :调用示例=> get_info_from_node(term_instance, 'Term', page=1) :return dict/None """ a = matcher.get(instance.neo_id) if not a: return None START_ID = a.identity return_json = { 'data': [], 'links': [] } return_json['data'].append({ 'name': a['name'], 'id': START_ID, 'label': list(a.labels)[0]
def __init__(self):
    # Node matcher bound to the module-level `db` graph connection,
    # shared by this service's lookup methods.
    self.matcher = NodeMatcher(db)
def __init__(self, cf, name):
    """Initialize the flow from its config section.

    Args:
        cf: Full configuration mapping.
        name: Key selecting this flow's section within *cf*.
    """
    self.name = name
    self.cf = cf
    self._cf = self.cf[name]  # this flow's own config section
    self.graph = neoClient(cf)  # Neo4j connection built from the config
    self.nodeMatcher = NodeMatcher(self.graph)
# créé une relationship FRIENDS = Relationship.type("FRIENDS") g.merge(FRIENDS(a, c), "Person", "name") # push les creations tx.create(a) tx.create(b) tx.create(c) tx.create(ab) # test exist retourne true tx.exists(ab) # trouver un node en particulier matcher = NodeMatcher(g) print(matcher.match("Person", name="Alice").first()) # nombre de Person ayant age = 33 print(len(matcher.match("Person", age=33))) # lister max 3 personnes personnes dont le nom commence par c print( list(matcher.match("Person").where("_.name =~ 'A.*'").order_by("_.name").limit(3)) ) # - Savoir faire des relations # - Savoir mettre des arguments sur les noeuds # - Savoir mettre des arguments sur les relations
def getMovieData(title):
    """Look up the first Movie node with the exact *title* and return it
    as a JSON response.

    Bug fix: the original passed ``title={title}`` — a one-element set —
    as the property value, so the property comparison could never match a
    string-valued title; the string itself must be passed.
    """
    matcher = NodeMatcher(graph)
    movie = matcher.match("Movie", title=title).first()
    return jsonify(movie)
def add_taboo(): dingdang_taboo_file = open('../生成文件/叮当快药中的存在的禁忌关系.txt','a',encoding='utf-8') dingdang_taboo_file.seek(0) dingdang_taboo_file.truncate()#清空文件 dingdang_taboo_file.write('\t'.join(['SKU','药品名称','关系','禁忌实体','\n'])) dingdang_taboo_list = [] graph = Graph("http://localhost:7474", username="******", password='******') matcher = NodeMatcher(graph) f_xunyiwenyao = pd.read_csv('../data/xunyiwenyao/寻医问药中药品名的禁忌关系.csv', encoding='utf-8') f_dingdang = pd.read_excel('../data/dingdangkuaiyao/与创新工场共创知识图谱的试实验数据.xlsx') yaopingming_xunyiwenyao = list(f_xunyiwenyao['药品名']) tongyongmingcheng_dingdang = list(f_dingdang['通用名称']) shangpinmingcheng_dingdang = [] for i in list(f_dingdang['商品名称']): ii = re.sub(pattern='\[.*\]', string=i, repl='') iii = re.sub(pattern=r'[(](.*)[)]', string=ii, repl='') iiii = re.sub(pattern=r'(.*)', string=iii, repl='') shangpinmingcheng_dingdang.append(iiii) jinji_chonghe = [] for i in list(set(yaopingming_xunyiwenyao) & set(tongyongmingcheng_dingdang)): jinji_chonghe.append(i) for i in list(set(shangpinmingcheng_dingdang) & set(yaopingming_xunyiwenyao)): jinji_chonghe.append(i) buchongfu_jishu = 0 weichuxian= [] for i in range(f_dingdang.shape[0]): a = f_dingdang['商品名称'].loc[i] a = re.sub(pattern='\[.*\]', string=a, repl='') a = re.sub(pattern=r'[(](.*)[)]', string=a, repl='') a = re.sub(pattern=r'(.*)', string=a, repl='') if a not in jinji_chonghe and f_dingdang['通用名称'].loc[i] not in jinji_chonghe: buchongfu_jishu += 1 weichuxian.append(f_dingdang.loc[i]) # print(f_dingdang.loc[i]) weichuxian = pd.DataFrame(weichuxian) weichuxian.to_csv('../生成文件/叮当快药数据中不存在禁忌关系的药物集合.csv',index=False) print('叮当快药实例数据中共有药物{}个,基于寻医问药数据共有{}个药物添加禁忌关系,占总数量的{}%' .format(len(tongyongmingcheng_dingdang), buchongfu_jishu,str( 100*buchongfu_jishu/len(tongyongmingcheng_dingdang) )[:4] )) # print('叮当快药实例数据中共有以通用名称或商品名称为唯一主键的不重复药物{}个,基于寻医问药数据共有{}个药物添加禁忌关系,占总数量的{}%' # 
.format(len(set(tongyongmingcheng_dingdang)),len(set(jinji_chonghe)),len(set(jinji_chonghe))/len(set(tongyongmingcheng_dingdang)))) jinji_list = [] for i in range(f_dingdang.shape[0]): if f_dingdang['通用名称'].loc[i] in jinji_chonghe: jinji_list.append([f_dingdang['商品SKU编码'].loc[i], f_dingdang['通用名称'].loc[i]]) elif f_dingdang['商品名称'].loc[i] in jinji_chonghe: jinji_list.append([f_dingdang['商品SKU编码'].loc[i], f_dingdang['商品名称'].loc[i]]) jinji_num = {'药物-禁忌-病人': 0, '药物-禁忌-药物': 0, '药物-禁忌-疾病或病症': 0, '药物-禁忌-成份': 0} new_entiyt = {'病症':0,'病人属性':0,'成份':0,'禁忌相关药物':0} for i in range(f_xunyiwenyao.shape[0]): for jinji in jinji_list: if f_xunyiwenyao['药品名'].loc[i] == jinji[1]: start_node = matcher.match('商品SKU编码').where(name=str(jinji[0])).first() if start_node ==None: start_node = Node('商品SKU编码',name = str(jinji[0])) label = f_xunyiwenyao['节点类型'].loc[i] if label == '疾病或病症': print(1) end_node1 = matcher.match('疾病').where(name=f_xunyiwenyao['禁忌节点'].loc[i]).first() end_node2 = matcher.match('病症').where(name=f_xunyiwenyao['禁忌节点'].loc[i]).first() if end_node1 != None: if ''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node1)['name'],'\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-疾病或病症'] += 1 graph.create(Relationship(start_node, '禁忌', end_node1)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node1)['name'],'\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node1)['name'],'\n'])) if end_node2 != None: if ''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node2)['name'],'\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-疾病或病症'] += 1 graph.create(Relationship(start_node, '禁忌', end_node2)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node2)['name'],'\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node2)['name'],'\n'])) if end_node2 == None and end_node1 == None: end_node = 
Node('病症', name=f_xunyiwenyao['禁忌节点'].loc[i]) # 如果在寻医问药中的疾病或病症数据没有在叮当中出现,统一设置为病症 new_entiyt['病症'] +=1 if ''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node)['name'],'\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-疾病或病症'] += 1 graph.create(Relationship(start_node, '禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node)['name'],'\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'],'\t','禁忌','\t',dict(end_node)['name'],'\n'])) elif label == '病人属性': end_node = matcher.match('病人属性').where(name=f_xunyiwenyao['禁忌节点'].loc[i]).first() if end_node == None: end_node = Node('病人属性', name=f_xunyiwenyao['禁忌节点'].loc[i]) new_entiyt['病人属性'] += 1 if ''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-病人'] += 1 graph.create(Relationship(start_node, '禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'], '\t',jinji[1], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) elif label == '成份': end_node = matcher.match('成份').where(name=f_xunyiwenyao['禁忌节点'].loc[i]).first() if end_node == None: end_node = Node('成份', name=f_xunyiwenyao['禁忌节点'].loc[i]) new_entiyt['成份'] += 1 if ''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-成份'] += 1 graph.create(Relationship(start_node, '禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1], '\t','禁忌','\t',dict(end_node)['name'],'\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) if end_node != None: if ''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n']) not in dingdang_taboo_list: jinji_num['药物-禁忌-成份'] += 1 graph.create(Relationship(start_node, 
'禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node)['name'],'\n'])) dingdang_taboo_list.append(''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) elif label == '药物': end_name = f_xunyiwenyao['禁忌节点'].loc[i] print('禁忌药物名称',end_name) if end_name in tongyongmingcheng_dingdang: sku = f_dingdang.loc[f_dingdang['通用名称'] == end_name]['商品SKU编码'].tolist()[0] end_node = matcher.match('商品SKU编码').where(name=sku).first() if ''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n']) not in dingdang_taboo_list: graph.create(Relationship(start_node, '禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node)['name'],'\n'])) jinji_num['药物-禁忌-药物'] += 1 dingdang_taboo_list.append(''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) elif end_name in shangpinmingcheng_dingdang: sku = f_dingdang.loc[f_dingdang['商品名称'] == end_name]['商品SKU编码'].tolist()[0] end_node = matcher.match('商品SKU编码').where(name=sku).first() if ''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n']) not in dingdang_taboo_list: graph.create(Relationship(start_node, '禁忌', end_node)) dingdang_taboo_file.write(''.join([dict(start_node)['name'],'\t',jinji[1],'\t','禁忌','\t',dict(end_node)['name'],'\n'])) jinji_num['药物-禁忌-药物'] += 1 dingdang_taboo_list.append(''.join([dict(start_node)['name'], '\t', '禁忌', '\t', dict(end_node)['name'], '\n'])) else: print(f_xunyiwenyao['节点类型'].loc[i], '中的禁忌药物不在叮当快药数据中') pass zong_num = 0 for i in jinji_num.values(): zong_num += i print('在叮当快药知识图谱demo中添加了寻医问药数据中相关的{}条禁忌关系,具体的数量如下:\n关系类别|数量\n:-:|:-:'.format(zong_num)) for k, v in jinji_num.items(): print(k, '|', v) print('\n') zong_num = 0 for i in new_entiyt.values(): zong_num+=i print('在叮当快药知识图谱demo中添加了寻医问药数据禁忌关系中相关的{}个实体,具体的数量如下:\n实体类别|数量\n:-:|:-:'.format(zong_num)) for k,v in new_entiyt.items(): 
print(k,'|',v) print('\n')
class Database():
    """Manage Database. (管理数据库)

    Wraps a py2neo connection and the Excel/text import/export pipeline
    for the NLU knowledge base.

    Public attributes:
    - rdb: Relational database handle (unused here, kept for API parity).
    - graph: Graph database (py2neo Graph).
    """

    # Column key -> Chinese header used when exporting NluCell sheets.
    # Hoisted to a class constant so download() and download_scene()
    # stay in sync (they previously duplicated this list).
    _EXPORT_INFO = [('name', '问题'), ('content', '回答'), ('topic', '场景标签'),
                    ('tid', '场景ID'), ('ftid', '父场景ID'), ('behavior', '行为'),
                    ('parameter', '动作参数'), ('url', '资源'), ('tag', '语义标签'),
                    ('keywords', '关键词'), ('api', '内置功能'), ('txt', '显示文本'),
                    ('img', '显示图片'), ('button', '显示按钮'),
                    ('description', '场景描述'), ("hot", '搜索热度')]

    def __init__(self, password="******", userid="A0001"):
        self.rdb = None
        self.graph = Graph("http://localhost:7474/db/data", password=password)
        self.selector = NodeMatcher(self.graph)
        self.user = self.selector.match("User", userid=userid).first()
        if not self.user:
            # Bootstrap the user node from the bundled cypher script the
            # first time this userid is seen.
            thispath = os.path.split(os.path.realpath(__file__))[0]
            with open(thispath + '/data/user.txt', 'r', encoding="UTF-8") as file:
                create_user = file.read()
            self.graph.run(create_user)
            self.user = self.selector.match("User", userid=userid).first()
        self.skb = ''
        self.dkb = []

    def delete(self, pattern="n", label=None):
        """Batch delete data or subgraph in database. (在数据库中批量删除数据或者子图)

        Args:
            pattern: Type of subgraph ("all", "n", "r", "nr", "rm", "nrm").
            label: Label of subgraph; required for every pattern except "all".
        """
        if pattern == "all":
            self.graph.delete_all()
        elif pattern == "n":
            self.graph.run("MATCH(n:" + label + ") DETACH DELETE n")
        elif pattern == "r":
            self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r")
        elif pattern == "nr":
            self.graph.run("MATCH (n)<-[r:" + label + "]-(m) DETACH DELETE r, n")
        elif pattern == "rm":
            self.graph.run("MATCH (n)-[r:" + label + "]->(m) DETACH DELETE r, m")
        elif pattern == "nrm":
            self.graph.run("MATCH (n)-[r:" + label + "]-(m) DETACH DELETE r, n, m")

    def reset(self, pattern="n", label="NluCell", filename=None):
        """Reset data of label in database. (重置数据库子图)

        Deletes the subgraph for `label`, then re-imports it from the
        Excel file `filename`.
        """
        assert filename is not None, "filename can not be None."
        self.delete(pattern=pattern, label=label)
        print("Delete successfully!")
        if os.path.exists(filename):
            self.handle_excel(filename)
        else:
            print(
                "You can set 'filename=<filepath>' when you call 'Database.reset.'"
            )
        print("Reset successfully!")

    def reset_ts(self, pattern="n", label="TestStandard", filename=None):
        """Reset test-standard data of label in database. (重置数据库子图)"""
        assert filename is not None, "filename can not be None."
        self.delete(pattern="n", label=label)
        print("Delete test standard successfully!")
        if os.path.exists(filename):
            self.handle_ts(filename)
        else:
            print(
                "You can set 'filename=<filepath>' when you call 'Database.reset.'"
            )
        print("Reset test standard successfully!")

    def add_nlucell(self, label="NluCell", name=None, content=None, topic="", tid="",
                    ftid="", behavior="", parameter="", url="", tag="", keywords="",
                    api="", txt="", img="", button="", description="", delimiter='|'):
        """Add an NluCell node in the graph.

        A node is identified by (name, topic, tid): if it already exists it
        is overwritten, otherwise a new node is created. Empty questions are
        skipped so that badly formatted knowledge-base sheets cannot insert
        empty QA pairs.
        """
        assert name is not None, "name must be string."
        assert content is not None, "content must be string."
        for question in name.split(delimiter):
            question = question.strip()
            if question:  # question must not be empty
                # Determine whether a node with (name, topic, tid) exists.
                match_tid = "''" if tid == '' else str(tid)
                node = self.selector.match("NluCell").where(
                    "_.name ='" + question + "'",
                    "_.topic ='" + topic + "'", "_.tid =" + match_tid).first()
                if node:
                    # Exists: overwrite mutable fields (name/topic/tid/tag
                    # deliberately untouched — they identify the node).
                    node['content'] = content
                    node['ftid'] = ftid
                    node['behavior'] = behavior
                    node['parameter'] = parameter
                    node['url'] = url
                    node['keywords'] = keywords
                    node['api'] = api
                    node['txt'] = txt
                    node['img'] = img
                    node['button'] = button
                    node['description'] = description
                    self.graph.push(node)
                else:
                    # Missing: create a new node with a freshly computed tag.
                    tag = get_tag(question, self.user)
                    node = Node(label, name=question, content=content, topic=topic,
                                tid=tid, ftid=ftid, behavior=behavior,
                                parameter=parameter, url=url, tag=tag,
                                keywords=keywords, api=api, txt=txt, img=img,
                                button=button, description=description, hot=0)
                    self.graph.create(node)

    def add_ts(self, label="TestStandard", question=None, content=None, context="",
               behavior="", parameter="", url=""):
        """Add a test-standard node in the graph.

        Questions are whitespace-split; empty items are skipped.
        """
        assert question is not None, "question must be string."
        assert content is not None, "content must be string."
        for item in question.split():
            item = item.strip()
            if item:  # skip empty questions from badly formatted sheets
                node = Node(label, question=item, content=content, context=context,
                            behavior=behavior, parameter=parameter, url=url)
                self.graph.create(node)

    def handle_ts(self, filename=None, custom_sheets=None):
        """Process test-standard data from an Excel workbook.

        Args:
            custom_sheets: Optional list of sheet names to restrict the
                import to. (Default changed from a mutable [] to None —
                behavior is identical since an empty list was falsy.)
        """
        assert filename is not None, "filename can not be None."
        data = read_excel(filename)
        data_sheets = data.sheet_names()
        if custom_sheets:
            sheet_names = list(
                set(data_sheets).intersection(set(custom_sheets)))
        else:
            sheet_names = data_sheets
        for sheet_name in sheet_names:  # only import the selected sheets
            table = data.sheet_by_name(sheet_name)
            if table:
                # Map spreadsheet column letters to 0-based indices.
                col_format = ['A', 'B', 'C', 'D', 'E', 'F']
                try:
                    nrows = table.nrows
                    str_upcase = [i for i in string.ascii_uppercase]
                    i_upcase = range(len(str_upcase))
                    ncols_dir = dict(zip(str_upcase, i_upcase))
                    col_index = [ncols_dir.get(i) for i in col_format]
                    # First two rows are headers.
                    for i in range(2, nrows):
                        question = table.cell(i, col_index[0]).value
                        content = table.cell(i, col_index[1]).value
                        context = table.cell(i, col_index[2]).value
                        behavior = table.cell(i, col_index[3]).value
                        parameter = table.cell(i, col_index[4]).value
                        url = table.cell(i, col_index[5]).value
                        self.add_ts(question=question, content=content,
                                    context=context, behavior=behavior,
                                    parameter=parameter, url=url)
                except Exception as error:
                    print('Error: %s' % error)
                    return None
            else:
                print('Error! Data of %s is empty!' % sheet_name)
                return None

    def handle_excel(self, filename=None, custom_sheets=None):
        """Process NluCell data from an Excel workbook and sync Config nodes.

        Args:
            custom_sheets: Optional list of sheet names to restrict the
                import to (mutable-default [] replaced with None; identical
                behavior).
        """
        assert filename is not None, "filename can not be None"
        data = read_excel(filename)
        data_sheets = data.sheet_names()
        if custom_sheets:
            sheet_names = list(
                set(data_sheets).intersection(set(custom_sheets)))
        else:
            sheet_names = data_sheets
        for sheet_name in sheet_names:
            table = data.sheet_by_name(sheet_name)
            topics = []
            if table:
                col_format = [
                    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
                    'L', 'M', 'N', 'O'
                ]
                try:
                    nrows = table.nrows
                    str_upcase = [i for i in string.ascii_uppercase]
                    i_upcase = range(len(str_upcase))
                    ncols_dir = dict(zip(str_upcase, i_upcase))
                    col_index = [ncols_dir.get(i) for i in col_format]
                    # First two rows are headers; data starts on row 3.
                    for i in range(2, nrows):
                        name = table.cell(i, col_index[0]).value
                        content = table.cell(i, col_index[1]).value
                        # Scene topic is mandatory; QA topic may be blank and
                        # then defaults to the sheet name.
                        temp = table.cell(i, col_index[2]).value
                        topic = temp if temp else sheet_name
                        temp = table.cell(i, col_index[3]).value
                        tid = int(temp) if temp != '' else ''
                        temp = table.cell(i, col_index[4]).value
                        ftid = int(temp) if temp != '' else ''
                        behavior = table.cell(i, col_index[5]).value
                        parameter = table.cell(i, col_index[6]).value
                        url = table.cell(i, col_index[7]).value
                        tag = table.cell(i, col_index[8]).value
                        keywords = table.cell(i, col_index[9]).value
                        api = table.cell(i, col_index[10]).value
                        txt = table.cell(i, col_index[11]).value
                        img = table.cell(i, col_index[12]).value
                        button = table.cell(i, col_index[13]).value
                        description = table.cell(i, col_index[14]).value
                        # hot = 0  # table.cell(i, col_index[15]).value
                        self.add_nlucell(name=name, content=content, topic=topic,
                                         tid=tid, ftid=ftid, behavior=behavior,
                                         parameter=parameter, url=url, tag=tag,
                                         keywords=keywords, api=api, txt=txt,
                                         img=img, button=button,
                                         description=description, delimiter="|")
                        # Collect scene topics for the Config node below.
                        if topic:
                            topics.append(topic)
                except Exception as error:
                    print('Error: %s' % error)
                    return None
            else:
                print('Error! Data of %s is empty!' % sheet_name)
                return None
            # If no Config node exists for this sheet, create it (linked to
            # the current user); otherwise merge the topic set into it.
            config_node = self.selector.match("Config", name=sheet_name).first()
            if not config_node:
                # self.user is assumed to already exist.
                self.graph.run('MATCH (user:User {userid: "' + self.user["userid"] +
                               '"})\nCREATE (config:Config {name: "' + sheet_name +
                               '", topic: "' + ",".join(set(topics)) +
                               '"})\nCREATE (user)-[:has {bselected: 1, available: 1}]->(config)')
            else:
                alltopics = config_node["topic"].split(
                    ",") if config_node["topic"] else []
                alltopics.extend(topics)
                config_node["topic"] = ",".join(set(alltopics))
                self.graph.push(config_node)

    def handle_txt(self, filename=None):
        """Process a text file of alternating question/answer lines."""
        assert filename is not None, "filename can not be None!"
        with open(filename, encoding="UTF-8") as file:
            question = file.readline().rstrip()
            while question:
                answer = file.readline().rstrip()
                print("question: " + question)
                print("answer: " + answer)
                self.add_nlucell(name=question, content=answer, delimiter="|")
                question = file.readline().rstrip()

    def get_available_kb(self):
        """Return names of knowledge bases available to the current user."""
        kb = []
        match_str = "MATCH (user:User {userid: '" + self.user['userid'] + \
            "'})-[r:has {available:1}]->(config:Config) RETURN config.name as name"
        for item in self.graph.run(match_str):
            kb.append(item['name'])
        return kb

    def get_selected_kb(self):
        """Return names of knowledge bases both selected and available."""
        kb = []
        match_str = "MATCH (user:User {userid: '" + self.user['userid'] + \
            "'})-[r:has {bselected:1, available:1}]->(config:Config) RETURN config.name as name"
        for item in self.graph.run(match_str):
            kb.append(item['name'])
        return kb

    def download(self, filename=None, names=None):
        """Download knowledge bases to an Excel file. (下载知识库)

        Args:
            names: Non-empty list of Config (subgraph) names to export.
        """
        names = names if names is not None else []
        assert filename is not None, "Filename must be *.xls!"
        # Fixed: the original `assert names is not []` compared identity with
        # a fresh list literal and could never fail.
        assert names != [], "Subgraph names can not be empty!"
        cypher_info = "MATCH (n:NluCell) WHERE n.topic='{topic}' RETURN n"
        # Keys are exported in this fixed order (shared class constant).
        info = self._EXPORT_INFO
        sheets = []
        for name in names:
            subgraph = self.selector.match('Config', name=name).first()
            topics = subgraph["topic"].split(",") if subgraph else []
            items = []
            for topic in topics:
                match_str = cypher_info.format(topic=topic)
                item = list(self.graph.run(match_str).data())
                items.extend(item)
            sheets.append({"name": name, "info": info, "items": items})
        write_excel(filename=filename, sheets=sheets)

    def download_scene(self, label="NluCell", filename=None, topic=''):
        """Match one scene by topic and download it to an Excel file."""
        assert filename is not None, "Filename must be *.xls!"
        # Fixed: `assert topic is not ''` was an identity check against a
        # literal and could never fail.
        assert topic != '', "Topic can not be ''!"
        info = self._EXPORT_INFO
        cypher_info = "MATCH (n:{label}) WHERE n.topic='{topic}' RETURN n"
        match_str = cypher_info.format(label=label, topic=topic)
        config_info = "MATCH (n:Config) WHERE n.topic contains '{topic}' RETURN n.name as name"
        config = self.graph.run(config_info.format(topic=topic)).data()
        name = list(config)[0]['name'] if config else "业务场景"
        items = list(self.graph.run(match_str).data())
        sheets = [{"name": name, "info": info, "items": items}]
        write_excel(filename=filename, sheets=sheets)

    def upload(self, pattern='qa', names=None):
        """Upload a knowledge base chosen via a file dialog. (上传知识库)"""
        if pattern == 'qa':
            filename = askopenfilename(filetypes=[('QA', '*.xls')])
            self.handle_excel(filename, custom_sheets=names)
        elif pattern == 'ts':
            filename = askopenfilename(filetypes=[('测试标准', '*.xls')])
            self.handle_ts(filename, custom_sheets=names)

    def generate_testcases(self, *, filename=None, custom_sheets=None, savedir='.'):
        """Generate test cases (xls + txt) from knowledge-base Excel data.

        Args:
            custom_sheets: Optional collection of sheet names to include.
            savedir: Directory for testcase.xls / testcase.txt output.
        """
        assert filename is not None, "filename can not be None"
        data = read_excel(filename)  # existing knowledge-base workbook
        data_sheets = data.sheet_names()
        if custom_sheets:
            sheet_names = list(
                set(data_sheets).intersection(set(custom_sheets)))
        else:
            sheet_names = data_sheets
        file = xlwt.Workbook()  # new test-case workbook
        new_sheet = file.add_sheet("NluTest", cell_overwrite_ok=True)
        # Header rows.
        keys = ["问题", "答案", "是否通过", "改进建议"]
        new_sheet.write(0, 0, "本地语义常见命令问答测试",
                        set_excel_style('Arial Black', 220, True))
        for col, key in enumerate(keys):
            new_sheet.write(1, col, key,
                            set_excel_style('Arial Black', 220, True))
        count = 0
        testlist = []
        # Body rows.
        for sheet_name in sheet_names:
            table = data.sheet_by_name(sheet_name)
            # Fixed: original tested `if data:` (the whole workbook, always
            # truthy) so the empty-sheet branch was unreachable; check the
            # sheet itself, consistent with handle_ts/handle_excel.
            if table:
                col_format = [
                    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
                    'L', 'M', 'N', 'O'
                ]
                try:
                    nrows = table.nrows
                    str_upcase = [i for i in string.ascii_uppercase]
                    i_upcase = range(len(str_upcase))
                    ncols_dir = dict(zip(str_upcase, i_upcase))
                    col_index = [ncols_dir.get(i) for i in col_format]
                    # First two rows are headers.
                    for i in range(2, nrows):
                        name = table.cell(i, col_index[0]).value
                        content = table.cell(i, col_index[1]).value
                        questions = name.format(**self.user).split("|")
                        answers = content.format(**self.user).split("|")
                        testlist.extend(questions)
                        new_sheet.write(i + count, 0, "\n".join(questions))
                        new_sheet.write(i + count, 1, "\n".join(answers))
                    count += nrows - 2
                except Exception as error:
                    print('Error: %s' % error)
                    return None
            else:
                print('Error! Data of %s is empty!' % sheet_name)
                return None
        file.save(savedir + "/testcase.xls")  # persist the workbook
        with open(savedir + "/testcase.txt", 'w', encoding="UTF-8") as newfile:
            for item in testlist:
                newfile.write(item + "\n")
def getAllGenres():
    """Return all Genre nodes from the graph as a JSON HTTP 200 response."""
    genres = NodeMatcher(graph).match("Genre").all()
    return make_response(jsonify(genres), 200)
# Attach a shared py2neo connection and its matchers to the NeoConfig
# singleton so the rest of the application reuses one Graph handle.
from flask_neo4j_lite.config import NeoConfig
from py2neo import Graph, NodeMatcher, RelationshipMatcher

# NOTE(review): password and port are hard-coded here — presumably a
# dev-only setup; confirm before deploying.
NeoConfig.graph = Graph(password="******", port="11005")
NeoConfig.matcher = NodeMatcher(NeoConfig.graph)
NeoConfig.relationship_matcher = RelationshipMatcher(NeoConfig.graph)
# Scrape the GOV.UK immigration-rules guidance page and prepare to create
# "Immigration Document" nodes in a local Neo4j instance.
import re
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
# %%
# URL constants
GOV_UK_HOME_URL = "https://www.gov.uk"
DOCUMENTS_URL = "https://www.gov.uk/guidance/immigration-rules"
POINT_BASED_SYSTEM_URL = "https://www.gov.uk/guidance/immigration-rules/immigration-rules-part-6a-the-points-based-system"
# %%
# Establish database connection
# NOTE(review): credentials are hard-coded — move to config/env before sharing.
graph = Graph('bolt://localhost:7687', auth=('neo4j', 'Undertaker11.'))
# %%
matcher = NodeMatcher(graph)
# %%
# Open documents url for scraping.
# NOTE(review): webdriver, ChromeDriverManager and BeautifulSoup are not
# imported in this section — presumably imported earlier in the file; verify.
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get(DOCUMENTS_URL)
content = driver.page_source
soup = BeautifulSoup(content)  # no explicit parser: BeautifulSoup picks a default
driver.close()
# %%
"""Creating Immigration Document Nodes"""
# Each <li> inside the page's section list is one document entry.
docs = soup.find(attrs={'class': 'section-list'})
for doc_list_item in docs.findAll('li'):
def delete_value_set_from_graph(value_set: ValueSet, graph: Graph) -> bool:
    """Delete the ValueSet node matching ``value_set.id`` from the graph.

    Returns:
        True if a matching node was found and deleted, False if no node
        with that id exists.

    Fixes: the original was annotated ``-> bool`` but returned None, and
    would pass None to ``tx.delete`` when the node was missing.
    """
    node = NodeMatcher(graph).match('ValueSet', id=value_set.id).first()
    if node is None:
        # Nothing to delete — report failure instead of crashing on None.
        return False
    tx = graph.begin()
    tx.delete(node)
    tx.commit()
    return True
def __init__(self, config):
    """Validate the supplied connection config, then open the graph
    connection and build a node matcher over it."""
    validated = self.validate_config(config)
    self.graph = Graph(**validated)
    self.matcher = NodeMatcher(self.graph)