Exemple #1
1
class TwitterGraph(object):
    '''Interface to a Neo4j database holding a Twitter follower network.

    Accounts are stored as ``User`` nodes keyed by their ``id`` property and
    linked by ``FOLLOWS`` relationships.  Each relationship carries a ``rec``
    property recording the order in which it was received (1 = most recent).
    '''

    # User attributes that add_user_properties copies onto the node.
    _KEEP_KEYS = frozenset([
        'contributors_enabled', 'created_at', 'default_profile',
        'default_profile_image', 'description', 'favourites_count',
        'followers_count', 'friends_count', 'geo_enabled', 'id',
        'id_str', 'is_translator', 'lang', 'listed_count', 'location',
        'name', 'profile_image_url_https', 'protected', 'screen_name',
        'statuses_count', 'time_zone', 'utc_offset', 'verified',
        'withheld_in_countries', 'withheld_scope'])

    # Suffix removed from the profile image file name.
    _NORMAL_SUFFIX = '_normal'

    # Initial setup and linking into the database
    def __init__(self, host_port, user, password):
        '''Authenticate against ``host_port`` and open the graph.

        Also tries to create a uniqueness constraint on ``User.id``.
        '''
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except Exception:
            # Best effort: presumably raised because the constraint already
            # exists (the original author noted ConstraintViolationException).
            # NOTE(review): other failures are reported with the same message.
            print('Unique id on Node User already exists')

    # Functions to add data to the database
    def _add_follow_edges(self, user_id, other_ids, rec_count, outgoing, flag):
        '''Merge FOLLOWS edges between ``user_id`` and each of ``other_ids``.

        ``outgoing`` selects the direction (True: user -> other), ``flag`` is
        the node property set to True once all edges have been merged.
        '''
        user = Node('User', id=user_id)
        self.graph.merge(user)  # important to merge before doing anything
        # preserving the order of the ids: rec continues after rec_count
        for offset, other_id in enumerate(other_ids, 1):
            other = Node('User', id=other_id)
            self.graph.merge(other)
            if outgoing:
                edge = Relationship(user, 'FOLLOWS', other, rec=rec_count + offset)
            else:
                edge = Relationship(other, 'FOLLOWS', user, rec=rec_count + offset)
            self.graph.merge(edge)
        user[flag] = True
        self.graph.push(user)

    def add_following(self, user_id, following_ids, rec_count):
        '''Given a unique user id, adds the relationships for who they follow.

        Adds a User Node with the id if it doesn't exist, numbers the
        relationships from ``rec_count + 1`` and sets ``following_added``.
        '''
        self._add_follow_edges(user_id, following_ids, rec_count,
                               True, 'following_added')

    def add_followers(self, user_id, follower_ids, rec_count):
        '''Given a unique user id, adds the relationships for who follows them.

        Adds a User Node with the id if it doesn't exist, numbers the
        relationships from ``rec_count + 1`` and sets ``followers_added``.
        '''
        self._add_follow_edges(user_id, follower_ids, rec_count,
                               False, 'followers_added')

    def _mark_invalid_user(self, user):
        '''Store an invalid-id record (mapping with ``user_id`` and ``error``).'''
        node = Node('User', id=user['user_id'])
        self.graph.merge(node)
        node['screen_name'] = 'INVALID'
        node['error'] = user['error']
        print('Found invalid user id')
        self.graph.push(node)

    def add_user_properties(self, user):
        '''Copy the properties of ``user`` onto its User node.

        ``user`` is either an object exposing ``id`` and ``__dict__`` or, for
        ids that could not be resolved, a mapping with ``user_id`` and
        ``error`` keys; the latter is stored with screen_name ``INVALID``.
        Verified accounts additionally get the ``Verified`` label.
        '''
        try:
            user_id = user.id
        except AttributeError:
            # bad user id: the record has no ``id`` attribute
            self._mark_invalid_user(user)
            return
        props = self.__clean_user_dict(user.__dict__)
        node = Node('User', id=user_id)
        self.graph.merge(node)
        for key, value in props.items():
            node[key] = value
        # add additional label to verified accounts
        if props.get('verified'):
            node.add_label('Verified')
        self.graph.push(node)

    def __clean_user_dict(self, user_prop_dict):
        '''Return the subset of ``user_prop_dict`` that is stored on the node.

        Only keys in ``_KEEP_KEYS`` survive.  A trailing ``_normal`` is cut
        from the profile image file name, and ``created_at`` is replaced by a
        ``%Y-%m-%d %H:%M:%S`` string with its ordinal kept in
        ``created_at_ord``.
        '''
        # only keep the whitelisted keys for inserting
        clean = {k: v for k, v in user_prop_dict.items()
                 if k in self._KEEP_KEYS}

        image_url = clean.get('profile_image_url_https')
        if image_url:
            stem, ext = os.path.splitext(image_url)
            # str.rstrip() strips a *character set*, which used to eat
            # legitimate trailing letters; remove the exact suffix instead.
            if stem.endswith(self._NORMAL_SUFFIX):
                stem = stem[:-len(self._NORMAL_SUFFIX)]
            clean['profile_image_url_https'] = stem + ext

        # convert date time to string
        created_at = clean.get('created_at')
        if created_at is not None:
            clean['created_at_ord'] = created_at.toordinal()
            clean['created_at'] = created_at.strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # Functions to query database
    def _run_id_query(self, cypherq):
        '''Run ``cypherq`` and return the ``n.id`` column as a list.'''
        return [row['n.id'] for row in self.graph.run(cypherq).data()]

    def get_nodes_missing_props(self, limit=100):
        '''Returns up to ``limit`` ids of User nodes without a screen_name'''
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where("_.screen_name IS NULL").limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        '''Returns up to ``limit`` ids of nodes that follow BernieSanders
        and have no screen_name yet.'''
        # limit is coerced to int before formatting, so it cannot inject Cypher
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.screen_name)
                     RETURN n.id
                     LIMIT {limit};""".format(limit=int(limit))
        return self._run_id_query(cypherq)

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        '''Returns ids missing the follower or following relationships.

        Valid inputs for rel is FOLLOWING or FOLLOWERS; anything else raises
        ValueError.
        '''
        if rel == 'FOLLOWING':
            flag = 'following_added'
        elif rel == 'FOLLOWERS':
            flag = 'followers_added'
        else:
            raise ValueError(
                "rel must be 'FOLLOWING' or 'FOLLOWERS', got {!r}".format(rel))
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where(
            "_.{} IS NULL".format(flag)).limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        '''Returns up to 100 ids of BernieSanders followers with at least 1000
        followers whose following list has not been added.

        ``rel`` is accepted for signature compatibility but not used.
        '''
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User)
                                     WHERE n.followers_count >= 1000
                                     AND NOT EXISTS(n.following_added)
                                     AND m.screen_name = 'BernieSanders'
                                     RETURN n.id
                                     LIMIT 100;"""
        return self._run_id_query(cypherq)

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        '''Returns up to 100 ids of accounts followed by BernieSanders whose
        following list has not been added.

        ``rel`` is accepted for signature compatibility but not used.
        '''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 100;"""
        return self._run_id_query(cypherq)

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        '''Same query as get_nodes_missing_rels_bfriends but limited to 500
        rows, of which only the last 100 are returned.

        ``rel`` is accepted for signature compatibility but not used.
        '''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 500;"""
        return self._run_id_query(cypherq)[-100:]
def get_graph():
    """Open a py2neo ``Graph`` from the module-level Neo4j settings.

    ``NEO4J_AUTH`` is read as ``"user/password"`` and ``NEO4J_HOST`` as the
    host.  A Tweet count query is issued before the graph is returned, so a
    failing query surfaces here rather than at the caller.
    """
    # Connect to graph
    auth_parts = NEO4J_AUTH.split('/')
    username = auth_parts[0]
    secret = auth_parts[1]
    connection = Graph(user=username, password=secret, host=NEO4J_HOST)

    connection.run('match (t:Tweet) return COUNT(t)')
    return connection
Exemple #3
0
def computeShortestPathCoherence(node1, node2, w):
	"""Return ``w`` scaled by the inverse shortest-path length between two pages.

	Looks up ``Page`` nodes by name and asks the graph database for the
	shortest ``LINKS_TO`` path (1 to 10 hops, either direction), e.g.
	(67149)-[:LINKS_TO]->(421).  The unweighted score ``1 / length`` is cached
	in ``rds`` under ``"node1:node2"`` and ``"node2:node1"``; ``0.0`` is cached
	when no path exists.

	Returns ``w`` itself when both names are equal after stripping whitespace,
	``w / length`` when a path is found and ``0.0`` otherwise.
	"""

	if node1.strip() == node2.strip():
		return w

	forwardKey = "%s:%s" % (node1, node2)
	backwardKey = "%s:%s" % (node2, node1)

	fromCache = rds.get(forwardKey)
	if fromCache is not None:
		return float(fromCache) * w

	g = Graph()
	# Names are passed as query parameters so quotes or backslashes in a page
	# title can neither break nor alter the query.  The relationship pattern
	# uses the type filter [:LINKS_TO ...]; the previous [LINKS_TO ...] only
	# declared a variable and matched relationships of any type.
	q = ("MATCH path=shortestPath((m:Page {name: $name1})"
		"-[:LINKS_TO*1..10]-(n:Page {name: $name2})) "
		"RETURN LENGTH(path) AS length")

	# keep the last record, as before
	record = None
	for c in g.run(q, name1=node1, name2=node2):
		record = c

	if record is not None and record["length"]:
		# float division: on Python 2 the integer form cached 0 for every
		# path longer than one hop
		score = 1.0 / record["length"]
	else:
		score = 0.0

	rds.set(forwardKey, score)
	rds.set(backwardKey, score)
	# same formula as the cached branch, so both paths agree
	return w * score if score else 0.0
    def make_sequence(self):
        """Export Jaccard-linked video sequences and their rating spread to CSV.

        Runs a shortest-path query over ``Video`` nodes, appends every
        (sequence index, video id, rating) triple to ``self.seq_ids``,
        ``self.video_ids`` and ``self.ratings``, then writes
        ``sequences.csv`` (rows with a null video removed) and ``summary.csv``
        (per-sequence rating standard deviation) under
        ``settings.VideosDirPath/<self.game>/``.
        """
        authenticate(settings.NeoHost, settings.NeoLog, settings.NeoPass)
        graph = Graph("{0}/db/data/".format(settings.NeoHost))
        query = """MATCH (start:Video)-[:Jaccard*5..10]->(sequence:Video) 
        WHERE start<>sequence MATCH p=shortestPath((start:Video)-[:Jaccard*]->(sequence:Video)) 
        WHERE NONE (n IN nodes(p) WHERE size(filter(x IN nodes(p) WHERE n = x))> 1)  
        RETURN EXTRACT(n IN NODES(p)|[n.id, n.rating]) LIMIT 100000"""

        # the result column is named after the un-aliased RETURN expression
        column = 'EXTRACT(n IN NODES(p)|[n.id, n.rating])'
        records = graph.run(query).data()
        for seq_index, record in enumerate(records):
            for video in record[column]:
                self.seq_ids.append(seq_index)
                self.video_ids.append(video[0])
                self.ratings.append(video[1])

        frame = pd.DataFrame({'sequence': self.seq_ids,
                              'video': self.video_ids,
                              'rating': self.ratings})
        frame = frame[pd.notnull(frame['video'])]
        print(frame)
        rating_spread = frame.groupby('sequence')['rating'].std()
        print(rating_spread)

        out_dir = '{0}/{1}/'.format(settings.VideosDirPath, self.game)
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        frame.to_csv('{0}/sequences.csv'.format(out_dir), encoding='utf-8')
        rating_spread.to_csv('{0}/summary.csv'.format(out_dir), encoding='utf-8')
Exemple #5
0
def graph():
	"""Return the follower network of the ``brexit`` nodes as JSON.

	The response has the shape ``{"nodes": [...], "links": [...]}``: one node
	entry per ``brexit`` node that has a ``betweenness_centrality`` value, and
	one link entry per ``FOLLOWEDBY`` relationship whose two endpoints are
	both among those nodes.  Link ``source``/``target`` are indexes into
	``nodes``.
	"""
	#topic = str(request.args.get('topic'))
	# The form field is still required (a missing field is rejected by Flask),
	# but the queries below are hard-wired to the `brexit` label.
	topic = str(request.form['topic'])
	graph = Graph("http://ec2-52-205-15-39.compute-1.amazonaws.com:7474/db/data/")
	node_results = graph.run("MATCH (n:brexit) where has(n.betweenness_centrality) RETURN n.ScreenName as ScreenName, n.TweetId as TweetId, n.FollowerCount as FollowerCount, n.betweenness_centrality as BetweennessCentrality;")
	linksMap = {}
	nodes = []
	for index, node_result in enumerate(node_results):
		linksMap[node_result['ScreenName']] = index
		nodes.append({
			'tweetId': str(node_result['TweetId']),
			'name': str(node_result['ScreenName']),
			'group': int(node_result['FollowerCount']) % 5,
			'nodeSize': int(node_result['BetweennessCentrality'])/25+4,
		})

	links = []
	rels_results = graph.run("MATCH (a:brexit)-[r:FOLLOWEDBY]-(b:brexit) RETURN a.ScreenName, b.ScreenName")
	for rels_result in rels_results:
		# Look names up exactly as they were stored: encoding them to bytes
		# first never matches the text keys on Python 3.
		source = linksMap.get(rels_result['a.ScreenName'])
		target = linksMap.get(rels_result['b.ScreenName'])
		if source is None or target is None:
			# endpoint has no betweenness_centrality, so it is not in `nodes`
			continue
		links.append({'source': source, 'target': target, 'value': 2})
	json_object = {"nodes": nodes, "links": links}
	return jsonify(json_object)
Exemple #6
0
def reach():
	"""Return reach metrics for the topic label posted in the form.

	Responds with ``{"records": [{"metric": "Reach", "value": <node count>},
	{"metric": "PotencialReach", "value": <sum of FollowerCount>}]}``.
	Before counting, ``FollowerCount`` of every node with that label is
	converted to an integer in the database.  A topic that is not a plain
	identifier is rejected with HTTP 400.
	"""
	import re

	#topic = str(request.args.get('topic'))
	topic = str(request.form['topic'])
	# Labels cannot be sent as query parameters, so the value is spliced into
	# the Cypher text; restrict it to identifier characters to rule out
	# injection through the form field.
	if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', topic):
		return jsonify({'error': 'invalid topic'}), 400

	graph = Graph("http://ec2-52-205-15-39.compute-1.amazonaws.com:7474/db/data/")
	graph.run("MATCH (n:"+topic+") SET n.FollowerCount = toInt(n.FollowerCount)")
	reach_count_cursor = graph.run("MATCH (n:"+topic+") RETURN count(n)")
	preach_count_cursor = graph.run("MATCH (n:"+topic+") RETURN sum(n.FollowerCount)")

	# defaults keep the response well-formed if a cursor yields no record
	reach_count = 0
	preach_count = 0
	for reach_count_record in reach_count_cursor:
		reach_count = reach_count_record[0]
	for preach_count_record in preach_count_cursor:
		preach_count = preach_count_record[0]

	json_array = [
		{'metric': 'Reach', 'value': int(reach_count)},
		{'metric': 'PotencialReach', 'value': int(preach_count)},
	]
	json_object = {'records': json_array}

	return jsonify(json_object)
Exemple #7
0
def get_requests(item_name, tier=1, enchantment=0, quality=0, after_ts=1000):
	"""Query request/offer pairs for one item variant.

	Matches ``request`` and ``offer`` relationships pointing at the same
	``Item`` whose ``Group``, ``Tier``, ``Enchantment`` and ``Quality`` equal
	the arguments and whose ``LastViewed`` values are both less than
	``after_ts`` before the current timestamp.  Returns the py2neo cursor with
	columns ``i`` and ``profit``, ordered by ``profit``.

	NOTE(review): the filter keeps pairs where the request price is *below*
	the offer price, so ``profit`` is negative here, whereas
	get_profitable_trades keeps ``request - offer > 0`` -- confirm which one
	is intended.
	"""
	graph = Graph(password='******')
	current_ts = datetime.timestamp(datetime.now())

	# item_name is free text, so it is sent as a query parameter instead of
	# being spliced into a quoted literal.  The numeric arguments are still
	# formatted in, which keeps their literal typing exactly as before.
	query = f'''
	MATCH (:Character)-[r:request]->(i:Item)<-[o:offer]-(:Character)
	WHERE i.Group = $item_name
	AND i.Tier = {tier}
	AND i.Enchantment = {enchantment}
	AND i.Quality = {quality}
	AND ({current_ts} - r.LastViewed) < {after_ts}
	AND ({current_ts} - o.LastViewed) < {after_ts}
	AND (r.UnitPriceSilver < o.UnitPriceSilver)
	RETURN i, (r.UnitPriceSilver - o.UnitPriceSilver) as profit
	ORDER BY profit
	'''

	return graph.run(query, item_name=item_name)
Exemple #8
0
def get_profitable_trades(after_ts=100000):
	"""Print every request/offer pair whose request price exceeds the offer price.

	Only relationships whose ``LastViewed`` is less than ``after_ts`` before
	the current timestamp are considered.  Rows are ordered by profit and
	printed as a small report; prices are shown with their last four
	characters cut off.
	"""
	graph = Graph(password='******')
	current_ts = datetime.timestamp(datetime.now())

	query = f'''
	MATCH (:Character)-[r:request]->(i:Item)<-[o:offer]-(:Character)
	WHERE ({current_ts} - r.LastViewed) < {after_ts}
	AND ({current_ts} - o.LastViewed) < {after_ts}
	AND (r.UnitPriceSilver - o.UnitPriceSilver) > 0 
	RETURN i, r.UnitPriceSilver as sell_price, o.UnitPriceSilver as buy_price, (r.UnitPriceSilver - o.UnitPriceSilver) as profit
	ORDER BY profit
	'''

	rule = '__' * 20
	for record in graph.run(query):
		row = record.data()
		item = row['i']

		# read every value before printing so a lookup failure prints nothing
		buy_price = row['buy_price']
		sell_price = row['sell_price']
		profit = row['profit']
		item_name = item_dict[item['Group']]

		print()
		print(rule)
		print('>>> ', item_name)
		print('T: ', item['Tier'])
		print('E: ', item['Enchantment'])
		print('Q: ', item['Quality'])
		print('Buy for:  $', str(buy_price)[:-4])
		print('Sell for: $', str(sell_price)[:-4])
		print('PROFIT =  $', str(profit)[:-4])
		print(rule)
from py2neo import Graph, Node
import os


# Script: read all Provider nodes, then iterate over them to link each one to
# its Prescription nodes.  NOTE(review): the visible part stops inside the
# final loop, before q2 is executed.
if __name__ == "__main__":
    # The Neo4j password comes from the NEO4J_PASS environment variable.
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)  ## TODO: document in the README how to set the environment variable in PyCharm
    tx = g.begin()

    # Fetch the internal node id and npi of every Provider.
    q1 = '''MATCH (p:Provider) RETURN id(p), p.npi
    '''
    providers = g.run(q1)

    #======= Build provider_lst: list of dicts with keys 'id' and 'npi' ======#
    provider_lst = []
    for provider in providers:
        provider_dic = {}
        provider_dic['id'] = provider['id(p)']
        provider_dic['npi'] = provider['p.npi']
        provider_lst.append(provider_dic)

    # ===================== Create relation, iterating over providers (faster, about 5000000 iterations) ====================#
    # {id_p} and {p_npi} look like query parameters to be supplied when q2 is
    # run -- presumably; the call is not visible here.
    q2 = '''
        MATCH (p:Provider) where id(p) = {id_p}
        MATCH (pc:Prescription) where pc.npi = {p_npi}
        CREATE (p)-[:WRITES]->(pc)'''

    match_num = 0 #2407851
    for p in provider_lst:
        p_npi = p['npi']
        id_p = p['id']
# Config file with graph location details

# neo4jconfig.yml supplies the keys "graph" (server base URL), "users" and
# "relationships" (CSV locations substituted into the LOAD CSV statements).
with open("neo4jconfig.yml", 'r') as ymlfile:
    # safe_load builds plain data only; yaml.load() without an explicit
    # Loader can construct arbitrary objects and is rejected by current PyYAML.
    cfg = yaml.safe_load(ymlfile)

graph = Graph(cfg["graph"]+"/db/data")


# Script to batch import the user nodes into neo4j

load_script = """
USING PERIODIC COMMIT 1000
load csv with headers from %s as row
merge (:brexit {TweetId:row.TweetId,CreatedAt:row.CreatedAt,ScreenName:row.ScreenName,FollowerCount:row.FollowerCount})
"""
# fixed: the closing parenthesis of this call was missing (syntax error)
graph.run(load_script % (cfg["users"]))


# Script to batch import the relationships into neo4j

# fixed: the relationship pattern contained a stray "(" ([:FOLLOWEDBY(]),
# which is not valid Cypher
rels_script = """
load csv with headers from %s as row2
MATCH (u1:brexit {ScreenName:row2.Users})
MATCH(u2:brexit {ScreenName:row2.Followers})
CREATE (u1)-[:FOLLOWEDBY]->(u2)"""
graph.run(rels_script % (cfg["relationships"]))


# Mazerunner/Spark-Neo4j HTTP GET request to calculate betweenness centrality

url = cfg["graph"]+"/service/mazerunner/analysis/betweenness_centrality/FOLLOWEDBY"
Exemple #11
0
          
          # create street in database
          street = Node("Street", nodeids=waynodes, nameslug=wayname)
          tx = g.begin()
          tx.create(street)
          tx.commit()
        
        else:
          # know this street, add the way id
          street['nodeids'] = street['nodeids'] + waynodes
          street.push()

        # now add relationships to nodes in the way
        for node in waynodes:
          if(node in knownnodes):
            streetnames = g.run("MATCH (n:Street) WHERE {nodeid} IN n.nodeids RETURN n.nameslug LIMIT 25", nodeid=node)
            for streetrecord in streetnames:
              streetname = streetrecord['n.nameslug']
              if streetname == wayname:
                continue
              print('matching ' + streetname + ' and ' + wayname)
              street2 = g.find_one("Street", "nameslug", streetname)
              if street2 is None:
                continue
              intersect = Relationship(street, "MEETS", street2)
              intersect2 = Relationship(street2, "MEETS", street)
              tx = g.begin()
              tx.create(intersect)
              tx.create(intersect2)
              tx.commit()
            
from string_converter import string_filter
from fuzzywuzzy import fuzz

#add 2301 rel

# Script: load Drug and DrugFirm nodes into Python lists.
# NOTE(review): the visible part stops inside the DrugFirm loop.
if __name__ == "__main__":
    # The Neo4j password comes from the NEO4J_PASS environment variable.
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)  ## TODO: document in the README how to set the environment variable in PyCharm
    tx = g.begin()

    #======= Build drugs_lst: list of dicts with keys 'id' and 'labelerName' ======#
    q1 = '''
    MATCH (d: Drug)
    RETURN id(d), d.labelerName
    '''
    drug_obj = g.run(q1)
    drugs_lst = []
    # NOTE(review): the loop variable shadows the builtin `object`.
    for object in drug_obj:
        drug_dic = {}
        drug_dic['id'] = object['id(d)']
        drug_dic['labelerName'] = object['d.labelerName']
        drugs_lst.append(drug_dic)

    #======= Build df_lst: DrugFirm records (id and firmName are queried) ======#
    q2 = '''
    MATCH (df:DrugFirm)
    RETURN id(df), df.firmName'''
    df_obj = g.run(q2)
    df_lst = []
    for object in df_obj:
        df_dic = {}
    '''
    index4 = '''
    CREATE INDEX ON :Issue(issueNumber)
    '''
    index5 = '''
    CREATE INDEX ON :Lobbyist(firstName)
    '''

    index6 = '''
        CREATE INDEX ON :Lobbyist(lastName)
        '''

    index7 = '''
        CREATE INDEX ON :Lobbyist(position);
        '''
    g.run(index1)
    g.run(index2)
    g.run(index3)
    g.run(index4)
    g.run(index5)
    g.run(index6)
    g.run(index7)

    f1 = get_file_path('2013_1stQuarter_XML')
    f2 = get_file_path('2013_2ndQuarter_XML')
    f3 = get_file_path('2013_3rdQuarter_XML')
    f4 = get_file_path('2013_4thQuarter_XML')

    files = f1 + f2 + f3 + f4

    for file in files:
Exemple #14
0
class AnswerClassifier:
    def __init__(self):
        """Connect to the local Neo4j instance and define the top-level symptoms.

        ``self.fit_up_symptom`` lists the symptom names that word_analysis
        accepts directly, without mapping them to a parent symptom through
        the graph.
        """
        connection_settings = {
            'host': "127.0.0.1",
            'http_port': 7474,
            'user': "******",
            'password': "******",
        }
        self.g = Graph(**connection_settings)
        self.fit_up_symptom = [
            '情绪低落', '兴趣减退', '精力丧失', '注意力降低', '自信心丧失',
            '自责自罪', '前途问题', '自杀', '睡眠障碍', '食欲改变', '疼痛',
            '月经问题', '头晕', '虚弱', '心脏问题', '胸闷问题', '性功能障碍',
            '排泄不适', '消化不适', '妄想', '幻觉', '抑郁性木僵',
        ]

    '''分类主函数'''
    def classify1(self, word_dict, preques_infor, down_symptom):
        medical_dict,down_symptom = self.word_analysis(word_dict,down_symptom)
        data = {}

        if not medical_dict:
            types = []
        else:
            types = list(medical_dict.keys())

        # 在这里开始是我需要去考虑的东西.在具有了这些东西之后,我如何进行封装。

        preques_type = preques_infor['question_type']
        prediagnosis_infor = preques_infor['diagnosis_infor']

        # 确定下一个问题的类型
        '''
        [在诊断完成之后,再进行修改]
        目前的方案是分为上一个问题是'new_symptom',还是'spec_symptom',还是'ques_degree'
        如果是'new_symptom',就要将症状词语加上,但结果中如果有两个以上(除症状外两个以上的词)并包括频率的其中一种,就为'new_symptom'。
        否则如果没有频率词就问'ques_degree',如果只有一个词,那就'spec_symptom'
        如果是'spec_symptom',则需要将症状词语, 程度词语。因为用户肯定会有否定或者肯定回答,因此直接采取'new_symptom'。
        '''
        question_type = ''

        if preques_type == 'new_symptom':
            keynum = len(types) - 1
            # 当患者描述了一种完整的症状,但是这种情况下可能症状会出现答非所问
            if 'symptom' in types and ('time' in types or 'frequent' in types):

                if 'symptom' in list(preques_infor['diagnosis_infor'].keys()):
                    current_symptom = preques_infor['diagnosis_infor']['symptom']
                    if medical_dict['symptom'] != current_symptom:
                        preques_infor['mis_num'] += 1
                        preques_infor['mis_symptom'].append([current_symptom,medical_dict['symptom']])
                        print("答非所问:咨询%s症状, 患者回答%s症状" %(current_symptom,medical_dict['symptom']))

                # 这里要去维护已经完成的诊断列表
                question_type = 'new_symptom'
                preques_infor['question_type'] = question_type
                preques_infor['diagnosis_infor'] = medical_dict



            # 当患者并没有症状出现,则将对话系统设置的症状给记录下来
            elif bool(1 - ('symptom' in types)):
                if bool(1 -('symptom' in list(prediagnosis_infor.keys()))):
                    prediagnosis_infor['symptom'] = '情绪低落'
                medical_dict['symptom'] = prediagnosis_infor['symptom']
                word_dict['symptoms'].append(medical_dict['symptom'])
                word_dict['syms_score'].append(1)
                types.append('symptom')
                keynum += 1
            # 当患者描述了有完整的症状,但是这种情况下可能症状会出现答非所问
            else:
                if 'symptom' in list(preques_infor['diagnosis_infor'].keys()):
                    current_symptom = preques_infor['diagnosis_infor']['symptom']
                    if medical_dict['symptom'] == current_symptom:
                        preques_infor['mis_num'] += 1
                        preques_infor['mis_symptom'].append([current_symptom, medical_dict['symptom']])
                        print("答非所问:咨询%s症状, 患者回答%s症状" % (current_symptom, medical_dict['symptom']))

            if question_type == '':
                if 'time' in types or 'frequent' in types:
                # 这里要去维护已经完成的诊断列表
                    question_type = 'new_symptom'
                    preques_infor['question_type'] = question_type
                    preques_infor['diagnosis_infor'] = medical_dict
                else:
                    question_type = 'spec_symptom'
                    preques_infor['question_type'] = question_type
                    # 'spec_symptom'查询要问的子症状
                    sub_symptom = self.search('up_down_symptom', medical_dict['symptom'],down_symptom)
                    data['sub_symptom'] = sub_symptom
                    # 这里不能给那边进行诊断,所以我需要加入进preques_infor列表中
                    medical_dict['sub_symptom'] = sub_symptom
                    medical_dict['time'] = '大部分时间'
                    preques_infor['diagnosis_infor'] = medical_dict
        elif preques_type == 'spec_symptom':
            pre_medical_dict = preques_infor['diagnosis_infor']
            # 这里也有可能出现答非所问
            if 'symptom' in types:
                if medical_dict['symptom'] != pre_medical_dict['symptom']:
                    preques_infor['mis_num'] += 1
                    preques_infor['mis_symptom'].append([pre_medical_dict['symptom'], medical_dict['symptom']])
                    print("答非所问:咨询%s症状, 患者回答%s症状" % (pre_medical_dict['symptom'], medical_dict['symptom']))

            for k in pre_medical_dict.keys():
                if bool(1-(k in types)) and k != 'denyword':
                    medical_dict[k] = pre_medical_dict[k]

            word_dict['symptom'] = medical_dict['symptom']
            word_dict['sub_symptom'] = medical_dict['sub_symptom']
            # 这种情况直接默认(可诊断)为'new_symptom', 这里要去维护已经完成的诊断列表
            question_type = 'new_symptom'
        elif preques_type == 'ques_degree':
            pre_medical_dict = preques_infor['diagnosis_infor']
            for k in pre_medical_dict.keys():
                if bool(1-(k in types)):
                    medical_dict[k] = pre_medical_dict[k]
            # 这种情况直接默认(可诊断)为'new_symptom', 这里要去维护已经完成的诊断列表
            question_type = 'new_symptom'

        # print('preques_type: %s, question_types: %s.' % (preques_infor['question_type'], question_type))
        # 将多个分类结果进行合并处理,组装成一个字典
        data['question_type'] = question_type
        preques_infor['question_type'] = question_type
        return data,  preques_infor, word_dict,down_symptom

    def word_analysis(self,word_result,down_symptom):
        medical_dict = {}
        if word_result['symptoms'] != []:
            sub_symptom = word_result['symptoms'][0]
            print('sub_symptom:' + sub_symptom)
            # 将小症状映射到大症状的过程(利用知识图谱)
            if sub_symptom in self.fit_up_symptom:
                medical_dict['symptom'] = sub_symptom
            else:
                down_symptom.append(sub_symptom)
                symptom = self.search('down_up_symptom',sub_symptom,down_symptom)[0]
                medical_dict['symptom'] = symptom
        tmplist = ['time','frequent','degree','denyword']
        for i, word in enumerate(word_result['other_words']):
            if word != {}:
                medical_dict[tmplist[i]] = list(word.keys())[0]
        return medical_dict,down_symptom


    def search(self,sql_type, data,down_symptom):
        sql = self.build_sql(sql_type, data)[0]
        ress = self.g.run(sql).data()
        keyword = ''
        if sql_type == 'up_down_symptom':
            down_symptoms = list(set([i['n.name'] for i in ress]))
            if len(down_symptoms) == 1:
                symptoms_num = 1
            else:
                # 检查子症状中是否出现过
                for w in down_symptoms:
                    if w in down_symptom:
                        down_symptoms.remove(w)
                if down_symptoms == []:
                    down_symptoms.append(data)
                symptoms_num = min(random.randint(1, len(down_symptoms)), 4)
                random.shuffle(down_symptoms)
            keyword = down_symptoms[:symptoms_num]
            # select_symptoms = down_symptoms[:symptoms_num]
            # keyword = '、'.join(select_symptoms)
        elif sql_type == 'down_up_symptom':
            keyword = list(set([i['m.name'] for i in ress]))
        return keyword

    def build_sql(self, sql_type, data):
        sql = []
        datas = [data]
        if sql_type=='down_up_symptom':
            sql = ["MATCH (m:down_symptom)-[r:sub_sub_symptom]->(n:down2_symptom) where n.name = '{0}' return m.name".format(i) for i in datas]
        elif sql_type=='up_down_symptom':
            sql = ["MATCH (m:down_symptom)-[r:sub_sub_symptom]->(n:down2_symptom) where m.name = '{0}' return n.name".format(i) for i in datas]
        return sql


    # def symptom_match(self,types):
    #     # 这里的主要的逻辑是诊断过程中,是否可以匹配上。
    #     if ('symptom' in types) and ('frequent' in types):
    #         return 'full'
    #     elif ('symptom' in types):
    #         return 'no_frequent'
    #     else:
    #         return 'no_symptom'

    '''构造词对应的类型'''
    # def build_wdtype_dict(self):
    #     wd_dict = dict()
    #     for wd in self.region_words:
    #         wd_dict[wd] = []
    #         if wd in self.disease_wds:
    #             wd_dict[wd].append('disease')
    #         if wd in self.drug_wds:
    #             wd_dict[wd].append('drug')
    #         if wd in self.symptom_wds:
    #             wd_dict[wd].append('symptom')
    #         if wd in self.degree_wds:
    #             wd_dict[wd].append('degree')
    #         if wd in self.frequent_wds:
    #             wd_dict[wd].append('frequent')
    #         if wd in self.time_wds:
    #             wd_dict[wd].append('time')
    #         if wd in self.yesno_wds:
    #             wd_dict[wd].append('yesno')
    #
    #
    #     return wd_dict

    '''构造actree,加速过滤'''
from string_converter import uniq_elem
from fuzzywuzzy import fuzz


if __name__ == "__main__":
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)
    tx = g.begin()

    idx1 = '''
    CREATE INDEX ON: Legislator(name)
    '''
    idx2 = '''
    CREATE INDEX ON: LegislatorInfo(wikipediaID)
    '''
    g.run(idx1)
    g.run(idx2)


    create_legislatorInfo = '''
    LOAD CSV WITH HEADERS
    FROM 'https://dl.dropboxusercontent.com/u/67572426/legislators-current.csv' AS line
    MERGE (legislator:LegislatorInfo { thomasID: line.thomasID })
    ON CREATE SET legislator = line
    ON MATCH SET legislator = line
    MERGE (s:State {code: line.state})
    CREATE UNIQUE (legislator)-[:REPRESENTS]->(s)
    MERGE (p:Party {name: line.currentParty})
    CREATE UNIQUE (legislator)-[:IS_MEMBER_OF]->(p)
    MERGE (b:Body {type: line.type})
    CREATE UNIQUE (legislator)-[:ELECTED_TO]->(b);
Exemple #16
0
class GraphHeroSummoner():
    """Creates hero-to-hero relationship edges in Neo4j from the scraped hero pages.

    Each line of ``hero_page_path`` is a JSON object; its ``relation_uri`` and
    ``relation_desc`` lists describe, position by position, the related heroes
    (see ``self.relation`` for the meaning of positions 1-6).
    """

    def __init__(self):
        print("GraphHeroSummoner init...")
        self.hero_page_path = "../spiderData/hero_page.json"
        self.g = Graph("http://localhost:7474",
                       username="******",
                       password="******")
        # 1-based position in relation_uri -> relationship type / name
        self.relation = {
            '1': "最佳搭档",
            '2': "最佳搭档",
            '3': "压制英雄",
            '4': "压制英雄",
            '5': "被压制英雄",
            '6': "被压制英雄"
        }

    def read_hero_page(self):
        """Read the hero page file and create one edge per related hero.

        Blank lines are skipped.  A relation URL from which no hero name can
        be extracted is reported and skipped.
        """
        ex = 'img201606/heroimg/(.*?).jpg'
        # `with` closes the file even when a line fails to parse
        with open(self.hero_page_path, encoding='utf-8') as hero_pages:
            for item in hero_pages:
                if not item.strip():
                    continue
                data = json.loads(item)
                # NOTE: summoner-skill edges (data["hero_summoner"]) are
                # currently not created; see create_hero_summoner_relationship.
                for num, relation_url in enumerate(data["relation_uri"], start=1):
                    found = re.findall(ex, relation_url, re.S)
                    if not found:
                        print(f"skip relation url without hero image: {relation_url}")
                        continue
                    hero_ename = found[0].split("/")[0]
                    relation_name = self.relation[f'{num}']
                    self.create_hero_partner_relationship(
                        "hero", "hero", [data["name"], hero_ename],
                        relation_name, relation_name,
                        data["relation_desc"][num - 1])

    def create_hero_summoner_relationship(self, start_node, end_node, edges,
                                          rel_type, rel_name):
        """Create an edge from a hero to a summoner skill.

        ``edges`` is ``[hero name, summoner id]``.  Errors raised by the
        database are printed, not propagated.
        """
        print(
            f"create_relationship start_node:{start_node} end_node:{end_node} edges:{edges} rel_type:{rel_type} rel_name:{rel_name}"
        )
        p = edges[0]
        q = edges[1]
        # Labels and relationship types cannot be parameters and stay
        # formatted in, as does summoner_id so it remains an unquoted literal;
        # the text values are passed as parameters so quotes cannot break
        # the statement.
        query = "match(p:%s),(q:%s) where p.name=$p_name and q.summoner_id=%s create (p)-[rel:%s{name:$rel_name}]->(q)" % (
            start_node, end_node, q, rel_type)
        try:
            self.g.run(query, p_name=p, rel_name=rel_name)
            print(p, rel_type, q)
        except Exception as e:
            print(e)
        return

    def create_hero_partner_relationship(self, start_node, end_node, edges,
                                         rel_type, rel_name, explain):
        """Create an edge between two heroes.

        ``edges`` is ``[hero name, related hero ename]``; ``explain`` is
        stored as the edge's ``desc``.  Errors raised by the database are
        printed, not propagated.
        """
        print(
            f"create_relationship start_node:{start_node} end_node:{end_node} edges:{edges} rel_type:{rel_type} rel_name:{rel_name}"
        )
        p = edges[0]
        q = edges[1]
        # Labels and the relationship type cannot be parameters; every text
        # value is, so a quote inside a description no longer breaks the query.
        query = "match(p:%s),(q:%s) where p.name=$p_name and q.hero_ename=$q_ename create (p)-[rel:%s{name:$rel_name,desc:$rel_desc}]->(q)" % (
            start_node, end_node, rel_type)
        try:
            self.g.run(query, p_name=p, q_ename=q,
                       rel_name=rel_name, rel_desc=explain)
            print(p, rel_type, q)
        except Exception as e:
            print(e)
        return

    def start(self):
        """Entry point: process the hero page file."""
        self.read_hero_page()
Exemple #17
0
class Neo4j(object):
    """Runs templated Cypher queries about movies and people.

    Each numeric ``model_label`` selects one template from
    ``self.query_models``; label 11 has no template and is answered by
    intersecting two label-7 queries.
    """

    def __init__(self):
        """Define the Cypher query templates and connect to the local server."""
        self.query_models = {
            0: "match(n:Movie) where n.title='{title}' return n.rating",  # nm
            1:
            "match(n:Movie) where n.title='{title}' return n.releasedate",  # nm
            2:
            "match(n:Movie)-[r:is]->(b:Genre) where n.title='{title}' return b.name",  # nm
            3:
            "match(n:Movie) where n.title ='{title}' return n.introduction",  # nm
            4:
            "match(n:Person)-[:actedin]-(m:Movie) where m.title ='{title}' return n.name",  # nm
            5:
            "match(n:Person) where n.name='{name}' return n.birthplace",  # nnt
            6:
            "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' match(g:Genre)-[:is]-(m) where g.name=~'{gname}' return distinct  m.title",  # nnt, ng
            7:
            "match(n:Person)-[:actedin]->(m:Movie) where n.name='{name}' return m.title",  # nnt
            8:
            "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' and m.rating > {score} return m.title",  # nnt, x
            9:
            "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' and m.rating < {score} return m.title",  # nnt, x
            10:
            "match(n:Person)-[:actedin]-(m:Movie) where n.name ='{name}' match(p:Genre)-[:is]-(m) return distinct  p.name",  # nnt
            # 11: handled separately in query()
            12:
            "match(n)-[:actedin]-(m) where n.name ='{name}' return count(*)",  # nnt
            13: "match(n:Person) where n.name='{name}' return n.birth",  #  nnt
        }

        self.graph = Graph('http://localhost:7474',
                           username="******",
                           password="******")

    def query(self, model_label, query_dict):
        """Answer one question and return the distinct results as a list.

        Args:
            model_label: Integer selecting the query template (11 is the
                intersection of the label-7 results for ``query_dict["nnt"]``
                and ``query_dict["nnr"]``).
            query_dict: Mapping holding the slot values the template needs.

        Returns:
            A list of results; an empty list when anything goes wrong
            (missing slot, unknown label, database error).
        """
        try:
            if model_label == 11:
                data_x = self._query(model_label=7,
                                     query_dict={"nnt": query_dict["nnt"]})
                data_y = self._query(model_label=7,
                                     query_dict={"nnt": query_dict["nnr"]})
                return list(data_x.intersection(data_y))
            return list(self._query(model_label, query_dict))
        except Exception:
            # Best-effort lookup: callers get "no answer" instead of a crash.
            # Unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
            # propagate.
            return []

    @staticmethod
    def _escape(value):
        """Escape backslashes and single quotes for a single-quoted Cypher literal."""
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    def _query(self, model_label, query_dict):
        """Fill the template for ``model_label``, run it and collect the first column.

        Raises:
            ValueError: if ``model_label`` has no template handling here, or
                the score slot is not numeric.
            KeyError: if a required slot is missing from ``query_dict``.
        """
        template = self.query_models.get(model_label)

        if model_label in [0, 1, 2, 3, 4]:
            query_str = template.format(title=self._escape(query_dict["nm"]))
        elif model_label in [5, 7, 10, 12, 13]:
            query_str = template.format(name=self._escape(query_dict["nnt"]))
        elif model_label == 6:
            query_str = template.format(name=self._escape(query_dict["nnt"]),
                                        gname=self._escape(query_dict["ng"]))
        elif model_label in [8, 9]:
            # The score is spliced in unquoted, so insist on a real number.
            query_str = template.format(name=self._escape(query_dict["nnt"]),
                                        score=float(query_dict["x"]))
        else:
            # Previously an empty statement was sent to the server here.
            raise ValueError("unsupported model_label: %r" % (model_label, ))

        data = self.graph.run(query_str).data()
        result = set()
        for record in data:
            # Every template returns a single column; keep its value.
            result.add(list(record.values())[0])

        return result
Exemple #18
0
from py2neo import Graph, Node, Relationship
# Connect to the database and start from an empty graph.
# NOTE(review): `url` is not defined in this snippet; it must come from elsewhere.
graph = Graph(url)
graph.delete_all()

# Build three Person nodes (ages are stored as strings) and persist each one.
manas = Node('Person', name='Manas', age='22')
lakshya = Node('Person', name='Lakshya', age='101 NOT OUT')
divya = Node('Person', name='Divya', age='21')
for node in (manas, lakshya, divya):
    graph.create(node)

# A single directed relationship: lakshya -> manas.
graph.create(Relationship(lakshya, 'BULLIES😢', manas))

# Fetch every Person with raw Cypher and print the node held in each record.
query = 'match (person:Person) return person'
records = graph.run(query)
for record in records:
    node = record.get('person')
    print(node)

# The same listing through the node-matching API.
results = graph.nodes.match('Person')
for node in results:
    print(node)
Exemple #19
0
def ne4jquery(query):
    """Run ``query`` on a fresh connection to ``NEO4J_SERVER`` and return the result as a DataFrame."""
    cursor = Graph(NEO4J_SERVER).run(query)
    return cursor.to_data_frame()
Exemple #20
0
class FanGraph(object):
    """
    This object provides a set of helper methods for creating and retrieving Nodes and relationship from
    a Neo4j database.
    """

    # Comment text containing any of these substrings is stored as '****'.
    # The match is a plain substring test; '*' is NOT a wildcard here.
    _BANNED_FRAGMENTS = ('sucks', 'fuc*', 'sh*t')

    # Shared Cypher for the comment-thread queries. Only fixed identifiers are
    # formatted in; the entity id is always bound as the $entity_id parameter.
    _THREAD_QUERY = (
        "match ({var}:{label}{{{key}:$entity_id}})-[on:COMMENT_ON*]-(comment:Comment)-"
        "[response_to:RESPONSE_TO*]-(sub_comment:Comment)-"
        "[response_by:RESPONSE_BY*]-(fan2:Fan) "
        "with {var},on,comment,response_to,sub_comment,response_by,fan2 "
        "match (comment)-[comment_by:COMMENT_BY*]-(fan:Fan) "
        "return {var},on,comment,comment_by,fan,response_to,sub_comment,response_by,fan2"
    )

    # Connects to the DB and sets a Graph instance variable.
    # Also creates a NodeMatcher, which is a py2neo class.
    def __init__(
        self,
        auth,
        host,
        port,
        secure=False,
    ):
        self._graph = Graph(secure=secure,
                            bolt=True,
                            auth=auth,
                            host=host,
                            port=port)
        self._node_matcher = NodeMatcher(self._graph)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _create(self, entity):
        """Persist a Node or Relationship in its own auto-committed transaction."""
        tx = self._graph.begin(autocommit=True)
        tx.create(entity)
        return entity

    def _find_one(self, label, **properties):
        """Return the first node with ``label`` and ``properties``, or None."""
        nodes = self.find_nodes_by_template({
            "label": label,
            "template": properties
        })
        return nodes[0] if nodes else None

    @classmethod
    def _censor(cls, comment):
        """Return '****' if ``comment`` contains a banned fragment, else ``comment``."""
        if any(fragment in comment for fragment in cls._BANNED_FRAGMENTS):
            return '****'
        return comment

    def _comment_thread(self, var, label, key, entity_id):
        """Run the comment-thread query for one entity and return the rows as dicts."""
        q = self._THREAD_QUERY.format(var=var, label=label, key=key)
        # Ids were previously concatenated as quoted strings; keep the string
        # typing but bind the value so it cannot alter the statement.
        return self._graph.run(q, {"entity_id": str(entity_id)}).data()

    # ------------------------------------------------------------------
    # Generic matching
    # ------------------------------------------------------------------
    def run_match(self, labels=None, properties=None):
        """
        Uses a NodeMatcher to find a node matching a "template."
        :param labels: A list of labels that the node must have.
        :param properties: A parameter list of the form prop1=value1, prop2=value2, ...
        :return: An array of Node objects matching the pattern.
        :raises ValueError: if both labels and properties are None.
        """
        if labels is None and properties is None:
            raise ValueError(
                "Invalid request. Labels and properties cannot both be None.")

        if labels is None:
            result = self._node_matcher.match(**properties)
        elif properties is None:
            result = self._node_matcher.match(labels)
        else:
            result = self._node_matcher.match(labels, **properties)

        # Convert NodeMatch data into a simple list of Nodes.
        return list(result)

    def find_nodes_by_template(self, tmp):
        """

        :param tmp: A template defining the label and properties for Nodes to return. An
         example is { "label": "Fan", "template" { "last_name": "Ferguson", "first_name": "Donald" }}
        :return: A list of Nodes matching the template.
        """
        return self.run_match(labels=tmp.get('label'),
                              properties=tmp.get("template"))

    # ------------------------------------------------------------------
    # Fans, players, teams
    # ------------------------------------------------------------------
    def create_fan(self, uni, last_name, first_name):
        """Create and save a new 'Fan' node; returns it like the other create_* helpers."""
        return self._create(
            Node("Fan", uni=uni, last_name=last_name, first_name=first_name))

    def get_fan(self, uni):
        """Given a UNI, return the node for the Fan, or None if there is none."""
        return self._find_one("Fan", uni=uni)

    def create_player(self, player_id, last_name, first_name):
        """Create, save and return a new 'Player' node."""
        return self._create(
            Node("Player",
                 player_id=player_id,
                 last_name=last_name,
                 first_name=first_name))

    def get_player(self, player_id):
        """Return the Player node with ``player_id``, or None."""
        return self._find_one("Player", player_id=player_id)

    def create_team(self, team_id, team_name):
        """Create, save and return a new 'Team' node."""
        return self._create(Node("Team", team_id=team_id, team_name=team_name))

    def get_team(self, team_id):
        """Return the Team node with ``team_id``, or None."""
        return self._find_one("Team", team_id=team_id)

    # ------------------------------------------------------------------
    # Relationships
    # ------------------------------------------------------------------
    def create_supports(self, uni, team_id):
        """
        Create a SUPPORTS relationship from a Fan to a Team.
        :param uni: The UNI for a fan.
        :param team_id: An ID for a team.
        :return: The created SUPPORTS relationship from the Fan to the Team
        """
        f = self.get_fan(uni)
        t = self.get_team(team_id)
        return self._create(Relationship(f, "SUPPORTS", t))

    def create_appearance(self, player_id, team_id):
        """Create an APPEARED relationship from a Player to a Team.

        Errors are printed and swallowed; nothing is returned.
        """
        try:
            f = self.get_player(player_id)
            t = self.get_team(team_id)
            self._create(Relationship(f, "APPEARED", t))
        except Exception as e:
            print("create_appearances: exception = ", e)

    def create_follows(self, follower, followed):
        """Create a FOLLOWS relationship from one Fan to another Fan."""
        f = self.get_fan(follower)
        t = self.get_fan(followed)
        self._create(Relationship(f, "FOLLOWS", t))

    # ------------------------------------------------------------------
    # Comments
    # ------------------------------------------------------------------
    def get_comment(self, comment_id):
        """Return the Comment node with ``comment_id``, or None."""
        return self._find_one("Comment", comment_id=comment_id)

    def create_comment(self, uni, comment, team_id=None, player_id=None):
        """
        Creates a comment
        :param uni: The UNI for the Fan making the comment.
        :param comment: A simple string.
        :param team_id: A valid team ID or None. team_id and player_id cannot BOTH be None.
        :param player_id: A valid player ID or None
        :return: The Node representing the comment.
        :raises NameError: if ``comment`` is empty.

        NOTE(review): the "cannot BOTH be None" rule is documented but not
        enforced by this method.
        """
        if not comment:
            raise NameError('There is no valid comments,please check it again')

        com = Node("Comment",
                   comment_id=str(uuid.uuid4()),
                   team_id=team_id,
                   player_id=player_id,
                   comment=self._censor(comment))
        self._create(com)

        # Link the author first, then whichever targets were supplied.
        fan = self.get_fan(uni)
        self._create(Relationship(fan, "COMMENT_BY", com))
        if team_id is not None:
            tm = self.get_team(team_id)
            self._create(Relationship(com, "COMMENT_ON", tm))
        if player_id is not None:
            plr = self.get_player(player_id)
            self._create(Relationship(com, "COMMENT_ON", plr))
        return com

    def examine_sub_comment(self, uni, origin_comment_id, comment):
        '''
        Validate the inputs of create_sub_comment.
        :raises NameError: if the comment is empty, or the original comment
         or the fan cannot be found.
        '''
        examine_ori = self.get_comment(origin_comment_id)
        examine_fan = self.get_fan(uni)
        if not comment:
            raise NameError('There are some problems about new_comment')
        if examine_ori is None:
            raise NameError('There are some problems about origin_comment_id')
        if examine_fan is None:
            raise NameError('There are some problems about fans')

    def create_sub_comment(self, uni, origin_comment_id, comment):
        """
        Create a sub-comment (response to a comment or response) and links with parent in thread.
        :param uni: ID of the Fan making the comment.
        :param origin_comment_id: Id of the comment to which this is a response.
        :param comment: Comment string
        :return: Created comment.
        :raises NameError: if validation in examine_sub_comment fails.
        """
        self.examine_sub_comment(uni, origin_comment_id, comment)

        com = Node("Comment",
                   comment_id=str(uuid.uuid4()),
                   comment=self._censor(comment))
        existed_comment = self.get_comment(origin_comment_id)
        self._create(com)

        fan = self.get_fan(uni)
        # new comment -> RESPONSE_TO -> original comment
        self._create(Relationship(com, "RESPONSE_TO", existed_comment))
        # fan -> RESPONSE_BY -> new comment
        self._create(Relationship(fan, "RESPONSE_BY", com))

        return com

    def get_player_comments(self, player_id):
        """
        Gets all of the comments associated with a player, all of the comments on the comment and comments
        on the comments, etc. Also returns the Nodes for people making the comments.
        :param player_id: ID of the player.
        :return: List of result rows (dicts) containing comment, comment streams and commenters.
        """
        return self._comment_thread("player", "Player", "player_id", player_id)

    def get_team_comments(self, team_id):
        """
        Gets all of the comments associated with a teams, all of the comments on the comment and comments
        on the comments, etc. Also returns the Nodes for people making the comments.
        :param team_id: ID of the team.
        :return: List of result rows (dicts) containing comment, comment streams and commenters.
        """
        return self._comment_thread("team", "Team", "team_id", team_id)
Exemple #21
0
# slice the dataframe for the indexes and rows of choice
# questions = [val for val in data.columns[1:5]]
# answers = data.iloc[1:4, 1:5]
# respondents = data.iloc[1:4, 0]

# print(data[0].columns)
# Pass 1: one CREATE per row of data[0].
# Row layout as used below: notes[0] is the node label, notes[1..3] are the
# values stored under the column names col[1..3]. The Cypher variable name is
# notes[1] with spaces and dots stripped.
# NOTE(review): `data` and `graph` are defined outside this snippet; data[0]
# and data[1] are presumably pandas DataFrames -- confirm.
col = data[0].columns
for indexs in data[0].index:
    notes = data[0].loc[indexs].values
    labels = notes[1].replace(' ', '').replace('.', '')
    # Named `statement` (not `str`) so the builtin is no longer shadowed.
    statement = ("CREATE (" + labels + ":" + notes[0] + "{" +
                 col[1] + ":'" + notes[1] + "'," +
                 col[2] + ":'" + notes[2] + "'," +
                 col[3] + ":'" + notes[3] + "'})")
    print(statement)
    graph.run(statement)
#    graph.run("CREATE (n:BBB{name:'ssss'}) return n")

# Pass 2: one relationship per row of data[1].
# Row layout as used below: notes[0]/notes[3] are the labels of the two end
# nodes, notes[1]/notes[4] the values they are matched on (both via column
# col[1]), notes[2] the relationship type and notes[5] the single element of
# a list property named col[5].
col = data[1].columns
for indexs in data[1].index:
    notes = data[1].loc[indexs].values
    #    str="CREATE (n:"+notes[0]+"{"+col[3]+":'"+notes[3]+"',"+col[2]+":'"+notes[2]+"',"+col[1]+":'"+notes[1]+"'}) return n"
    statement = ("MATCH (a:" + notes[0] + "),(b:" + notes[3] + ")" +
                 "  WHERE a." + col[1] + " = '" + notes[1] + "'" +
                 " AND b." + col[1] + "= '" + notes[4] + "'" +
                 "  CREATE (a)-[" + col[2] + ":" + notes[2] +
                 "  {" + col[5] + ":['" + notes[5] + "']}]->(b)")
    print(statement)

    graph.run(statement)

    # print(data.columns[cols]+"="+notes[cols])
Exemple #22
0
class NeoTalentos():
    """Loads a talent CSV into Neo4j as Talento nodes plus related lookup nodes."""

    file_loc = ''
    g = ''

    def __init__(self):
        """Set the CSV location and open the graph connection."""
        self.file_location = "file:///talentos1.csv"
        self.g = Graph("http://neo4j:7474/db/data/")

    def RunAll(self):
        """Run every import statement in order.

        First the uniqueness constraints, then one LOAD CSV statement per
        node kind and per relationship kind. Node statements come before the
        relationship statements that MATCH those nodes.
        """
        constraint_statements = (
            "CREATE CONSTRAINT ON (t:Talento) ASSERT t.nombre IS UNIQUE",
            "CREATE CONSTRAINT ON (u:Ubicacion) ASSERT u.lugar IS UNIQUE",
            "CREATE CONSTRAINT ON (g:Grado) ASSERT g.grados IS UNIQUE",
            "CREATE CONSTRAINT ON (h:Habilidad) ASSERT h.habilidades IS UNIQUE",
            "CREATE CONSTRAINT ON (a:AlmaMater) ASSERT a.almaMater IS UNIQUE",
            "CREATE CONSTRAINT ON (i:Idioma) ASSERT i.idiomas IS UNIQUE",
        )
        for statement in constraint_statements:
            self.g.run(statement)

        # Each template takes the CSV location as its single %s argument.
        csv_templates = (
            # Talento nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            MERGE (t:Talento {id: row.ID, nombre: row.ID_NOMBRE})
            ON CREATE SET t.idiomas = row.IDI_NOMBRE, t.habilidades = row.HAB_HABILIDAD,
            t.almaMater = row.EDU_INST, t.area = row.EDU_AREA, t.grados = row.EDU_TITULO,
            t.posicion = row.TRA_POSICION, t.empresa = row.TRA_EMPRESA, t.departamento = row.TRA_AREA,
            t.sector = row.TRA_TIPO, t.industria = row.TRA_IND, t.nacimiento = row.INF_FECHNAC,
            t.lugar = row.INF_UBICACION
            """,
            # Idioma nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.IDI_NOMBRE, ",") AS idiomasId
            WITH DISTINCT idiomasId
            MERGE (i:Idioma {idiomas: idiomasId})
            """,
            # Talento -> Idioma
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.IDI_NOMBRE, ",") AS idiomas, row.ID_NOMBRE AS Nombre
            UNWIND idiomas AS idiomasId
            WITH DISTINCT idiomasId, Nombre
            MATCH (t:Talento {nombre: Nombre})
            MATCH (i:Idioma {idiomas: idiomasId})
            MERGE (t)-[:FLUYENTES_EN]->(i)
            """,
            # Habilidad nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.HAB_HABILIDAD, ",") AS habilidadesId
            WITH DISTINCT habilidadesId
            MERGE (h:Habilidad {habilidades: habilidadesId})
            """,
            # Talento -> Habilidad
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.HAB_HABILIDAD, ",") AS habilidades, row.ID AS proyectoNo
            UNWIND habilidades AS habilidadesId
            WITH DISTINCT habilidadesId, proyectoNo
            MATCH (t:Talento {id: proyectoNo})
            MATCH (h:Habilidad {habilidades: habilidadesId})
            MERGE (t)-[:TIENE_HABILIDADES]->(h)

            """,
            # AlmaMater nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.EDU_INST, ",") AS almaMaters
            WITH DISTINCT almaMaters
            MERGE (:AlmaMater {almaMater: almaMaters})
            """,
            # Talento -> AlmaMater
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.EDU_INST, ",") AS almas, row.ID AS No
            UNWIND almas AS almasId
            WITH DISTINCT almasId, No
            MATCH (t:Talento {id: No})
            MATCH (a:AlmaMater {almaMater: almasId})
            MERGE (t)-[:EGRESADO_DE]->(a)
            """,
            # Grado nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            UNWIND split(row.EDU_TITULO, ",") AS Grado
            WITH DISTINCT Grado
            MERGE (:Grado {grados: Grado})
            """,
            # Talento -> Grado
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            WITH split(row.EDU_TITULO, ",") AS grados, row.ID AS proyectoNo
            UNWIND grados AS gradosId
            WITH DISTINCT gradosId, proyectoNo
            MATCH (t:Talento {id: proyectoNo})
            MATCH (g:Grado {grados: gradosId})
            MERGE (t)-[:EGRESADO_EN]->(g)
            """,
            # Ubicacion nodes
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            MERGE (u:Ubicacion {lugar: row.INF_UBICACION})
            """,
            # Talento -> Ubicacion
            """
            LOAD CSV WITH HEADERS FROM '%s' AS row
            MATCH (t:Talento {id: row.ID})
            MATCH (u:Ubicacion {lugar: row.INF_UBICACION})
            MERGE (t)-[:UBICADO_EN]->(u)
            """,
        )
        for template in csv_templates:
            self.g.run(template % self.file_location)
# Cypher statements used by this script.
query = "match(n) return n"
machineQuery = "match (m:Machine) return m"
argusQuery = "match(m:Machine)-[]->(:Department)-[:BELONGS_TO]->(p:Plant {name:'Argus'}) return m"
suggestionQuery = "match  (cA:CorrectiveAction)<-[r]-(f:Fault)<-[:HAD_FAULT]-(m:Machine) return DISTINCT m.id, cA.suggestion"
machineStatusQuery = "match  (cA:CorrectiveAction)<-[r]-(f:Fault)<-[:HAD_FAULT]-(m:Machine) return DISTINCT m.id,m.maker,m.model,m.status,f.message,cA.suggestion ORDER BY m.id"

# Parallel lists filled from the query results below.
machine_Id_list = []
nextActionText = []
machine_ids = []
machine_makers = []
machine_models = []
machine_statuses = []
machine_messages = []
machine_suggestions = []

# Collect every suggestion text. The machine id of each row was previously
# copied into a variable named `id` (shadowing the builtin) and never read,
# so that assignment has been dropped.
# NOTE(review): `graph` is defined outside this snippet.
suggestions = graph.run(suggestionQuery)
for suggestion in suggestions:
    nextActionText.append(suggestion['cA.suggestion'])

# Keep the first row seen for each run of equal machine ids. Equal ids are
# adjacent because the query is ordered by m.id.
machines = graph.run(machineStatusQuery)
index = 0
currentMachineId = 0
for machine in machines:
    if currentMachineId != machine['m.id']:
        currentMachineId = machine['m.id']
        machine_ids.append(machine['m.id'])
        machine_makers.append(machine['m.maker'])
        machine_models.append(machine['m.model'])
        machine_statuses.append(machine['m.status'])
        machine_messages.append(machine['f.message'])
Exemple #24
0
from py2neo import Graph
import sys
import json

# Look up one Place near one City and print it as JSON on stdout.
# Usage: <script> <city> <attraction>, with '-' standing in for spaces.
graph = Graph(host='localhost', auth=('neo4j', 'abduabdu'))
city = sys.argv[1].replace('-', ' ')
attrac = sys.argv[2].replace('-', ' ')
# Both values come straight from the command line, so they are bound as
# Cypher parameters instead of being formatted into the statement.
query = """
    MATCH (p:Place)-[:Near]->(c:City)
    WHERE c.name=$city and p.name=$attrac
    RETURN p
"""
res = graph.run(query, city=city, attrac=attrac).data()

try:
    ans = json.dumps(res[0]['p'])
except (IndexError, KeyError, TypeError, ValueError):
    # No matching place, or the node could not be serialized: emit an empty
    # JSON object (same output as before, but `ans` is now always a string).
    ans = '{}'
print(ans, end='')
Exemple #25
0
def main(args):
    """List the known operating systems, or plot binary-protection statistics.

    Args:
        args: Parsed command-line mapping. Reads ``--debug``, ``--list`` and
            ``<os_regex>``.

    Returns:
        ``1`` when no operating system matches ``<os_regex>``; ``None``
        after listing or after showing the plot.
    """
    init_logger(args['--debug'])
    logging.info('connect to Neo4j DB')
    graph = Graph(password=DB_PASSWORD)

    # list ?
    if args['--list']:
        logging.info('available operating systems:')
        for os_node in OS.match(graph):
            logging.info('\t%s', os_node.name)
        return

    os_regex = args['<os_regex>']
    # Materialize the matches: the match object itself is never None, so the
    # only reliable "nothing found" test is an empty result list.
    matched = list(OS.match(graph).where("_.name =~ '{}'".format(os_regex)))
    if not matched:
        logging.info(
            'unable to find OS that matches \'%s\' regex in the database',
            os_regex)
        return 1

    # Inode properties that are counted when truthy.
    counted_flags = ('relro', 'canary', 'nx', 'rpath', 'runpath', 'symtables',
                     'fortify_source')

    os_df_list = []
    # iterate over OS list, sorted by release date, converted from string to date object
    for os_node in sorted(
            matched,
            key=lambda x: datetime.strptime(x.release_date, '%Y-%m-%d')):
        # TODO translate to py2neo API
        checksec_inodes = graph.run(OS_CHECKSEC_QUERY.format(os_node.name))
        c = Counter()
        for node in checksec_inodes:
            inode = node['i']
            logging.debug('%s: %s', inode['name'], inode['mime_type'])
            c['total'] += 1
            for flag in counted_flags:
                if inode[flag]:
                    c[flag] += 1

        total = c['total']
        if not total:
            logging.warning('no binaries found for %s', os_node.name)
        # Percentage per protection; 0.0 when there are no binaries at all
        # (previously a ZeroDivisionError).
        per_data = [
            c[feature] * 100 / total if total else 0.0
            for feature in PROTECTIONS
        ]

        logging.info('Results for %s', os_node.name)
        logging.info('Total binaries: %d', total)
        for feature, value in zip(PROTECTIONS, per_data):
            logging.info('%s: %.1f%%', feature, value)

        # initialize OS Panda DataFrame
        df = pd.DataFrame({
            'Protections': PROTECTIONS,
            'Percentage': per_data,
            'OS': os_node.name
        })
        os_df_list.append(df)

    # fix matplotlib, uses agg by default, non-gui backend
    # (set once here rather than on every loop iteration)
    matplotlib.use('tkagg')
    sns.set_style('whitegrid')

    # concatenate all the individual DataFrames
    main_df = pd.concat(os_df_list, ignore_index=True)

    logging.info('Displaying results...')
    if len(os_df_list) == 1:
        ax = sns.barplot(x="Protections", y="Percentage", data=main_df)
        ax.set_title('{} binary security overview'.format(os_regex))
    else:
        ax = sns.barplot(x="Protections",
                         y="Percentage",
                         hue="OS",
                         data=main_df)
        ax.set_title(
            'binary security overview for regex "{}"'.format(os_regex))
    # show plot
    plt.legend(loc='upper right')
    plt.show()
class AnswerSearching:
    def __init__(self):
        """Connect to the local Neo4j server and set the result limit."""
        credentials = {"username": "******", "password": "******"}
        self.graph = Graph("http://localhost:7474", **credentials)
        # Maximum number of results to report.
        self.top_num = 10

    def question_parser_graph(self, data):  # 输入的是从问题中抽取的到的实体
        """
        主要是根据不同的实体和意图构造cypher查询语句
        :param data: {"Disease":[], "Alias":[], "Symptom":[], "Complication":[],“Intentions”:[]}
        :return:
        """
        sql = []
        tag = ""
        if data:
            if data.get("Disease"):
                sql, tag = self.transfor_to_sql_graph("Disease",
                                                      data["Disease"],
                                                      data["Intentions"])
            elif data.get("Alias"):
                sql, tag = self.transfor_to_sql_graph("Alias", data["Alias"],
                                                      data["Intentions"])
            elif data.get("Symptom"):
                sql, tag = self.transfor_to_sql_graph("Symptom",
                                                      data["Symptom"],
                                                      data["Intentions"])
            elif data.get("Complication"):
                sql, tag = self.transfor_to_sql_graph("Complication",
                                                      data["Complication"],
                                                      data["Intentions"])

        return sql, tag

    # Query related nodes (reference Cypher snippets below)
    # "match data=(na:company{id:'12399145'}) - [*1..3]->(nb:company) return data

    # "match(p)-[r]->(n:Person{Name:'%s'}) return  p.Name,r.relation,n.Name,p.cate,n.cate\
    #                Union all\
    #            match(p:Person {Name:'%s'}) -[r]->(n) return p.Name, r.relation, n.Name, p.cate, n.cate" % (name, name)

    def transfor_to_sql_graph(self, label, entities, intent):
        """
        Build Cypher statements returning every relationship around the
        given entities.

        :param label: entity label ("Disease", "Alias", "Symptom" or "Complication")
        :param entities: list of entity names
        :param intent: query intention (accepted for symmetry with
            ``transfor_to_sql``; not used here)
        :return: tuple ``(statements, tag)``. ``statements`` holds one Cypher
            statement per entity; ``([], "")`` is returned when there are no
            entities or the label is unknown.
        """
        # Always return a 2-tuple: callers unpack ``sql, tag = ...`` and a
        # bare ``[]`` would raise ValueError there.
        if not entities:
            return [], ""

        # label -> (Cypher template, tag reported to the caller).
        # NOTE(review): the Alias statement wraps a UNION in parentheses after
        # WITH, which does not look like valid Cypher - verify against the
        # database before relying on it.
        templates = {
            "Disease": (
                "match (d:Disease{name:'%s'})<-[r]-(p) return r Union all match (d:Disease{name:'%s'})-[r]->(n) return r",
                "Disease",
            ),
            "Alias": (
                "match (d:Disease)-[:ALIAS_IS]->(a:Alias{name:'%s'}) with d (match (p)-[r]->(d:Disease) return r Union all match (d:Disease)-[r]->(n) return r)",
                "Disease",
            ),
            "Symptom": (
                "match (p)-[r]->(s:Symptom{name:'%s'}) return r Union all match (s:Symptom{name:'%s'})-[r]->(n) return r",
                "HAS_SYMPTOM",
            ),
            "Complication": (
                "match (p)-[r]->(c:Complication{name:'%s'}) return r Union all match (p:Complication{name:'%s'})-[r]->(n) return r",
                "Disease",
            ),
        }

        entry = templates.get(label)
        if entry is None:
            return [], ""

        template, tag = entry
        # Each template mentions the entity name once or twice.
        slots = template.count('%s')
        sqls = [template % ((e,) * slots) for e in entities]
        return sqls, tag

    def searching_graph(self, sqls):
        answers = []
        for sql in sqls:
            ress = self.graph.run(sql).data()
            answers += ress
        return answers

    def question_parser(self, data):  # 输入的是从问题中抽取的到的实体
        """
        主要是根据不同的实体和意图构造cypher查询语句
        :param data: {"Disease":[], "Alias":[], "Symptom":[], "Complication":[],“Intentions”:[]}
        :return:
        """
        sqls = []
        if data:
            for intent in data["Intentions"]:
                sql_ = {}
                sql_["intention"] = intent
                sql = []
                if data.get("Disease"):
                    sql = self.transfor_to_sql("Disease", data["Disease"],
                                               intent)
                    # print(sql)
                elif data.get("Alias"):
                    sql = self.transfor_to_sql("Alias", data["Alias"], intent)
                elif data.get("Symptom"):
                    sql = self.transfor_to_sql("Symptom", data["Symptom"],
                                               intent)
                elif data.get("Complication"):
                    sql = self.transfor_to_sql("Complication",
                                               data["Complication"], intent)

                if sql:
                    sql_['sql'] = sql
                    sqls.append(sql_)
        return sqls

    def transfor_to_sql(self, label, entities, intent):
        """
        Build the Cypher statements that answer one intent for a list of
        entities.

        :param label: entity label ("Disease", "Alias", "Symptom" or "Complication")
        :param entities: list of entity names
        :param intent: query intention
        :return: list of Cypher statements, one per entity; empty when there
            are no entities or the (intent, label) pair is not supported
        """
        if not entities:
            return []

        describe_fields = "d.insurance,d.infection,d.checklist,d.period,d.rate,d.money"

        # (intent, label, Cypher template); "{0}" stands for the entity name.
        templates = (
            # symptoms
            ("query_symptom", "Disease",
             "MATCH (d:Disease)-[:HAS_SYMPTOM]->(s) WHERE d.name='{0}' RETURN d.name,s.name"),
            ("query_symptom", "Alias",
             "MATCH (a:Alias)<-[:ALIAS_IS]-(d:Disease)-[:HAS_SYMPTOM]->(s) "
             "WHERE a.name='{0}' return d.name,s.name"),
            # treatment
            ("query_cureway", "Disease",
             "MATCH (d:Disease)-[:HAS_DRUG]->(n) WHERE d.name='{0}' "
             "return d.name,d.treatment,n.name"),
            ("query_cureway", "Alias",
             "MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(a:Alias) WHERE a.name='{0}' "
             "return d.name, d.treatment, n.name"),
            ("query_cureway", "Symptom",
             "MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' "
             "return d.name,d.treatment, n.name"),
            ("query_cureway", "Complication",
             "MATCH (n)<-[:HAS_DRUG]-(d:Disease)-[]->(c:Complication) WHERE c.name='{0}' "
             "return d.name,d.treatment, n.name"),
            # treatment period
            ("query_period", "Disease",
             "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.period"),
            ("query_period", "Alias",
             "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.period"),
            ("query_period", "Symptom",
             "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.period"),
            ("query_period", "Complication",
             "MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name,d.period"),
            # cure rate
            ("query_rate", "Disease",
             "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.rate"),
            ("query_rate", "Alias",
             "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.rate"),
            ("query_rate", "Symptom",
             "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.rate"),
            ("query_rate", "Complication",
             "MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name,d.rate"),
            # examinations
            ("query_checklist", "Disease",
             "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.checklist"),
            ("query_checklist", "Alias",
             "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.checklist"),
            ("query_checklist", "Symptom",
             "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.checklist"),
            ("query_checklist", "Complication",
             "MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name,d.checklist"),
            # department
            ("query_department", "Disease",
             "MATCH (d:Disease)-[:DEPARTMENT_IS]->(n) WHERE d.name='{0}' return d.name,n.name"),
            ("query_department", "Alias",
             "MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:ALIAS_IS]->(a:Alias) WHERE a.name='{0}' "
             "return d.name,n.name"),
            ("query_department", "Symptom",
             "MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:HAS_SYMPTOM]->(s:Symptom) WHERE s.name='{0}' "
             "return d.name,n.name"),
            ("query_department", "Complication",
             "MATCH (n)<-[:DEPARTMENT_IS]-(d:Disease)-[:HAS_COMPLICATION]->(c:Complication) WHERE "
             "c.name='{0}' return d.name,n.name"),
            # disease lookup
            ("query_disease", "Alias",
             "MATCH (d:Disease)-[]->(s:Alias) WHERE s.name='{0}' return d.name"),
            ("query_disease", "Symptom",
             "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name"),
            # disease description
            ("disease_describe", "Alias",
             "MATCH (d:Disease)-[]->(a:Alias) WHERE a.name='{0}' return d.name,d.age,"
             + describe_fields),
            ("disease_describe", "Disease",
             "MATCH (d:Disease) WHERE d.name='{0}' return d.name,d.age,"
             + describe_fields),
            ("disease_describe", "Symptom",
             "MATCH (d:Disease)-[]->(s:Symptom) WHERE s.name='{0}' return d.name,d.age,"
             + describe_fields),
            ("disease_describe", "Complication",
             "MATCH (d:Disease)-[]->(c:Complication) WHERE c.name='{0}' return d.name,d.age,"
             + describe_fields),
        )

        # Every (intent, label) pair appears at most once, so the first match
        # is the only match.
        for known_intent, known_label, template in templates:
            if intent == known_intent and label == known_label:
                return [template.format(name) for name in entities]
        return []

    def searching(self, sqls):
        """
        执行cypher查询,返回结果
        :param sqls:
        :return:str
        """
        final_answers = []
        for sql_ in sqls:
            intent = sql_['intention']
            queries = sql_['sql']
            answers = []
            for query in queries:
                ress = self.graph.run(query).data()
                # print(ress)
                answers += ress
            final_answer = self.answer_template(intent, answers)
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    def answer_template(self, intent, answers):
        """
        Render knowledge-graph rows as a templated answer for the given intent.

        At most 10 diseases are reported. Values listed per disease are
        de-duplicated in first-seen order, and missing (``None``) values are
        skipped instead of raising ``TypeError`` in ``str.join``.

        :param intent: query intention
        :param answers: rows returned by the knowledge-graph query (list of dicts)
        :return: str; "" when there are no rows or the intent is not recognised
        """
        if not answers:
            return ""

        limit = 10  # maximum number of diseases reported per answer

        def grouped(field):
            """Group the values of ``field`` by disease name, keeping row order."""
            groups = {}
            for row in answers:
                groups.setdefault(row['d.name'], []).append(row[field])
            return list(groups.items())[:limit]

        def joined(values, unique=True):
            """Comma-join the non-missing values, optionally de-duplicated."""
            if unique:
                # dict.fromkeys de-duplicates while preserving order, so the
                # answer text is the same on every run (set() ordering is not).
                values = dict.fromkeys(values)
            return ','.join(str(v) for v in values if v is not None)

        # Intents that list one property per disease: (intent, field, template).
        list_templates = (
            ("query_symptom", 's.name', "疾病 {0} 的症状有:{1}\n"),
            ("query_period", 'd.period', "疾病 {0} 的治愈周期为:{1}\n"),
            ("query_rate", 'd.rate', "疾病 {0} 的治愈率为:{1}\n"),
            ("query_checklist", 'd.checklist', "疾病 {0} 的检查项目有:{1}\n"),
            ("query_department", 'n.name', "疾病 {0} 所属科室有:{1}\n"),
        )
        for known_intent, field, template in list_templates:
            if intent == known_intent:
                return ''.join(
                    template.format(disease, joined(values))
                    for disease, values in grouped(field))

        # Disease lookup: rank diseases by how many rows mention them.
        if intent == "query_disease":
            counts = {}
            for row in answers:
                name = row["d.name"]
                counts[name] = counts.get(name, 0) + 1
            # Stable sort: ties keep first-seen order.
            ranked = sorted(counts.items(),
                            key=lambda item: item[1],
                            reverse=True)
            # NOTE(review): the reported "probability" is the hit count
            # divided by a fixed 10, not a normalised value.
            return ''.join(
                "疾病为 {0} 的概率为:{1}\n".format(name, hits / 10)
                for name, hits in ranked[:limit])

        # Treatment: the first row's treatment text plus every drug name.
        if intent == "query_cureway":
            treatments = {}
            for row in answers:
                treatments.setdefault(row['d.name'], row["d.treatment"])
            return ''.join(
                "疾病 {0} 的治疗方法有:{1};可用药品包括:{2}\n".format(
                    disease, treatments[disease], joined(drugs, unique=False))
                for disease, drugs in grouped("n.name"))

        # Disease description: only the first row per disease is used.
        if intent == "disease_describe":
            infos = {}
            for row in answers:
                details = [
                    row['d.age'], row['d.insurance'], row['d.infection'],
                    row['d.checklist'], row['d.period'], row['d.rate'],
                    row['d.money']
                ]
                infos.setdefault(row['d.name'], details)
            message = "疾病 {0} 的描述信息如下:\n发病人群:{1}\n医保:{2}\n传染性:{3}\n检查项目:{4}\n" \
                      "治愈周期:{5}\n治愈率:{6}\n费用:{7}\n"
            return ''.join(
                message.format(name, *details)
                for name, details in list(infos.items())[:limit])

        return ""
Exemple #27
0
    exit()

# Connect with the password read earlier in the script (held in ``inp``).
graph = Graph(password=inp)

print("Enter the symptoms with space")
inp = [str(i).lower() for i in input().split()]

if not inp:
    print("Entered input is not valid.Please try again by entering symptoms with space")
    exit()

# Check every symptom individually so the user learns which one is unknown.
for symptom in inp:
    try:
        results = graph.run('''match (s:Symptom)
        Where toLower(s.name) = $symptom
        return DISTINCT s.name as Symptoms''', parameters={'symptom' : symptom}).data()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so Ctrl-C / SystemExit
        # are not reported as a wrong password.
        # NOTE(review): any query failure is still reported as a password
        # problem - consider printing the actual error.
        print("Entered Neo4j graph password is wrong. Please try again")
        exit()
    if not results:
        print(f"Symptom '{symptom}' does not exists in our database. Please try again")
        exit()

# Diseases linked by a CAUSES relationship from every entered symptom.
results = graph.run('''with $in as symptoms
match (s:Symptom)
Where toLower(s.name) in symptoms
with collect(s) as symptoms
match (d:Disease)
where all(s in symptoms Where (s)-[:CAUSES]->(d))
return DISTINCT d.name as Disease, d.id as Disease_id''', parameters={'in' : inp}).data()
Exemple #28
0
import time
from py2neo import Graph, Node
import networkx as nx
graph = Graph(password="******")
# Per application: instalment count, instalment-vs-payment day difference,
# and the share of bureau records whose CREDIT_ACTIVE is "Active".
query = '''
        optional match(a:Application)-[:HAS_PREVIOUS]->(p)-[:HAS_PREV_INSTALLMENTS]->(i) with
toInteger(i.DAYS_INSTALMENT)-toInteger(i.DAYS_ENTRY_PAYMENT) as DaysInstallMinusEntry,
count(i) as TotalInstallments
optional match(a)-[:HAS_BUREAU]->(bureau) with size(filter(x IN collect(bureau.CREDIT_ACTIVE) WHERE x="Active")) as TotalActiveBureau,a.SK_ID_CURR as ApplicationID,count(bureau) as TotalBureau, TotalInstallments,DaysInstallMinusEntry
return ApplicationID,TotalInstallments,DaysInstallMinusEntry,TotalBureau,
toFloat(TotalActiveBureau)/toFloat(TotalBureau) as ActiveBureauByTotalBureauRatio order by ActiveBureauByTotalBureauRatio desc
        '''
# perf_counter() is monotonic and high-resolution; time.time() follows the
# wall clock and can jump if the system time is adjusted mid-measurement.
start = time.perf_counter()
data = graph.run(query)
end = time.perf_counter()
total = end - start
print(f"It took  {total} seconds to run the query")
Exemple #29
0
class MAIN:
    def __init__(self):
        """Create the Neo4j connection used by the other methods."""
        connection = {
            "host": "127.0.0.1",  # IP address of the server hosting Neo4j
            "http_port": 7474,  # port the Neo4j server listens on
            "user": "******",  # database user name
            "password": "******",
        }
        self.g = Graph(**connection)

    def write(self, csvfile, message):  # 写入csv数据
        newfile = open(csvfile, 'a+', newline='')
        filewriter = csv.writer(newfile)
        filewriter.writerows(message)

    def open(self, file):
        self.file = file
        with open(self.file, 'r') as f:
            self.reader = csv.reader(f)
            self.messages = list(self.reader)
            print(self.messages)

    def tra_attribute(self):  # 经测试,neo4j的类型名、属性内容可以是中文,但属性名必须是英文。因此先要把所有的属性名全部翻译成英文。
        list_attribute = []
        for messages in self.messages[1:]:
            for message in messages:
                if r'#' in message:
                    for i in message.split(r'@@'):
                        if r'#' in i:
                            node_attribute = i.split(r'#')
                            list_attribute.append(node_attribute[0])
        list_attribute = list(set(list_attribute))
        lists_attribute = []
        for i in list_attribute:
            j = []
            j.append(i)
            j.append('')
            lists_attribute.append(j)
        self.write(csvfile='C:\\chouquxinxi\\出行指南\\属性名.csv', message=lists_attribute)
        # 后面需要手动打开该文件,手动翻译属性名,写在第二列。弄了半天自动翻译没弄成,有点尴尬。

    def cypher_make_send(self):  # core method
        """Turn the loaded CSV rows into Cypher statements and run them.

        Reads the attribute-name translation table (column 0 -> column 1)
        from the hard-coded ``属性名.csv`` file, then walks ``self.messages``
        (set by ``open``), skipping the first row:

        * rows with two columns, or with an empty third column, describe a
          node: ``type, attributes``;
        * rows with exactly three columns describe a relationship:
          ``relationship, start node, end node``.

        Attributes are encoded as ``name#value`` pairs joined by ``@@``.
        Every generated statement is executed with ``self.g.run`` and its
        result is printed.

        NOTE(review): values are interpolated into the Cypher text without
        escaping, so a quote character in the CSV breaks the statement -
        confirm the input files are trusted.
        """
        # Load the translated attribute names
        dict_attribute = {}
        with open('C:\\chouquxinxi\\出行指南\\属性名.csv', 'r') as f:
            reader = csv.reader(f)
            for l in reader:
                dict_attribute[l[0]] = l[1]

        # Classify each data row
        for message in self.messages[1:]:
            if len(message) == 2 or message[2] == '':  # two columns of data describe a node
                node_type = message[0]       # node type
                node_attributes = message[1]    # node attributes
                dict_node_attribute = {}  # attribute dict, filled in below
                for i in node_attributes.split(r'@@'):
                    node_attribute = i.split(r'#')     # split the pair: [0] is the attribute name, [1] the value
                    node_attribute[0] = dict_attribute[node_attribute[0]]  # translate the attribute name to English
                    dict_node_attribute[node_attribute[0]] = node_attribute[1]  # store in the dict
                # print(dict_node_attribute)
                if '名称' in node_attributes:  # the row mentions "名称" (name), so it targets a single node
                    # Plan: create the node, then add its attributes one at a time
                    namevalue = dict_node_attribute.pop('name')  # remove "name" and keep its value; what remains are the other attributes to create or update
                    cypher = "MERGE (m:%s { name: '%s' })  RETURN m.name" % (node_type, namevalue)
                    # i.e. create the node if no node of this type and name exists yet
                    re = self.g.run(cypher).data()
                    print("-->节点<--新建节点:%s" % (re))
                    for key, value in dict_node_attribute.items():
                        cypher = "MERGE (m:%s { name: '%s' })   \
                                ON MATCH SET m.%s = '%s'    \
                                RETURN m.%s" % (node_type, namevalue, key, value, key)
                        re = self.g.run(cypher).data()
                        print("-->节点<--'%s':新建或修改属性:%s" % (namevalue, re))
                else:  # no "名称" (name) in the row, so it applies to all nodes of this type
                    for key, value in dict_node_attribute.items():
                        cypher = "MERGE (m:%s)              \
                                ON MATCH SET m.%s = '%s'     \
                                RETURN m.%s" % (node_type, key, value, key)
                        re = self.g.run(cypher).data()
                        print("-->节点<--新建或修改属性:%s" % (re))

            elif len(message) == 3:  # three columns of data describe a relationship
                relationships = message[0]  # relationship
                dict_relationships = {}  # relationship dict, filled in below
                if r'@@' in relationships:  # a separator means the relationship carries attributes that must be parsed
                    list_relationships = relationships.split(r'@@')
                    dict_relationships['type'] = list_relationships[0]  # relationship type
                    for i in list_relationships[1:]:
                        relationships_attribute = i.split(r'#')
                        relationships_attribute[0] = dict_attribute[relationships_attribute[0]]  # translate the attribute name to English
                        dict_relationships[relationships_attribute[0]] = relationships_attribute[1]  # store in the dict
                else:
                    dict_relationships['type'] = relationships

                node_start = message[1]  # start node
                dict_node_start = {}  # start-node attribute dict, filled in below
                for i in node_start.split(r'@@'):
                    node1_attribute = i.split(r'#')
                    node1_attribute[0] = dict_attribute[node1_attribute[0]]  # translate the attribute name to English
                    dict_node_start[node1_attribute[0]] = node1_attribute[1]  # store in the dict

                node_end = message[2]  # end node
                dict_node_end = {}  # end-node attribute dict, filled in below
                for i in node_end.split(r'@@'):
                    node2_attribute = i.split(r'#')     # split the pair: [0] is the attribute name, [1] the value
                    node2_attribute[0] = dict_attribute[node2_attribute[0]]  # translate the attribute name to English
                    dict_node_end[node2_attribute[0]] = node2_attribute[1]  # store in the dict

                # Build the statement from here on

                start_type = ''
                start_attribute = ''
                if 'type' in dict_node_start:  # a type (label) was given
                    start_type = ":%s" % (dict_node_start['type'])
                    dict_node_start.pop('type')
                for key, value in dict_node_start.items():
                    start_attribute = start_attribute + "%s:'%s'" % (key, value) + ','
                if start_attribute == '':
                    cypher_node_start = start_type
                else:
                    cypher_node_start = start_type+'{'+start_attribute[:-1]+'}'  # [:-1] drops the trailing comma

                end_type = ''
                end_attribute = ''
                if 'type' in dict_node_end:  # a type (label) was given
                    end_type = ":%s" % (dict_node_end['type'])
                    dict_node_end.pop('type')
                for key, value in dict_node_end.items():
                    end_attribute = end_attribute + "%s:'%s'" % (key, value) + ','
                if end_attribute == '':
                    cypher_node_end = end_type
                else:
                    cypher_node_end = end_type + '{' + end_attribute[:-1] + '}'

                relationships_type = ''
                relationships_attribute = ''
                if 'type' in dict_relationships:  # a type was given
                    relationships_type = ":%s" % (dict_relationships['type'])
                    dict_relationships.pop('type')
                for key, value in dict_relationships.items():
                    relationships_attribute = relationships_attribute + "%s:'%s'" % (key, value) + ','
                if relationships_attribute == '':
                    cypher_relationships = relationships_type
                else:
                    cypher_relationships = relationships_type + '{' + relationships_attribute[:-1] + '}'

                # Match both endpoints, then create the relationship if it is missing
                cypher = " MATCH(m" + cypher_node_start + "),(n" + cypher_node_end + ")  MERGE(m)-[r" + cypher_relationships +"]->(n)   RETURN m,n,r"
                re = self.g.run(cypher).data()
                print("-->关系<--新建或修改关系:%s" % (re))
Exemple #30
0
import csv
from py2neo import Graph, Node, Relationship
import tqdm

# /var/lib/neo4j/import

if __name__ == "__main__":
    graph = Graph("bolt://127.0.0.1:7687", username="******", password="******")
    graph.run('MATCH ()-[r:HasSpeaker]->() DELETE r')
    graph.run('MATCH ()-[r:HasTrack]->() DELETE r')
    graph.run('MATCH (n:Title) DELETE n')
    graph.run('MATCH (n:Title) DELETE n')
    graph.run('MATCH (n:Speaker) DELETE n')
    graph.run('MATCH (n:Track) DELETE n')

    with open('blackhat2019.csv', 'r', encoding='utf-8') as fp:
        reader = csv.reader(fp)
        header = next(reader)
        for row in tqdm.tqdm(reader):
            title, speakers, tracks, url = row
            graph.run('MERGE (:Title {name:"' + title + '", url:"' + url +
                      '"})')
            for speaker in speakers.split(','):
                tmp = speaker.replace('"', '')
                graph.run('MERGE (:Speaker {name:"' + tmp + '"})')
                graph.run('MATCH (t:Title {name:"' + title +
                          '"}), (s:Speaker {name:"' + tmp +
                          '"}) CREATE (t)-[:HasSpeaker]->(s)')
            for track in tracks.split(','):
                graph.run('MERGE (:Track {name:"' + track + '"})')
                graph.run('MATCH (ti:Title {name:"' + title +
def save_data():
    """Rebuild the Question/Answer/Tag graph in Neo4j from the exported CSVs.

    Deletes every relationship and node in the database, then loads
    ``question.csv``, ``answer.csv`` and ``tag.csv`` from the Neo4j import
    directory (the header row of each file is skipped):

    * question.csv columns: id, title, body, vote
    * answer.csv columns: id, body, vote, question id
    * tag.csv columns: name, description, question id

    One node is created per question and per answer, one per distinct tag
    name, followed by the ``has_answer`` and ``has_tag`` relationships.
    """
    import_dir = ('C:/Users/lenovo/.Neo4jDesktop/neo4jDatabases/'
                  'database-6fcac2d7-02fa-494d-a484-e782161e2887/'
                  'installation-3.5.6/import')

    def read_rows(file_name):
        """Yield the data rows of a CSV in the import directory."""
        # The ``with`` block closes the file once the rows are consumed.
        with open(import_dir + '/' + file_name, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            next(reader, None)  # skip the header row (tolerates an empty file)
            yield from reader

    graph = Graph(
        "http://localhost:7474",
        username="******",
        password="******"
    )
    graph.run("MATCH (n)-[r]-(m) DELETE r")  # delete all relationships
    graph.run("MATCH (n) DELETE n")  # delete all nodes

    for count, line in enumerate(read_rows('question.csv'), start=1):
        print(str(count) + str(line))
        graph.create(Node("Question", question_id=line[0], question_title=line[1],
                          question_body=line[2], question_vote=line[3]))

    for count, line in enumerate(read_rows('answer.csv'), start=1):
        print(str(count) + str(line))
        graph.create(Node("Answer", answer_id=line[0], answer_body=line[1],
                          answer_vote=line[2]))

    # A set gives O(1) membership tests while de-duplicating tag names.
    seen_tags = set()
    count = 1
    for line in read_rows('tag.csv'):
        if line[0] in seen_tags:
            continue
        print(str(count) + str(line))
        seen_tags.add(line[0])
        graph.create(Node("Tag", tag_name=line[0], tag_description=line[1]))
        count = count + 1

    # Values are passed as query parameters rather than concatenated into the
    # Cypher text, so quotes inside ids or tag names cannot break the query.
    for count, line in enumerate(read_rows('answer.csv'), start=1):
        print(str(count) + " Relation: Question-->Answer" + str(line))
        graph.run(
            """
                match (a:Question),(b:Answer)
                where a.question_id = $question_id and b.answer_id = $answer_id
                create (a)-[r:has_answer]->(b)
            """,
            parameters={'question_id': line[3], 'answer_id': line[0]})

    for count, line in enumerate(read_rows('tag.csv'), start=1):
        print(str(count) + " Relation: Question-->Tag" + str(line))
        graph.run(
            """
                match (a:Question),(b:Tag)
                where a.question_id = $question_id and b.tag_name = $tag_name
                create (a)-[r:has_tag]->(b)
            """,
            parameters={'question_id': line[2], 'tag_name': line[0]})
class AnswerSearcher:
    """Run Cypher queries and turn the returned rows into answer sentences."""

    # question_type -> (key collected from every row,
    #                   key read from the first row as the subject,
    #                   reply template,
    #                   True when the subject fills placeholder {0})
    _LIST_TEMPLATES = {
        'disease_symptom': ('n.name', 'm.name', '{0}的症状包括:{1}', True),
        'symptom_disease': ('m.name', 'n.name', '症状{0}可能染上的疾病有:{1}', True),
        'disease_cause': ('m.cause', 'm.name', '{0}可能的成因有:{1}', True),
        'disease_prevent': ('m.prevent', 'm.name', '{0}的预防措施包括:{1}', True),
        'disease_lasttime': ('m.cure_lasttime', 'm.name',
                             '{0}治疗可能持续的周期为:{1}', True),
        'disease_cureprob': ('m.cured_prob', 'm.name',
                             '{0}治愈的概率为(仅供参考):{1}', True),
        'disease_getway': ('m.get_way', 'm.name', '{0}的传播方式为:{1}', True),
        'disease_easyget': ('m.easy_get', 'm.name', '{0}的易感人群包括:{1}', True),
        'disease_desc': ('m.desc', 'm.name', '{0},熟悉一下:{1}', True),
        'disease_not_food': ('n.name', 'm.name', '{0}忌食的食物包括有:{1}', True),
        'food_not_disease': ('m.name', 'n.name', '患有{0}的人最好不要吃{1}', False),
        'food_do_disease': ('m.name', 'n.name', '患有{0}的人建议多试试{1}', False),
        'disease_drug': ('n.name', 'm.name', '{0}通常的使用的药品包括:{1}', True),
        'drug_disease': ('m.name', 'n.name', '{0}主治的疾病有{1},可以试试', True),
        'disease_check': ('n.name', 'm.name',
                          '{0}通常可以通过以下方式检查出来:{1}', True),
        'check_disease': ('m.name', 'n.name',
                          '通常可以通过{0}检查出来的疾病有{1}', True),
    }

    def __init__(self):
        """Open the graph connection and set the cap on items per answer."""
        self.g = Graph(
            # "http://localhost:7474/db/data"  # py2neo 2.0.8 style
            host="127.0.0.1",  # py2neo 3 style
            user="******",
            password="******")
        self.num_limit = 30

    def search_main(self, sqls):
        """Execute the Cypher queries and return the non-empty answers.

        ``sqls`` is an iterable of dicts with a ``'question_type'`` and a
        ``'sql'`` entry (an iterable of query strings). The rows of all
        queries of one entry are pooled before being formatted.
        """
        final_answers = []
        for entry in sqls:
            kind = entry['question_type']
            statements = entry['sql']
            rows = []
            for statement in statements:
                rows += self.g.run(statement).data()
            rendered = self.answer_prettify(kind, rows)
            if rendered:
                final_answers.append(rendered)
        return final_answers

    def answer_prettify(self, question_type, answers):
        """Pick the reply template for ``question_type`` and fill it in.

        Returns ``''`` when ``answers`` is empty and ``[]`` when the
        question type is not recognised; otherwise the formatted sentence.
        Collected values are de-duplicated through a ``set`` and truncated
        to ``self.num_limit`` items.
        """
        if not answers:
            return ''

        def joined(values):
            # De-duplicate, cap the number of items, then join.
            return ';'.join(list(set(values))[:self.num_limit])

        if question_type in self._LIST_TEMPLATES:
            row_key, subject_key, template, subject_leads = \
                self._LIST_TEMPLATES[question_type]
            values = [row[row_key] for row in answers]
            subject = answers[0][subject_key]
            if subject_leads:
                return template.format(subject, joined(values))
            return template.format(joined(values), subject)

        if question_type == 'disease_cureway':
            # Each row carries a sequence of treatments; flatten it to text
            # before de-duplicating.
            values = [';'.join(row['m.cure_way']) for row in answers]
            subject = answers[0]['m.name']
            return '{0}可以尝试如下治疗:{1}'.format(subject, joined(values))

        if question_type == 'disease_acompany':
            partners = [row['n.name'] for row in answers]
            owners = [row['m.name'] for row in answers]
            subject = answers[0]['m.name']
            # Names from both ends of the relation, minus the subject itself.
            others = [name for name in partners + owners if name != subject]
            return '{0}的并发症包括:{1}'.format(subject, joined(others))

        if question_type == 'disease_can_eat':
            # Only the first row is consulted; the one-element list is
            # always truthy, so the sentence is always produced.
            values = [answers[0]['m.can_eat']]
            subject = answers[0]['m.name']
            return '{0}可以吃/喝:{1}'.format(subject, joined(values))

        if question_type == 'disease_do_food':
            suitable = [row['n.name'] for row in answers
                        if row['r.name'] == '宜吃']
            recipes = [row['n.name'] for row in answers
                       if row['r.name'] == '推荐食谱']
            subject = answers[0]['m.name']
            return '{0}推荐{1}\n推荐食谱包括有:{2}'.format(
                subject, joined(suitable), joined(recipes))

        # Unknown question type: same falsy placeholder the chain started with.
        return []
        "Wisconsin","Wyoming"]

# Document names; each one becomes a `doc` node in the graph.
docs = ["fraudulent", "fortune_100", "pennywise", "sold", "startup", "broke"]

# One entry per document: 7 people, 5 orgs and 4 gpes, each drawn
# independently with random.choice, so the same name can repeat within a list.
doc_entries = []
for doc in docs:
    doc_entries.append({
        "people": [random.choice(people) for x in range(0,7)],
        "orgs": [random.choice(orgs) for x in range(0,5)],
        "gpes": [random.choice(gpes) for x in range(0,4)]
    })

graph = Graph(password='******')

graph.run("MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r") # delete all existing nodes and relationships
# Build one big CREATE statement containing a node per gpe, org, person and doc.
# NOTE(review): every name is used both as the Cypher variable and as a quoted
# property value; a name containing a space or a quote would presumably make
# the statement invalid - confirm against the contents of gpes/orgs/people.
create_str = []
create_str += ["({0}:gpe {{ name:'{1}' }})".format(gpe,gpe) for gpe in gpes]
create_str += ["({0}:org {{name:'{1}'}})".format(org,org) for org in orgs]
create_str += ["({0}:person {{name:'{1}'}})".format(person,person) for person in people]
create_str += ["({0}:doc {{name:'{1}'}})".format(doc,doc) for doc in docs]
graph.run("create "+",".join(create_str)) # insert the individual entities

# Link every entity drawn for a document to that document with an `indoc`
# relationship; repeated draws create repeated relationships.
for doc_name, entry in zip(docs, doc_entries):
    # associate gpes with the document
    for gpe in entry["gpes"]:
        graph.run('MATCH (n:gpe {{name:"{0}"}}),(d:doc {{name:"{1}"}}) create (n)-[:indoc]->(d)'.format(gpe,doc_name))
    # associate orgs with the document
    for org in entry["orgs"]:
        graph.run('MATCH (n:org {{name:"{0}"}}),(d:doc {{name:"{1}"}}) create (n)-[:indoc]->(d)'.format(org,doc_name))
    # associate people with the document (the loop for this step is missing from this excerpt)
Exemple #34
0
class AnswerSearcher:
    """Run Cypher queries and render the rows as bank-product answers."""

    def __init__(self):
        """Connect to the Neo4j server and set the cap on listed items."""
        self.g = Graph(
            host="39.100.119.153",  #127.0.0.1
            http_port=7474,  #7687
            user="******",  #neo4j
            password="******")  #admin
        self.num_limit = 20

    def search_main(self, sqls):
        """Execute the Cypher queries and return the non-empty answers.

        ``sqls`` is a dict with a ``'sqls'`` list (each item holding a
        ``'question_type'`` and a ``'sql'`` list of query strings) and the
        original ``'question'`` text.
        """
        final_answers = []
        for sql_ in sqls['sqls']:
            question_type = sql_['question_type']
            queries = sql_['sql']
            answers = []
            for query in queries:
                # .data() returns a list of dicts keyed by the RETURN
                # columns, e.g. {"n.name": ..., "m.name": ..., "r.name": ...}.
                answers += self.g.run(query).data()
            final_answer = self.answer_prettify(question_type, answers,
                                                sqls['question'])
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    @staticmethod
    def _labelled(row, key, prefix, suffix, fallback):
        """Return prefix + row[key] + suffix, or fallback if unavailable.

        A missing key or a non-string value (e.g. None) counts as
        unavailable.
        """
        try:
            return prefix + row[key] + suffix
        except (KeyError, TypeError):
            return fallback

    @staticmethod
    def _field_at(rows, index, key, fallback=''):
        """Return rows[index][key], or fallback if the row or key is absent."""
        try:
            return rows[index][key]
        except (KeyError, IndexError, TypeError):
            return fallback

    @staticmethod
    def _branches_by_area(bank, rows, header, render):
        """Group rows by 'm.区域' (first-seen order) under one header each."""
        grouped = {}
        for row in rows:
            grouped.setdefault(row['m.区域'], []).append(render(row))
        lines = []
        for area, entries in grouped.items():
            lines.append(header.format(bank, area))
            lines.extend(entries)
        return '\n'.join(lines)

    def answer_prettify(self, question_type, answers, question):
        """Fill the reply template that belongs to ``question_type``.

        Args:
            question_type: name of the template to use.
            answers: list of row dicts returned by the queries.
            question: the original question text (currently unused).

        Returns:
            The formatted answer, or ``''`` when ``answers`` is empty or the
            question type is not recognised.
        """
        final_answer = ''
        if not answers:
            return ''
        if question_type == 'check':
            desc = answers[0]['m.产品名称']
            subject = answers[0]['m.登记编码']
            if desc:
                final_answer = '{0}在“全国银行业理财产品登记系统”的登记编号为:{1},是银行发行的正规理财产品。'.format(
                    desc, subject)
            else:
                final_answer = '该产品未在理财系统查询到登记编码,无登记编码均不属于正规银行理财产品!'

        elif question_type == 'explanation_category':
            name = answers[0]['m.名词']
            desc = ''
            nature = ''
            user = ''
            for i in answers:
                # Only the first non-empty attribute of each row is used.
                if i['m.定义']:
                    desc = '定义为' + i['m.定义'] + '\n'
                elif i['m.特性']:
                    nature = '特性为' + i['m.特性'] + '\n'
                elif i['m.适用人群']:
                    user = '******' + i['m.适用人群'] + '\n'
            final_answer = '{0}:\n{1}{2}{3}'.format(name, desc, nature, user)

        elif question_type == 'explanation_noun':
            name = answers[0]['m.名词']
            desc = answers[0]['m.定义']
            final_answer = '{0}的定义为:{1}'.format(name, desc)

        elif question_type == 'notice_category':
            print(question_type)
            print(answers)
            name = answers[0]['m.名词']
            desc = nature = user = openform = product_type = ''
            for row in answers:
                # Each attribute keeps its previous value when the row lacks it.
                desc = self._labelled(row, 'm.定义', '定义为', '\n', desc)
                nature = self._labelled(row, 'm.特性', '特性为', '\n', nature)
                user = self._labelled(row, 'm.适用人群', '******', '\n', user)
                openform = self._labelled(row, 'm.开放形态', '开放形态为', '\n',
                                          openform)
                product_type = self._labelled(row, 'm.产品类型', '产品类型为', '',
                                              product_type)
                # One paragraph is appended per row.
                final_answer += '{0}产品需要注意的事项有:\n{1}{2}{3}{4}{5}'.format(
                    name, desc, nature, user, openform, product_type)

        elif question_type == 'notice_attribution':
            attribution = answers[0]['m.名称']
            diff = answers[0]['m.类别差异']
            subject = [i['n.名词'] for i in answers]
            # The "类别差异" slot is filled with the 类别差异 value; it used to
            # receive 'm.类别' while the difference text was passed but unused.
            final_answer = '{0}需要注意的事项有:\n(1){0}含有的类别有:{1}。\n(2)类别差异为:{2}。'.format(
                attribution, '、'.join(list(set(subject))), diff)

        elif question_type == 'notice_product':
            name = answers[0]['m.产品名称']
            print(answers)
            desc = desc1 = nature = user = openform = product_type = ''
            for row in answers:
                try:
                    desc = row['n.名词']
                except (KeyError, TypeError):
                    pass
                desc1 = self._labelled(row, 'n.定义', '的定义为', '\n', desc1)
                nature = self._labelled(row, 'n.特性', '特性为', '\n', nature)
                user = self._labelled(row, 'n.适用人群', '******', '\n', user)
                openform = self._labelled(row, 'n.开放形态', '开放形态为', '\n',
                                          openform)
                product_type = self._labelled(row, 'n.产品类型', '产品类型为', '',
                                              product_type)

            # Number the attributes that are present, in display order.
            order = 1
            if nature != '':
                nature = '({})'.format(order) + nature
                order = order + 1
            if user != '':
                # NOTE(review): this literal looks like a masked '({})'
                # numbering prefix - confirm against the unmasked source.
                user = '******'.format(order) + user
                order = order + 1
            if openform != '':
                openform = '({})'.format(order) + openform
                order = order + 1
            if product_type != '':
                product_type = '({})'.format(order) + product_type
                order = order + 1

            final_answer += '{0}属于{1}的产品,需要注意的事项有:\n{1}{2}{3}{4}{5}{6}'.format(
                name, desc, desc1, nature, user, openform, product_type)

        elif question_type == 'call_number':
            name = answers[0]['m.名称']
            print(answers)
            # The consulting number of the second row, when present, wins
            # over the customer-service number of the first row.
            subject = self._field_at(answers, 0, 'm.客服电话', '')
            subject = self._field_at(answers, 1, 'm.咨询电话', subject)
            final_answer += '{0}的咨询电话为:{1}。'.format(name, subject)

        elif question_type == 'bank_product':
            bank = answers[0]['m.名称']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            product = [i['n.产品名称'] for i in answers]
            final_answer = '以下为{0}最新的5款产品:'.format(bank)
            for order, (item, status) in enumerate(zip(product, state), 1):
                final_answer += '\n({0}){1},产品状态为:{2}'.format(
                    order, item, status)

        elif question_type == 'bank_category_product':
            bank = answers[0]['m.名称']
            cate = answers[0]['p.名词']
            state = [i['n.产品状态'] for i in answers]
            product = [i['n.产品名称'] for i in answers]
            final_answer = '以下为{0}最新的5款{1}产品:'.format(bank, cate)
            for order, (item, status) in enumerate(zip(product, state), 1):
                final_answer += '\n({0}){1},产品状态为:{2}'.format(
                    order, item, status)

        elif question_type == 'product_desc':
            name = answers[0]['m.产品名称']
            djbm = answers[0]['m.登记编码']
            qxlx = answers[0]['m.期限类型']
            yjbjbz = answers[0]['m.业绩比较基准']
            # Rows 1-4 are optional; a missing row leaves its field blank.
            fxjg = self._field_at(answers, 1, 'n.名称')
            mjfs = self._field_at(answers, 2, 'n.名词')
            yxms = self._field_at(answers, 3, 'n.名词')
            tzxz = self._field_at(answers, 4, 'n.名词')
            final_answer = '{0}的简介如下:\n登记编码:{1}\n期限类型:{2}\n业绩比较基准:{3}\n发行机构:{4}\n募集方式:{5}\n运作模式:{6}\n投资性质:{7}'.format(
                name, djbm, qxlx, yjbjbz, fxjg, mjfs, yxms, tzxz)

        elif question_type == 'bank_desc':
            name = answers[0]['m.名称']
            hours = answers[0]['m.营业时间']
            call = answers[0]['m.客服电话']
            url = answers[0]['m.官网链接']
            final_answer = '下面为您介绍{0}的相关信息:\n营业时间:{1}\n客服电话:{2}\n官网链接:{3}'.format(
                name, hours, call, url)

        elif question_type == 'url':
            name = answers[0]['m.名称']
            url = answers[0]['m.官网链接']
            final_answer = '{0}的官网链接为:\n{1}'.format(name, url)

        elif question_type == 'area_subbank_addr':
            bank = answers[0]['n.名称']
            # One header per area in first-seen order; the old leading header
            # printed the repr of a set of all areas.
            final_answer = self._branches_by_area(
                bank, answers, '{0}在{1}的网点分布如下:',
                lambda row: '{0},具体地址:{1}'.format(row['m.名称'],
                                                   row['m.具体地址']))

        elif question_type == 'area_subbank':
            bank = answers[0]['n.名称']
            final_answer = self._branches_by_area(
                bank, answers, '{0}在{1}的支行有:',
                lambda row: '{0}'.format(row['m.名称']))

        elif question_type == 'attribution_infos':
            attribution = answers[0]['m.名称']
            diff = answers[0]['m.类别差异']
            subject = [i['n.名词'] for i in answers]
            # Same slot fix as in 'notice_attribution'.
            final_answer = '{0}:\n(1){0}含有的类别有:{1}。\n(2)类别差异为:{2}。'.format(
                attribution, '、'.join(list(set(subject))), diff)

        elif question_type == 'product_number':
            bank = answers[0]['m.名称']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            count = [i['count(n)'] for i in answers]
            final_answer += '{0}:'.format(bank)
            count_all = 0
            for status, amount in zip(state, count):
                count_all += amount
                final_answer += '\n{0}产品有{1}个,'.format(status, amount)
            final_answer += '\n所有产品总数为{0}个。'.format(count_all)

        elif question_type == 'product_category_number':
            bank = answers[0]['m.名称']
            cate = answers[0]['p.名词']
            print(answers)
            state = [i['n.产品状态'] for i in answers]
            count = [i['count(n)'] for i in answers]
            final_answer += '{0}{1}产品情况:'.format(bank, cate)
            count_all = 0
            for status, amount in zip(state, count):
                count_all += amount
                final_answer += '\n{0}产品有{1}个,'.format(status, amount)
            final_answer += '\n所有产品总数为{0}个。'.format(count_all)

        elif question_type == 'subbank_number':
            print(answers)
            # The running total lives outside the loop so it covers all rows.
            count_all = 0
            for position, row in enumerate(answers):
                bank = row['n.名称']
                area = row['m.区域']
                count = row['count(r)']
                count_all += count
                if position == 0:
                    final_answer += '{0}\n在{1}共有{2}个网点,\n'.format(
                        bank, area, count)
                else:
                    final_answer += '在{0}共有{1}个网点,\n'.format(area, count)
            final_answer += '{0}在所查询地区的网点总共有{1}个。'.format(bank, count_all)

        elif question_type == 'product_area':
            name = answers[0]['m.产品名称']
            area = [i['n.名称'] for i in answers]
            final_answer = '{}的销售区域为{}。'.format(name,
                                                '、'.join(list(set(area))))

        elif question_type == 'product_attribution':
            print(answers)
            # The requested attribute is the last column of every row.
            desc = [list(i.values())[-1] for i in answers]
            desc1 = ['' if x is None else x for x in desc]
            final_answer = ';'.join(list(set(desc1))[:self.num_limit])

        elif question_type == 'product_user':
            print(answers)
            name = answers[0]['m.产品名称']
            user = [i['n.适用人群'] for i in answers
                    if i['n.适用人群'] is not None]
            print(user)
            final_answer = '{0}的适用人群为:\n{1}'.format(name,
                                                    '\n'.join(list(set(user))))

        elif question_type == 'investment_category':
            for row in answers:
                final_answer += '{0}所属类型为{1}\n'.format(row['m.名词'],
                                                       row['n.名词'])

        elif question_type == 'if_investment_category':
            desc = answers[0]['n.名词']
            if desc:
                final_answer = '是的!'
            else:
                # TODO: return the precise answer; the follow-up below is untested.
                final_answer = '不对哦!'
                try:
                    final_answer += '{0}所属类型为{1}。'.format(
                        answers[1]['m.名词'], answers[1]['n.名词'])
                except (KeyError, IndexError, TypeError):
                    pass

        elif question_type == 'institution_category':
            print(answers)
            name = answers[0]['m.名称']
            desc = answers[0]['n.名词']
            final_answer = '{}所属机构类别为{}。'.format(name, desc)

        elif question_type == 'institution_bank':
            name = answers[0]['m.名称']
            desc = answers[0]['n.名称']
            final_answer = '{}所属总行为{}。'.format(name, desc)

        elif question_type == 'if_category':
            desc = answers[0]['p.名词']
            if desc:
                final_answer = '是的!'
            else:
                # TODO: return the precise answer.
                final_answer = '不对哦!'
                try:
                    ansname = answers[1]['m.产品名称']
                    anstype = answers[1]['p.名词']
                    ansattr = answers[1]['n.名称']
                    final_answer += '{0}的{1}属性的类别为{2}。'.format(
                        ansname, ansattr, anstype)
                except (KeyError, IndexError, TypeError):
                    pass

        elif question_type == 'bank_time':
            bank = answers[0]['m.名称']
            hours = answers[0]['m.营业时间']
            final_answer = '{}的营业时间为{}'.format(bank, hours)

        elif question_type == 'production_time':
            # The requested time is the last column of the first row.
            final_answer = '{}'.format(list(answers[0].values())[-1])

        elif question_type == 'attribution_different':
            name = answers[0]['m.名称']
            desc = answers[0]['m.类别差异']
            final_answer = '{}不同类别之间的差异如下:\n{}'.format(name, desc)

        elif question_type == 'category_different':
            # Sentence order is "<m> 与 <n> 之间的差异为 <difference>"; the two
            # last arguments used to be swapped.
            final_answer = '\n'.join(
                '{}与{}之间的差异为{}。'.format(row['m.名词'], row['n.名词'],
                                          row['r.different'])
                for row in answers)

        elif question_type == 'other_category':
            name = answers[0]['m.名词']
            desc = [i['n.名词'] for i in answers]
            final_answer = '除了{},还有:{}'.format(name, '、'.join(list(set(desc))))

        elif question_type == 'category_nature':
            name = answers[0]['m.名词']
            desc = answers[0]['m.特性']
            final_answer = '{}产品的特性为{}'.format(name, desc)

        elif question_type == 'recommend_category':
            # Previously "final_answer = + '{1}...'": unary plus on a str and
            # an out-of-range placeholder, so this branch always raised.
            names = ['{0}产品'.format(row['m.名词']) for row in answers]
            final_answer = '为你推荐的产品有:' + '、'.join(names) + '。'

        elif question_type == 'if_recommend_category':
            desc = answers[0]['n.名词']
            if desc:
                final_answer = '是的!'
            else:
                # TODO: return the precise answer; the follow-up below is untested.
                final_answer = '不对哦!'
                final_answer += '为你推荐的产品有:'
                try:
                    # The first row is the rejected candidate; list the rest.
                    names = ['{0}产品'.format(row['m.名词'])
                             for row in answers[1:]]
                except (KeyError, TypeError):
                    names = []
                if names:
                    final_answer += '、'.join(names) + '。'

        elif question_type == 'if_buy':
            name = answers[0]['m.产品名称']
            state = answers[0]['m.产品状态']
            final_answer = '{}目前状态为{},'.format(name, state)
            if state == '在售':
                final_answer += '现在可以购买哦!'
            else:
                final_answer += '现在不可以购买哦!'

        return final_answer
# #========================================== Get files ==========================================#

if __name__ == "__main__":
    # The Neo4j password is read from the NEO4J_PASS environment variable;
    # os.environ.get returns None when it is not set.
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)  # TODO: document in the README how to set NEO4J_PASS (e.g. in PyCharm)
    # g.delete_all()
    # NOTE(review): tx is not used in the visible part of this script.
    tx = g.begin()

    # Index statements for Contribution(type) and Committee(name).
    index1 = '''
    CREATE INDEX ON: Contribution(type)
    '''
    index2 = '''
    CREATE INDEX ON: Committee(name)
    '''
    g.run(index1)
    g.run(index2)

    # root =  os.getcwd()
    # path = os.path.join(root, "data")
    # disclosure_1st_path = os.path.join(path, "2013_MidYear_XML")
    # files = [f for f in os.listdir(disclosure_1st_path) if f.endswith('.xml')]
    # files = ['file:///Users/yaqi/Documents/health-graph/data/2013_MidYear_XML/700669542.xml']  # Return xml files

    def get_file_path(kind):
        # Build 'file://' URLs for every .xml file directly inside the
        # hard-coded data directory for `kind`.
        # NOTE(review): the visible body never returns `filepath`; the rest
        # of the function appears to be missing from this excerpt.
        root_dir = '/Users/yaqi/Documents/data/' + kind
        filenames = [f for f in os.listdir(root_dir) if f.endswith('.xml')]
        filepath = []
        for file in filenames:
            path = 'file://' + os.path.join(root_dir, file)
            filepath.append(path)
def UserStory2(Neo4JPwd, Dbname):
    """Report the cheapest and most expensive matching insurance policies.

    Prompts for a premium range, a covered disease and a company name,
    looks up the matching policy names in Neo4j, then uses MongoDB to find
    the minimum and maximum premium among them and prints the result table.

    Args:
        Neo4JPwd: password for the Neo4j connection.
        Dbname: name of the MongoDB database holding the ``Insurance``
            collection. The aggregation overwrites its ``Policy_Premium``
            collection through ``$out``.

    Returns:
        None. All output is printed.
    """
    graph = Graph(password=Neo4JPwd)

    # Prompt for and validate the three inputs.
    premium_ranges = ("1000-50000", "50000-100000", "100000-150000",
                      "150000-200000")
    input1_PremiumRange = str(
        input(
            "Please enter the premium range you want from following options:\n"
            "1000-50000 \n"
            "50000-100000 \n"
            "100000-150000 \n"
            "150000-200000 \n"))

    # An empty answer is rejected here too, so no separate empty check.
    if input1_PremiumRange not in premium_ranges:
        print(
            "Entered Premium Range is not from mentioned list.Please try again by entering the range from options given"
        )
        return

    input3_CoverFor = str(
        input("Please enter the disease you require in coverage:")).lower()

    # The old comparison against [] could never be true for a string.
    if not input3_CoverFor.strip():
        print("Entered Covered disease is not valid.Please try again")
        return

    input2_CompanyName = str(
        input("Please enter the company name of  the insurance:")).lower()

    if not input2_CompanyName.strip():
        print("Entered Company Name is not valid.Please try again")
        return

    # Neo4j query for the policy names matching the criteria. All three
    # user-supplied values are passed as query parameters rather than
    # concatenated into the Cypher text.
    results = graph.run("""MATCH (a:CompanyName)-[r:Has_Policy]->(b:PolicyName)
    WHERE toLower(a.CompanyName) = $CName
    WITH {PolicyName:b.PolicyName} AS Policies
    MATCH (t:PolicyName)- [r:PremiumRange_Of]->(u:PremiumRanges)
    WITH {PolicyName:t.PolicyName} AS PoliciesRange
    WHERE Policies.PolicyName = t.PolicyName AND u.PremiumRange = $PRange
    MATCH (a:PolicyName)-[r:Diseases_Covered]->(b:Disease)
    WHERE PoliciesRange.PolicyName = a.PolicyName AND toLower(b.name) = $Disease
    return PoliciesRange.PolicyName AS PolicyNames""", parameters={
        'CName': input2_CompanyName,
        'PRange': input1_PremiumRange,
        'Disease': input3_CoverFor,
    }).data()

    if not results:
        print("There is no Policy for this combination in our database.")
        return

    Policy_Name = [record['PolicyNames'] for record in results]

    # MongoDB: materialise the matching policies with their premiums into
    # Policy_Premium, then take the min/max premium per company.
    client = MongoClient()
    try:
        db = client[Dbname]
        collection = db.Insurance

        collection.aggregate([{
            "$match": {
                "CompanyName": {
                    "$ne": None
                }
            }
        }, {
            "$unwind": "$Policies"
        }, {
            "$match": {
                "Policies.PolicyName": {
                    "$in": Policy_Name
                }
            }
        }, {
            "$project": {
                "_id": 0,
                "CompanyName": {
                    "$toLower": "$CompanyName"
                },
                "PolicyName": "$Policies.PolicyName",
                "Premium": "$Policies.Premium"
            }
        }, {
            "$match": {
                "CompanyName": input2_CompanyName
            }
        }, {
            "$out": "Policy_Premium"
        }])

        summary = list(db.Policy_Premium.aggregate([{
            "$group": {
                "_id": "$CompanyName",
                "minPremium": {
                    "$min": "$Premium"
                },
                "maxPremium": {
                    "$max": "$Premium"
                }
            }
        }]))

        # Without any matching document there is no premium to look up.
        if not summary:
            print("There is no Policy for this combination in our database.")
            return

        df = pd.DataFrame(summary)
        minPremium = summary[-1]['minPremium']
        maxPremium = summary[-1]['maxPremium']

        projection = {"PolicyName": 1, "_id": 0}
        Minimum_Premium_PolicyName = [
            record['PolicyName']
            for record in db.Policy_Premium.find({"Premium": minPremium},
                                                 projection)
        ]
        Maximum_Premium_PolicyName = [
            record['PolicyName']
            for record in db.Policy_Premium.find({"Premium": maxPremium},
                                                 projection)
        ]
    finally:
        client.close()

    # Several policies can share the extreme premium; join their names so
    # the value always fits the summary row.
    df['Minimum_PolicyName'] = ', '.join(
        str(name) for name in Minimum_Premium_PolicyName)
    df['Maximum_PolicyName'] = ', '.join(
        str(name) for name in Maximum_Premium_PolicyName)

    # Final output
    print(tabulate(df, headers='keys', tablefmt='psql'))
Exemple #37
0
from py2neo import Node, Relationship, Graph
from random import randrange

g = Graph()

# Start from an empty database: delete every node together with its relationships.
g.run("""MATCH (n)
OPTIONAL MATCH (n)-[r]-()
DELETE n,r""")

people = ["Jake", "Emily", "Alex"]
people_two = ["Emily", "Alex", "Jake"]
# Pair each person with the person they "know" (Jake->Emily, Emily->Alex, Alex->Jake).
pairs = zip(people, people_two)

for pair in pairs:
    # One transaction per pair: two Person nodes plus the KNOWS relationship.
    # NOTE(review): fresh Node objects are built on every iteration, so each
    # name ends up on two separate Person nodes - confirm that is intended.
    tx = g.begin()
    a = Node("Person", name=pair[0])
    b = Node("Person", name=pair[1])
    # NOTE(review): randrange(0, 1) can only return 0, so every weight is 0.
    ab = Relationship(a, "KNOWS", b, weight=randrange(0, 1))
    tx.create(a)
    tx.create(b)
    tx.create(ab)
    tx.commit()

# List everyone Jake knows. The original pattern ended with a dangling
# ", -[]->()" part (a pattern cannot start with a relationship), which made the
# whole query a Cypher syntax error; that fragment is removed.
print(g.run("""
    MATCH (a:Person {name :'Jake'})-[relab:KNOWS]->(b:Person)
    RETURN a.name, b.name
""").data())
Exemple #38
0
# Method 1: connect over bolt, print the node count, then exit.
g = Graph(host="localhost", password='******',bolt=True, bolt_port=7689)
print g.data('match (n) return count(*)')
sys.exit(1)
"""

# Method 2:  ***** this is the only approach that works when the neo4j server is accessed through a proxy or runs inside a docker container *********
# set up authentication parameters
http_port = "7476"
authenticate("localhost:"+http_port, "username", "password")
# connect to authenticated graph database
g = Graph("http://localhost:"+http_port+"/db/data/", bolt_port=7689)


# Run the same node-count query twice: once as a list of dicts, once dumped to stdout.
g.data('match (n) return count(*)')
g.run('match (n) return count(*)').dump()


# import data in one transaction
tx = g.begin()
a = Node("Person", name="Alice")
b = Node("Person", name="Bob")
tx.create(a)
ab = Relationship(a, "KNOWS", b)
tx.create(ab)
#tx.commit()
# NOTE(review): the commit above is commented out, so this checks `ab`
# while the transaction is still open - confirm that is intended.
print g.exists(ab)

# get nodes in one autocommit transaction
g.run("MATCH (a:Person) RETURN a.name, a.born LIMIT 4").data()
Exemple #39
0
import os
import sys
import time
import requests
from py2neo import Graph, Node, Relationship

graph = Graph()

# Uniqueness constraints for the User, Tweet and Hashtag labels.
for _constraint in (
    "CREATE CONSTRAINT ON (u:User) ASSERT u.username IS UNIQUE",
    "CREATE CONSTRAINT ON (t:Tweet) ASSERT t.id IS UNIQUE",
    "CREATE CONSTRAINT ON (h:Hashtag) ASSERT h.name IS UNIQUE",
):
    graph.run(_constraint)

# Bearer token for the Twitter API; a missing variable raises KeyError here.
TWITTER_BEARER = os.environ["TWITTER_BEARER"]

# HTTP headers sent with every search request.
headers = {
    "accept": "application/json",
    "Authorization": "Bearer " + TWITTER_BEARER,
}

# Search parameters; the search term is the first command-line argument.
payload = {
    "count": 100,
    "result_type": "recent",
    "lang": "en",
    "q": sys.argv[1],
}

base_url = "https://api.twitter.com/1.1/search/tweets.json?"


def find_tweets(since_id):
    """Request tweets from the search endpoint, starting after ``since_id``.

    Stores ``since_id`` in the module-level ``payload`` dict, formats the
    query string from ``payload`` and issues a GET request with the
    module-level ``headers``.

    NOTE(review): the visible body ends right after the request; the response
    ``r`` is neither used nor returned here, so the function looks truncated.
    """
    payload["since_id"] = since_id
    # The payload values are inserted into the URL as-is (no URL-encoding).
    url = base_url + "q={q}&count={count}&result_type={result_type}&lang={lang}&since_id={since_id}".format(**payload)

    r = requests.get(url, headers=headers)
# Server URL assembled from the `conf` mapping (protocol, host and port).
server = conf["protocol"]+"://"+conf["host"]+":"+str( conf["port"] )

# Only messages at ERROR level or above are emitted.
logging.basicConfig(level=logging.ERROR)

numiter = 10000  # NOTE(review): presumably a batch size; not used in the visible code

graph = Graph(server)

# Node label used in the uniqueness constraint below.
label = "TAXID"

# Hashes for storing stuff
parentid={}
scientific_list={}
names_list={}

# Require the `id` property to be unique among nodes carrying `label`.
idxout = graph.run("CREATE CONSTRAINT ON (n:"+label+") ASSERT n.id IS UNIQUE")

def process_relationship( statements, graph ):
	"""Add one relationship per statement to a new transaction on ``graph``.

	Each statement is indexed as ``(label, start_id, end_id, rel_type)``:
	both end nodes are looked up by ``label`` and integer ``id``, and a
	relationship of type ``rel_type`` is created from start to end.

	NOTE(review): no commit is visible in this snippet; the function appears
	to be truncated, so confirm where the transaction is committed.
	"""
	
	tx = graph.begin()
	
	#print statements
	logging.info('proc sent')
	
	for statement in statements:
		#print statement
		# .first() returns the first matching node for the given label and id.
		start = graph.nodes.match(statement[0], id=int( statement[1] )).first()
		end = graph.nodes.match(statement[0], id=int( statement[2] )).first()
		rel = Relationship( start, statement[3], end )
		
		tx.create( rel )
import traceback
import psycopg2 
import psycopg2.extras
import sys
from py2neo import Graph, Node, Relationship, watch, authenticate


# Connection settings for the Neo4j server.
http_port = 7474
bolt_port = 7687
host = "x.com.cn"

# Register credentials for the HTTP endpoint before creating the Graph.
authenticate("%s:%s" % (host, http_port), "name", "password")
# connect to authenticated graph database
g = Graph("http://%s:%s/db/data/" % (host, http_port), bolt_port=bolt_port)
# Run a node-count query twice: once fetched as data, once dumped to stdout.
g.data('match (n) return count(*)')
g.run('match (n) return count(*)').dump()
# NOTE(review): `datetime` is not among the imports visible directly above
# this snippet - confirm it is imported elsewhere.
now_datetime = datetime.datetime.now()

def getDBConnection():
    """Open and return a new psycopg2 connection using the fixed settings below."""
    # NOTE(review): port 432 looks like it may be a typo for 5432 - confirm.
    settings = {
        "host": "x.com.cn",
        "port": 432,
        "user": "******",
        "password": "******",
        "dbname": "xx",
        "connect_timeout": 10,
    }
    return psycopg2.connect(**settings)

# Turn on py2neo's `watch` output for the 'httpstream' logger.
watch('httpstream')

# Module-level PostgreSQL connection, opened at import time.
conn = getDBConnection()
from py2neo import Graph, Node
import os

def create_DrugFirm_node(file, g):
    """Bulk-load DrugFirm nodes from a tab-separated file with headers.

    Parameters
    ----------
    file : str
        Location passed to ``LOAD CSV`` as the ``file`` query parameter.
    g : Graph
        Graph the query is run on.

    Returns
    -------
    The result of ``g.run``; the query returns the id and ``firmName`` of
    each created node.
    """
    # One DrugFirm node is created per input line, committing every 500 rows.
    query = '''
    USING PERIODIC COMMIT 500
    LOAD CSV WITH HEADERS FROM {file}
    AS line
    FIELDTERMINATOR '	'
    CREATE(df:DrugFirm {dunsNumber: line.DUNS_NUMBER, firmName: line.FIRM_NAME,
    address: line.ADDRESS, operations: line.OPERATIONS})
    RETURN id(df), df.firmName
    '''

    return g.run(query,file = file)


if __name__ == "__main__":
    # The Neo4j password comes from the NEO4J_PASS environment variable
    # (None when the variable is not set).
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)  ## readme need to document setting environment variable in pycharm
    # NOTE(review): `tx` is not used anywhere in the visible code; the
    # statements below run through g.run instead - confirm it is needed.
    tx = g.begin()

    # Index DrugFirm nodes by firmName before loading the data.
    index = '''
    CREATE INDEX ON: DrugFirm(firmName)'''
    g.run(index)
    print("Create index on DrugFirm(firmName)")


    # Load the DrugFirm nodes from the tab-separated registration file.
    file = 'file:///drls_reg.txt'
    df_node = create_DrugFirm_node(file, g)
    print("Finish loading DrugFirm")
def _count_listed_side_effects(side_effects):
    """Return how many side effects a ``Side_Effects`` text describes.

    ``'Blank'`` (the placeholder the aggregation substitutes for null) and a
    missing value count as 0. Text containing ``"following:"`` counts as the
    number of newlines after that marker minus one. Any other text counts as
    a single side effect.
    """
    if side_effects is None or side_effects == 'Blank':
        return 0
    if "following:" in side_effects:
        return side_effects.split("following:")[1].count('\n') - 1
    return 1


def UserStory4(Neo4JPwd, Dbname, redisdb):
    """Print, for the diseases searched in a given month, the drug with the fewest side effects.

    The user is asked for a two-digit month. Search topics logged in Redis
    for that month are ranked by frequency, filtered down to known diseases
    (MongoDB ``Disease`` collection), mapped to their drugs (Neo4j
    ``Medicine_For`` relationships) and scored by the number of side effects
    recorded for each drug (MongoDB ``Drugs`` collection). For each of the
    first ten diseases, the drug with the fewest side effects is printed
    (ties are resolved by the smallest drug name).

    Parameters
    ----------
    Neo4JPwd : str
        Password for the Neo4j graph.
    Dbname : str
        Name of the MongoDB database.
    redisdb : int
        Redis database number.

    Returns
    -------
    None. Results and validation messages are printed.
    """
    graph = Graph(password=Neo4JPwd)
    r = redis.Redis(db=redisdb)
    redis_pipe = r.pipeline()

    ### Redis Search - for all searched topics ###
    val = input("Please enter Month(in 2 digit format) of your choice: ")

    if len(val) != 2:
        print(
            "You didn't enter a 2 digit month code hence terminating program")
        return

    # Non-numeric input used to crash with ValueError; treat it as invalid.
    try:
        month = int(val)
    except ValueError:
        month = 0
    if month <= 0 or month > 12:
        print("You didnt enter a valid month digit hence terminating program")
        return

    print("Please wait for few seconds...")
    pattern = "*-" + val + "-*SEARCH"
    for key in r.keys(pattern=pattern):
        redis_pipe.lrange(key, 0, 100)

    # Keep the first element of every search list (skipping empty replies),
    # then order the distinct topics from most to least frequently searched.
    searched = [
        items[0].decode('utf-8') for items in redis_pipe.execute() if items
    ]
    topics = [topic for topic, _ in Counter(searched).most_common()]

    ### Mongo Search - filter the diseases out of the search topics ###
    client = MongoClient()
    db = client[Dbname]
    disease = []
    for topic in topics:
        for doc in db.Disease.find({"Disease": topic}, {"Disease": 1, "_id": 0}):
            disease.append(doc['Disease'])

    ### Neo Search - find the drugs for each disease ###
    # The disease name is passed as a query parameter rather than concatenated
    # into the Cypher text, so quotes in a name cannot break or alter the query.
    query = ("match(n:Disease)<-[:Medicine_For]-(b) where n.name=$name "
             "return n.name as Disease,b.Drug_Name as Drug")
    disease_drug = []
    for name in disease:
        disease_drug.extend(
            graph.run(query, parameters={'name': name}).data())

    ### Mongo Search - side effects of the drugs ###
    disease_drug3 = []
    for pair in disease_drug:
        drug = pair.get('Drug')
        disease5 = pair.get('Disease')
        drug_pipeline = [{
            "$match": {
                "Drug_Name": drug
            }
        }, {
            "$project": {
                "Drug_Name": 1,
                "_id": 0,
                "Side_Effects": {
                    "$switch": {
                        "branches": [{
                            "case": {
                                "$eq": ["$Side_Effects", None]
                            },
                            "then": "Blank"
                        }],
                        "default":
                        "$Side_Effects"
                    }
                }
            }
        }]
        for doc in db.Drugs.aggregate(drug_pipeline):
            doc['Disease'] = disease5
            disease_drug3.append(doc)

    ### Grouping and filtering for final result ###
    records = [{
        'Disease': doc.get('Disease'),
        'Drug': doc.get('Drug_Name'),
        'No_Of_SE': _count_listed_side_effects(doc.get('Side_Effects')),
    } for doc in disease_drug3]

    if not records:
        # An empty frame has no 'Disease' column, so groupby would raise.
        print("No drugs were found for the diseases searched in this month.")
        return

    df_se = pd.DataFrame(records)

    # Keep, per disease, only the rows whose side-effect count is the minimum.
    fewest = df_se.groupby('Disease')['No_Of_SE'].transform('min')
    result_df = df_se[df_se['No_Of_SE'] == fewest].drop_duplicates()

    # Show full column contents. None is the supported "no limit" value; only
    # pandas releases before 1.0 require the old -1 spelling.
    try:
        pd.set_option('display.max_colwidth', None)
    except ValueError:
        pd.set_option('display.max_colwidth', -1)

    final = result_df.groupby('Disease').agg({'Drug': ['min']}).reset_index()
    final.columns = final.columns.droplevel(1)

    # First ten diseases in the order they were first encountered.
    top_diseases = list(
        dict.fromkeys(doc['Disease'] for doc in disease_drug3))[:10]

    final2 = final[final['Disease'].isin(top_diseases)]

    print(final2.to_string(index=False, justify='left'))
Exemple #44
0
class CivilAviationKnowledgeGraph:
    """Collect entities and relations from a nested JSON file and write them to Neo4j.

    ``collect()`` walks the JSON file and gathers entities, attributes and
    relations in memory. ``build()`` creates the corresponding nodes and
    relationships in the graph. ``export_collections()`` writes the collected
    entity names to one text file per label.
    """

    def __init__(self):
        self.data_path = "./data/data.json"
        self.export_dir = "./data/dicts/"

        self.graph = Graph("http://localhost:7474", auth=("neo4j", "shawn"))
        self.entities = {}  # collected entities: prefix -> set of names
        self.attrs = {}  # entity attributes: entity name -> attrs dict
        self.rels_structures = set()  # structural relations between entities
        self.rels_values = []  # value relations between entities
        self.cur_value_rel_src = None  # source node of the value relations
        self.cur_index_name = None  # name of the index currently being visited

    def collect(self):
        """Load the JSON file at ``self.data_path`` and collect everything in it."""
        print("开始收集数据...")
        with open(self.data_path, 'r', encoding='gbk') as fp:
            data = json.load(fp)
            # collect the data recursively
            self._travel(data, first_time=True)
        print("数据收集完毕!")

    def _travel(self, objs: dict, parent: tuple = None, first_time=False):
        """Recursively visit one level of the data tree.

        Each key of ``objs`` has the form ``"<prefix>-<name>"``. Each value is
        a dict that may contain ``"attrs"`` (entity attributes), ``"rels"``
        (value-relation attributes) and ``"next"`` (the next level, same shape).

        :param objs: mapping for the current level.
        :param parent: ``(prefix, name)`` of the enclosing entity, if any.
        :param first_time: True for the top level only; its entities become
            the source node of the value relations collected afterwards.
        """
        for fields, entities in objs.items():
            # split the key into prefix and entity name
            prefix, name = fields.split('-')
            if first_time:
                self.cur_value_rel_src = (prefix, name)
            if prefix == 'I' and entities.get("next"):
                self.cur_index_name = name
            # entity attributes
            attrs = entities.get("attrs")
            if attrs:
                self.collect_attrs(name, attrs)
            # entities of the next level
            next_ = entities.get("next")
            # relations
            rels = entities.get("rels")

            self.collect_entity(prefix, name)
            if parent:
                self.collect_structure_rel(parent, (prefix, name))
            if rels:
                self.collect_value_rel((prefix, name), rels)
            if next_:
                self._travel(next_, (prefix, name))

    def collect_entity(self, key: str, name: str):
        """Record entity ``name`` under prefix ``key``."""
        self.entities.setdefault(key, set()).add(name)

    def collect_attrs(self, name: str, attrs: dict):
        """Record the attributes of entity ``name``."""
        self.attrs[name] = attrs

    def collect_structure_rel(self, src: tuple, dst: tuple):
        """Record a structural relation between two ``(prefix, name)`` entities."""
        self.rels_structures.add((src[0] + '-' + dst[0], src[1], dst[1]))

    def collect_value_rel(self, dst: tuple, attrs: dict):
        """Record a value relation from the current source node to ``dst``.

        For destinations with prefix ``'A'`` the current index name is stored
        in ``attrs['name']`` (the passed dict is modified in place).
        """
        if dst[0] == 'A':
            # Year-Area($Index$)
            attrs['name'] = self.cur_index_name
        self.rels_values.append((self.cur_value_rel_src[0] + '-' + dst[0],
                                 self.cur_value_rel_src[1], dst[1], attrs))

    def build(self):
        """Build the knowledge graph from the collected data."""
        print("开始构建实体...")
        self.build_nodes()
        print("实体构建完毕!")

        print("开始构建关系...")
        self.build_relationships()
        print("关系构建完毕!")

    def build_nodes(self):
        """Create one node per collected entity."""
        for prefix, nodes in self.entities.items():
            label = PREFIX_LABEL_MAP[prefix]
            for name in nodes:
                self.create_node(label, name, self.attrs.get(name))

    def build_relationships(self):
        """Create the structural relations, then the value relations."""
        for (prefix, src, dst) in self.rels_structures:
            a, b = prefix.split('-')
            la = PREFIX_LABEL_MAP[a]
            lb = PREFIX_LABEL_MAP[b]
            rel = PREFIX_S_REL_MAP[prefix]
            self.create_relationship(la, lb, src, dst, rel)

        for (prefix, src, dst, attrs) in self.rels_values:
            a, b = prefix.split('-')
            la = PREFIX_LABEL_MAP[a]
            lb = PREFIX_LABEL_MAP[b]
            rel = PREFIX_V_REL_MAP[prefix]
            # fall back to the name stored in the attributes when the map
            # gives no relationship type for this prefix pair
            self.create_relationship(la, lb, src, dst,
                                     rel if rel else attrs['name'], attrs)

    def create_node(self, label: str, name: str, attrs=None):
        """Create a node with the given label, name and optional attributes."""
        if attrs is None:
            attrs = {}
        node = Node(label, name=name, **attrs)
        self.graph.create(node)

    @staticmethod
    def _quote_identifier(name) -> str:
        """Return ``name`` as a backtick-quoted Cypher identifier."""
        return "`" + str(name).replace("`", "``") + "`"

    def create_relationship(self, src_label: str, dst_label: str, src: str, dst: str, rel: str, attrs=None):
        """Create a ``rel`` relationship between the nodes named ``src`` and ``dst``.

        :param src_label: label of the source node.
        :param dst_label: label of the destination node.
        :param src: ``name`` property of the source node.
        :param dst: ``name`` property of the destination node.
        :param rel: relationship type.
        :param attrs: relationship properties; when empty or None the
            relationship gets a single ``name`` property equal to ``rel``.

        Errors raised while running the query are printed, not propagated.
        """
        if attrs:
            # Values are stored as text, exactly as the former quoted literals were.
            props = {k: str(v) for k, v in attrs.items()}
        else:
            props = {"name": str(rel)}
        # Node names and property values travel as query parameters, so quotes
        # in them cannot break the statement. The relationship type cannot be a
        # parameter; it is backtick-quoted so names with spaces or punctuation
        # remain valid.
        query = (
            f"match(s:{src_label}),(d:{dst_label}) "
            "where s.name=$src and d.name=$dst "
            f"create (s)-[rel:{self._quote_identifier(rel)}]->(d) "
            "set rel = $props"
        )
        try:
            self.graph.run(query, {"src": src, "dst": dst, "props": props})
        except Exception as err:
            print(err)

    def export_collections(self):
        """Write the collected entity names to ``self.export_dir``, one file per label."""
        # Honour export_dir (previously the path was hard-coded a second time)
        # and create missing parent directories as well.
        os.makedirs(self.export_dir, exist_ok=True)
        for key, values in self.entities.items():
            target = os.path.join(self.export_dir,
                                  f"{PREFIX_LABEL_MAP[key]}.txt")
            write_to_file(target, values)
        print("导出实体数据完毕.")
Exemple #45
0
import json
from py2neo import Graph

# Graph connection used by the rest of this script.
db = Graph('http://45.55.182.50:7474/')

# Indexes on the properties the queries below look nodes up by.
db.run("CREATE INDEX ON :Business(id)")
db.run("CREATE INDEX ON :Category(name)")


# Merges a Business node by id (setting its properties only on creation), then
# merges every category in the `categories` parameter and links the business to
# it with IS_IN. Parameters use the `{name}` placeholder form.
create_business_query = '''
// MERGE ON categories
MERGE (b:Business {id: {business_id}})
ON CREATE SET b.name = {name}, 
	b.latitude = {latitude}, 
	b.longitude = {longitude},
	b.stars = {stars}, 
	b.review_count = {review_count}
WITH b
UNWIND {categories} AS category
MERGE (c:Category {name: category})
MERGE (b)-[:IS_IN]->(c)
'''

# Links an existing Business (matched by id) to a single Category, merging the
# category by name and creating the IS_IN relationship via CREATE UNIQUE.
merge_category_query = '''
MATCH (b:Business {id: {business_id}})
MERGE (c:Category {name: {category}})
CREATE UNIQUE (c)<-[:IS_IN]-(b)
'''


def UserStory1(Neo4JPwd, Dbname, redisdb):
    """Look up the diseases caused by user-entered symptoms and show their treatments.

    Reads space-separated symptoms from stdin, finds the diseases that every
    entered symptom CAUSES (Neo4j), logs the search in Redis, and on request
    prints diagnosis/treatment details from the MongoDB ``Disease`` collection
    for one disease or for all of them.

    Parameters
    ----------
    Neo4JPwd : str
        Password for the Neo4j graph.
    Dbname : str
        Name of the MongoDB database.
    redisdb : int
        Redis database number.

    Returns
    -------
    None. All results are printed.
    """
    rule = "--------------------------------------------------------------------------------"
    wide_rule = "---------------------------------------------------------------------------------------------"

    graph = Graph(password=Neo4JPwd)
    print("Enter the symptoms with space")
    inp = [str(i).lower() for i in input().split()]
    if not inp:
        print(
            "Entered input is not valid.Please try again by entering symptoms with space"
        )
        return

    results = graph.run('''with $in as symptoms
    match (s:Symptom)
    Where toLower(s.name) in symptoms
    with collect(s) as symptoms
    match (d:Disease)
    where all(s in symptoms Where (s)-[:CAUSES]->(d))
    return DISTINCT d.name as Disease, d.id as Disease_id''',
                        parameters={
                            'in': inp
                        }).data()

    now = datetime.now()
    current_time = now.strftime("%d-%m-%Y:%H:%M:%S")
    username = ''.join(map(str, generate_username(1)))
    user_tag = username.upper()

    r = redis.Redis(db=redisdb)

    client = MongoClient()
    db = client[Dbname]
    collection = db.Disease
    if not results:
        print(
            f"Tring.. Tring..... Tring............  database does not have any disease associated with mentioned symptoms {inp}"
        )
        print(rule)
        # Log every unmatched symptom under one key for later validation.
        key = 'NODISEASE' + ':' + current_time + ':' + user_tag
        for symptom in inp:
            r.lpush(key, symptom.upper())
            print(
                f"Search data {symptom.upper()} inserted into Redis with key {key} for further validation"
            )
        print(rule)
        return
    record_ids = [record['Disease'].lower() for record in results]
    record_ids2 = [ObjectId(record['Disease_id']) for record in results]
    print(f"{inp} symptoms causes disease(s) {record_ids}")
    print(rule)
    key = 'SYMPTOMS' + ':' + current_time + ':' + user_tag
    for symptom in inp:
        r.lpush(key, symptom.upper())
        print(
            f"Search data {symptom.upper()} inserted into Redis with key {key}")
    print(rule)
    inp1 = input(
        "You want to know more about Diseases and their treatments (y/n):"
    ).lower()
    if inp1 != 'y':
        return
    print(
        "Please enter Disease you want to know from above list, enter all if you want to know about all Diseases:"
    )
    disease = input().lower()
    print(disease)
    key = 'DISEASE' + ':' + current_time + ':' + user_tag
    if disease != 'all' and disease in record_ids:
        # Escape the name so characters such as "(" or "+" are matched
        # literally instead of being interpreted as regex syntax.
        query = collection.find(
            {'Disease': re.compile(re.escape(disease), re.IGNORECASE)})
        df = pd.DataFrame(query)
        for index, row in df.iterrows():
            print(rule)
            print(f"{inp} causes '{row['Disease']}'")
            print(f"please find some treatements for '{row['Disease']}''")
            print(row['Diagnosis_treatment'])
            # Documents without a Specialization show up as NaN in the frame;
            # skip them, as the "all" branch below does.
            if 'Specialization' in row and not pd.isna(row['Specialization']):
                print(
                    f"This '{row['Disease']}' can be treated by doctors having specialization of {row['Specialization']['Name']}"
                )
            r.lpush(key, row['Disease'].upper())
            print(rule)
            print(
                f"Search data {row['Disease'].upper()} inserted into Redis with key {key}"
            )
    elif disease == 'all':
        print(record_ids2)
        query = collection.find({'_id': {'$in': record_ids2}})
        df = pd.DataFrame(query)
        print(df)
        for index, row in df.iterrows():
            print(wide_rule)
            print(f"{inp} causes '{row['Disease']}'")
            print(f"please find some treatements for '{row['Disease']}''")
            print(row['Diagnosis_treatment'])
            if 'Specialization' in row and not pd.isna(row['Specialization']):
                print(
                    f"This '{row['Disease']}' can be treated by doctors with specialization of {row['Specialization']['Name']}"
                )
            r.lpush(key, row['Disease'].upper())
            print(rule)
            print(
                f"Search data {row['Disease'].upper()} inserted into Redis with key {key}"
            )
            print(rule)
    else:
        print(
            f"Entered {disease} does not exist in our database. Thank you for choosing healthcare engine"
        )
Exemple #47
0
class BaseContext(object):
    """
    Base CorpusContext class.  Inherit from this and extend to create
    more functionality.

    Parameters
    ----------
    args : arguments or :class:`~polyglotdb.config.CorpusConfig`
        If the first argument is not a CorpusConfig object, it is
        the name of the corpus
    kwargs : keyword arguments
        If a :class:`~polyglotdb.config.CorpusConfig` object is not specified, all arguments and
        keyword arguments are passed to a CorpusConfig object
    """
    def __init__(self, *args, **kwargs):
        if len(args) == 0:
            raise(CorpusConfigError('Need to specify a corpus name or CorpusConfig.'))
        if isinstance(args[0], CorpusConfig):
            self.config = args[0]
        else:
            self.config = CorpusConfig(*args, **kwargs)
        self.config.init()
        self.graph = Graph(**self.config.graph_connection_kwargs)
        self.corpus_name = self.config.corpus_name
        if self.corpus_name:
            self.init_sql()

        self.hierarchy = Hierarchy({})

        self.lexicon = Lexicon(self)
        self.census = Census(self)

        self._has_sound_files = None
        self._has_all_sound_files = None
        # In a frozen build the external tools are looked up next to the last
        # sys.path entry; otherwise they are searched for on PATH.
        if getattr(sys, 'frozen', False):
            self.config.reaper_path = os.path.join(sys.path[-1], 'reaper')
        else:
            self.config.reaper_path = shutil.which('reaper')

        if sys.platform == 'win32':
            praat_exe = 'praatcon.exe'
        else:
            praat_exe = 'praat'

        if getattr(sys, 'frozen', False):
            self.config.praat_path = os.path.join(sys.path[-1], praat_exe)
        else:
            self.config.praat_path = shutil.which(praat_exe)

        self.config.query_behavior = 'speaker'

    def load_variables(self):
        """
        Loads variables into Hierarchy

        Reads the pickled ``variables`` file from the config's data directory.
        If the file does not exist and a corpus name is set, the hierarchy is
        generated and saved instead.
        """
        try:
            # NOTE(review): pickle.load executes whatever the file contains;
            # only load `variables` files from a trusted data directory.
            with open(os.path.join(self.config.data_dir, 'variables'), 'rb') as f:
                var = pickle.load(f)
            self.hierarchy = var['hierarchy']
        except FileNotFoundError:
            if self.corpus_name:
                self.hierarchy = self.generate_hierarchy()
                self.save_variables()

    def save_variables(self):
        """ saves variables to hierarchy"""
        with open(os.path.join(self.config.data_dir, 'variables'), 'wb') as f:
            pickle.dump({'hierarchy': self.hierarchy}, f)

    def init_sql(self):
        """
        initializes sql connection

        Creates the engine, binds ``Session`` to it and creates all tables
        when the database file does not exist yet.
        """
        self.engine = create_engine(self.config.sql_connection_string)
        Session.configure(bind=self.engine)
        if not os.path.exists(self.config.db_path):
            Base.metadata.create_all(self.engine)

    def execute_cypher(self, statement, **parameters):
        """
        Executes a cypher query

        Parameters
        ----------
        statement : str
            the cypher statement
        parameters : dict
            keyword arguments to execute a cypher statement

        Returns
        -------
        query result
            the result of ``self.graph.run``, or None when a
            ``ConstraintError`` was raised (it is deliberately ignored)

        Raises
        ------
        ConnectionError, AuthorizationError, NetworkAddressError, TemporaryConnectionError
            translated from the corresponding driver errors; any other
            exception propagates unchanged
        """
        try:
            return self.graph.run(statement, **parameters)
        except py2neo.packages.httpstream.http.SocketError:
            raise(ConnectionError('PolyglotDB could not connect to the server specified.'))
        except ClientError:
            raise
        except (Unauthorized):
            raise(AuthorizationError('The specified user and password were not authorized by the server.'))
        except Forbidden:
            raise(NetworkAddressError('The server specified could not be found.  Please double check the server address for typos or check your internet connection.'))
        except (TransientError):
            raise(TemporaryConnectionError('The server is (likely) temporarily unavailable.'))
        except ConstraintError:
            pass
        except Exception:
            raise

    @property
    def discourses(self):
        '''
        Return a list of all discourses in the corpus.

        Names come from the SQL database; when it holds none, they are read
        from the graph and added to the SQL session.
        '''
        q = self.sql_session.query(Discourse).all()
        if not len(q):
            res = self.execute_cypher('''MATCH (d:Discourse:{corpus_name}) RETURN d.name as discourse'''.format(corpus_name = self.corpus_name))
            discourses = []
            for d in res:
                # Read the column by key, the same way `speakers` does,
                # instead of relying on attribute access on the record.
                name = d['discourse']
                instance = Discourse(name = name)
                self.sql_session.add(instance)
                discourses.append(name)
            self.sql_session.flush()
            return discourses
        return [x.name for x in q]

    @property
    def speakers(self):
        """
        Gets a list of speakers in the corpus

        Names come from the SQL database; when it holds none, they are read
        from the graph and added to the SQL session.

        Returns
        -------
        names : list
            all the speaker names
        """
        q = self.sql_session.query(Speaker).all()
        if not len(q):
            res = self.execute_cypher('''MATCH (s:Speaker:{corpus_name}) RETURN s.name as speaker'''.format(corpus_name = self.corpus_name))

            speakers = []
            for s in res:
                name = s['speaker']
                instance = Speaker(name = name)
                self.sql_session.add(instance)
                speakers.append(name)
            self.sql_session.flush()
            return speakers
        return [x.name for x in q]

    def __enter__(self):
        """Open a SQL session, load the saved hierarchy and return the context."""
        self.sql_session = Session()
        self.load_variables()
        #if self.corpus_name:
        #    self.hierarchy = self.generate_hierarchy()
        return self

    def __exit__(self, exc_type, exc, exc_tb):
        """Commit the SQL session on success; roll back and close it on error."""
        if exc_type is None:
            #try:
            #    shutil.rmtree(self.config.temp_dir)
            #except:
            #    pass
            self.sql_session.commit()
            # NOTE(review): this early return skips the expunge/close below,
            # so the session stays open after a successful block - confirm
            # whether that is intended.
            return True
        else:
            self.sql_session.rollback()
        self.sql_session.expunge_all()
        self.sql_session.close()

    def __getattr__(self, key):
        """
        Resolve annotation types as attributes (e.g. ``context.word``).

        ``pause`` returns a PauseAnnotation. A name that matches an annotation
        type, directly or with a trailing ``s`` added, returns an
        AnnotationAttribute. Anything else raises GraphQueryError.
        """
        # __getattr__ only runs when normal lookup fails. If `hierarchy` itself
        # is not set yet, the lookups below would re-enter this method without
        # end, so fail with a plain AttributeError instead.
        if key == 'hierarchy':
            raise AttributeError(key)
        if key == 'pause':
            return PauseAnnotation(corpus = self.corpus_name)
        if key + 's' in self.hierarchy.annotation_types:
            key += 's' # FIXME
        if key in self.hierarchy.annotation_types:
            return AnnotationAttribute(key, corpus = self.corpus_name, hierarchy = self.hierarchy)
        raise(GraphQueryError('The graph does not have any annotations of type \'{}\'.  Possible types are: {}'.format(key, ', '.join(sorted(self.hierarchy.annotation_types)))))

    @property
    def word_name(self):
        """
        Gets the word label

        Returns
        -------
        word : str
            the first annotation type starting with ``'word'``, or ``'word'``
            when there is none
        """
        for at in self.hierarchy.annotation_types:
            if at.startswith('word'): #FIXME need a better way for storing word name
                return at
        return 'word'

    @property
    def phone_name(self):
        """
        Gets the phone label

        Returns
        -------
        phone : str
            the lowest annotation type of the hierarchy, or ``'phone'`` when
            the hierarchy has none
        """
        name = self.hierarchy.lowest
        if name is None:
            name = 'phone'
        return name

    def reset_graph(self, call_back = None, stop_check = None):
        '''
        Remove all nodes and relationships in the graph that are apart
        of this corpus.

        Parameters
        ----------
        call_back : callable, optional
            called with progress information (a message, then counts)
        stop_check : callable, optional
            polled before every batch; a true result stops that deletion loop
        '''
        if call_back is not None:
            call_back('Resetting database...')
            number = self.execute_cypher('''MATCH (n:%s)-[r]-() return count(*) as number ''' % (self.corpus_name)).evaluate()
            call_back(0, number * 2)
        num_deleted = 0
        # Relationships first, then nodes, in batches of up to 50000 until a
        # batch deletes nothing. `deleted` starts non-zero to enter the loop.
        deleted = 1000
        while deleted > 0:
            if stop_check is not None and stop_check():
                break
            deleted = self.execute_cypher('''MATCH (n:%s)-[r]-() with r LIMIT 50000 DELETE r return count(r) as deleted_count ''' % (self.corpus_name)).evaluate()
            num_deleted += deleted
            if call_back is not None:
                call_back(num_deleted)
        deleted = 1000
        while deleted > 0:
            if stop_check is not None and stop_check():
                break
            deleted = self.execute_cypher('''MATCH (n:%s) with n LIMIT 50000 DELETE n return count(n) as deleted_count ''' % (self.corpus_name)).evaluate()
            num_deleted += deleted
            if call_back is not None:
                call_back(num_deleted)
        self.reset_hierarchy()
        self.hierarchy = Hierarchy({})

    def reset(self, call_back = None, stop_check = None):
        '''
        Reset the graph and SQL databases for a corpus.
        '''
        self.reset_graph(call_back, stop_check)
        try:
            Base.metadata.drop_all(self.engine)
        except sqlalchemy.exc.OperationalError:
            # dropping is best-effort; the tables are recreated below
            pass
        Base.metadata.create_all(self.engine)

    def query_graph(self, annotation_type):
        '''
        Return a :class:`~polyglotdb.config.GraphQuery` for the specified annotation type.

        When extending :class:`~polyglotdb.config.GraphQuery` functionality, this function must be
        overwritten.

        Parameters
        ----------
        annotation_type : str
            The type of annotation to look for in the corpus
        '''
        if annotation_type.type not in self.hierarchy.annotation_types \
                and annotation_type.type != 'pause': #FIXME make more general
            raise(GraphQueryError('The graph does not have any annotations of type \'{}\'.  Possible types are: {}'.format(annotation_type.name, ', '.join(sorted(self.hierarchy.annotation_types)))))
        # The query class follows the configured query behavior.
        if self.config.query_behavior == 'speaker':
            cls = SpeakerGraphQuery
        elif self.config.query_behavior == 'discourse':
            cls = DiscourseGraphQuery
        else:
            cls = GraphQuery
        return cls(self, annotation_type)

    @property
    def annotation_types(self):
        '''
        Return the annotation types of the hierarchy.
        '''
        return self.hierarchy.annotation_types

    @property
    def lowest_annotation(self):
        '''
        Returns the annotation type that is the lowest in the hierarchy
        of containment.
        '''
        return self.hierarchy.lowest

    def remove_discourse(self, name):
        '''
        Remove the nodes and relationships associated with a single
        discourse in the corpus.

        Parameters
        ----------
        name : str
            Name of the discourse to remove
        '''
        # `name` is used as a node label next to the corpus name.
        self.execute_cypher('''MATCH (n:%s:%s)-[r]->() DELETE n, r'''
                                    % (self.corpus_name, name))

    def discourse(self, name, annotations = None):
        '''
        Get all words spoken in a discourse.

        Parameters
        ----------
        name : str
            Name of the discourse
        annotations : optional
            not used by this implementation

        Returns
        -------
        the word annotations of the discourse, ordered by their begin
        '''

        w = getattr(self, self.word_name) #FIXME make more general
        q = GraphQuery(self, w)
        q = q.filter(w.discourse.name == name)
        q = q.order_by(w.begin)
        return q.all()
def UserStory8(Neo4JPwd, Dbname, redisdb):
    """Plot the most-searched drugs for a season picked on the console.

    The first entry of every Redis search list whose key matches the
    season's pattern is treated as a searched topic.  Topics are resolved
    to drugs in two ways: directly through the MongoDB ``Drugs``
    collection, and indirectly through the MongoDB ``Disease`` collection
    followed by the Neo4j ``Medicine_For`` relationship.  The ten most
    frequent drugs are drawn as a bar chart that is saved as
    ``<season>.png`` and then shown.

    :param Neo4JPwd: password used to open the Neo4j graph
    :param Dbname: name of the MongoDB database to query
    :param redisdb: Redis database index
    :return: None
    """
    # Neo Graph
    graph = Graph(password=Neo4JPwd)

    # Redis connection
    r = redis.Redis(db=redisdb)
    pipeline = r.pipeline()

    val = input("Please enter season number for your choice(1-4)\n"
                "(1 - Rainy,\n 2 - Summer,\n 3 - Winter,\n 4-Spring): ")

    # NOTE(review): the winter class "[10][0-1]" matches the month strings
    # 00, 01, 10 and 11 (never 12), and spring's 02-03 overlaps summer's
    # 03-05 -- confirm the intended month ranges. Any answer other than
    # 1, 2 or 3 selects spring.
    if val == "1":
        patterns = "*-0[6-9]-*SEARCH*"
        season = "rainy"
    elif val == "2":
        patterns = "*-0[3-5]-*SEARCH*"
        season = "summer"
    elif val == "3":
        patterns = "*-[10][0-1]-*SEARCH*"
        season = "winter"
    else:
        patterns = "*-0[2-3]-*SEARCH*"
        season = "spring"

    # Redis search: fetch every search list of the season in one round trip
    for key in r.keys(pattern=patterns):
        pipeline.lrange(key, 0, 100)

    # Only the first element of each list is used; empty replies are skipped
    # instead of raising IndexError.
    searched = [
        entries[0].decode('utf-8') for entries in pipeline.execute() if entries
    ]

    # Distinct topics, most frequently searched first
    topics = [topic for topic, _ in Counter(searched).most_common()]

    # Mongo search: keep the topics that are drugs / diseases
    client = MongoClient()
    db = client[Dbname]

    drug = [
        doc['Drug_Name'] for topic in topics
        for doc in db.Drugs.find({"Drug_Name": topic}, {
            "Drug_Name": 1,
            "_id": 0
        })
    ]

    disease = [
        doc['Disease'] for topic in topics
        for doc in db.Disease.find({"Disease": topic}, {
            "Disease": 1,
            "_id": 0
        })
    ]
    client.close()

    # Neo search: drugs prescribed for the searched diseases. The disease
    # name travels as a query parameter so quotes in it cannot break (or
    # alter) the Cypher statement.
    drug_query = ("match(n:Disease)<-[:Medicine_For]-(b) where n.name=$name "
                  "return b.Drug_Name as Drug")
    drugs_of_diseases = [
        row.get('Drug') for name in disease
        for row in graph.run(drug_query, name=name).data()
    ]

    counts = Counter(drug)
    counts.update(drugs_of_diseases)
    top_drugs = dict(counts.most_common(10))
    names = list(top_drugs.keys())
    values = list(top_drugs.values())

    # bar graph for the top 10 drugs
    plt.figure(figsize=(12, 12))
    plt.bar(range(len(top_drugs)), values, tick_label=names)
    plt.xticks(range(len(top_drugs)), names, rotation=90)
    plt.xlabel('Drugs', fontsize=18)
    plt.ylabel('Count of Drugs', fontsize=16)
    plt.savefig(season + '.png', bbox_inches='tight')
    print(
        "Output Figure is saved with respective season name for more clarity")
    print("displaying top 10 trending drugs for", season + " season")
    plt.show()
Exemple #49
0
import requests, re
from lxml import html
from py2neo import Graph, Node, Relationship

# find a numeric ID in a URL (without any other digits)
# the pattern matches any run of one or more digits
findid = re.compile(r"\d+")

# change the Neo4j password to yours
g = Graph(user="******", password="******")

# reset the graph: runs at import time and removes existing data
# relationships are deleted first, then the Artist and Artwork nodes
# take care to use an underscore in ARTIST_OF
# or in queries you will need to use tic quotes ``
g.run('MATCH () -[r:ARTIST_OF] -> () DELETE r;')
g.run('MATCH (n:Artist) DELETE n;')
g.run('MATCH (m:Artwork) DELETE m;')

def ScrapeCollection(workID):
    """Fetch a MoMA collection page and read title and date text from it.

    ``workID`` is converted with ``str()`` and appended to the collection
    URL.

    NOTE(review): the function ends right after the date loop and returns
    nothing, so the body looks truncated in this copy -- confirm against
    the original script.
    """
    page = requests.get('http://moma.org/collection/works/' + str(workID))
    tree = html.fromstring(page.content)

    # the title is a complex, potentially italicized field
    # only the first match is used (the loop breaks immediately);
    # full_title stays unset when nothing matches
    titles = tree.cssselect('.short-caption h1.object-tile--gothic')
    for title in titles:
        full_title = title.text.strip()
        break

    # the date is a string field which can be a year, range of years, or approximation
    # there is no break here, so first_date ends up holding the last match
    dates = tree.cssselect('.short-caption h3')
    for date in dates:
        first_date = date.text.strip()
def UserStory9(Neo4JPwd, redisdb):
    """Plot the search trend of a drug against one of its competitors.

    Asks for a drug name, looks up the drugs that share a ``Medicine_For``
    target with it in Neo4j, lets the user pick one of them, and plots how
    often each of the two drugs appears as the first entry of the Redis
    ``*SEARCH*`` lists, grouped by the month/year encoded in the key.

    :param Neo4JPwd: password used to open the Neo4j graph
    :param redisdb: Redis database index
    :return: None (also when the function stops early after a message)
    """
    graph = Graph(password=Neo4JPwd)
    drug_in = input("Please enter your Drug Name: ")
    # The name typed by the user is sent as a query parameter, never spliced
    # into the Cypher text.
    result3 = graph.run(
        "match (a:Drug)-[:Medicine_For]->()<-[:Medicine_For]-(b:Drug) "
        "where a.Drug_Name=$drug_name return b.Drug_Name",
        drug_name=drug_in).data()

    if not result3:
        print(
            "Drug Name entered didn't match any drug names in the database. Hence ending script."
        )
        return

    print("This will take around 30 seconds...")

    plt.style.use("ggplot")

    r = redis.Redis(db=redisdb)
    pipeline = r.pipeline()

    keys = r.keys(pattern="*SEARCH*")
    for key in keys:
        pipeline.lrange(key, 0, 100)
    replies = pipeline.execute()

    rows = []
    for key, entries in zip(keys, replies):
        if not entries:
            continue
        rows.append({
            # text after the first ":" with its first three characters
            # dropped; parsed as month-year below
            'Key': key.decode("utf-8").split(":")[1][3:],
            # only the first element of each list is considered
            'Value': entries[0].decode("utf-8"),
        })

    if not rows:
        # Without this guard the DataFrame has no 'Key' column and the
        # date conversion below fails with KeyError.
        print("No search history was found in Redis. Hence ending program.")
        return

    df1 = pd.DataFrame(rows)
    df1['Key'] = pd.to_datetime(df1['Key'], format='%m-%Y')

    disease_list = list({row['b.Drug_Name'] for row in result3})

    print("Competitor Drugs: -")
    for position, competitor in enumerate(disease_list, start=1):
        print(position, ".", competitor)

    comp_in = input(
        "Please enter name of Competitor Drug you wanna see trend with - ")

    if comp_in not in disease_list:
        print(
            "Entered Competitor Drug wasn't from the list. Hence ending program."
        )
        return

    trend_list = [drug_in, comp_in]

    df2 = df1[df1['Value'].isin(trend_list)]
    df3 = df2.groupby(['Key', 'Value']).size().reset_index(name='counts')

    final = pd.pivot_table(df3, index=['Key'], columns=['Value'], fill_value=0)
    final.columns = final.columns.droplevel(0)
    # 'Key' is the index here; keep the sorted result (the sort was
    # previously computed and thrown away).
    final = final.sort_index()

    final.plot(figsize=(20, 10)).legend(title='Drugs')
    plt.show()
from py2neo import Graph

graph = Graph(password='******')

# Sample at most 100 TO relationships between asn nodes.
query = (
    'MATCH (s:asn)-[r:TO]->(d:asn) '
    'RETURN s.name as source, r as relationship, d.name as dest '
    'LIMIT 100'
)
results = graph.run(query)

# Each record unpacks positionally, in the order of the RETURN clause.
for record in results:
    source, relationship, dest = record
    print('source: {} dest: {} relationship: {}'.format(source, dest, relationship))
Exemple #52
0
    def setUp(self):
        """Create the test client and empty the Neo4j graph before each test."""
        self.app = app.app.test_client()

        # GRAPHENEDB_URL wins when set; otherwise use the local HTTP endpoint.
        neo4j_url = os.environ.get('GRAPHENEDB_URL', 'http://localhost:7474')
        graph = Graph(neo4j_url, bolt=False)
        # clears graph
        graph.run("MATCH (a) DETACH DELETE a")
Exemple #53
0
class MedicalGraph:
    """Build a disease knowledge graph in Neo4j from ``disease.csv``.

    The CSV is read positionally (15 columns, see ``read_file``); entity
    nodes are created by ``create_graphNodes`` and the edges between them
    by ``create_graphRels``.
    """

    def __init__(self):
        """Locate ``disease.csv`` next to this module and open the graph."""
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_path = os.path.join(cur_dir, 'disease.csv')
        self.graph = Graph("http://localhost:7474", username="******", password="******")

    def read_file(self):
        """
        Read the CSV and collect entities, relationship pairs and disease
        properties.

        :return: a 14-tuple: the sets of diseases, symptoms, aliases, parts,
            departments, complications and drugs; then the lists of
            ``[disease, other]`` pairs for alias, symptom, part, department,
            complication and drug; and finally the list of per-disease
            property dicts.
        """
        # cols = ["name", "alias", "part", "age", "infection", "insurance", "department", "checklist", "symptom",
        #         "complication", "treatment", "drug", "period", "rate", "money"]
        # Entities
        diseases = []  # diseases
        aliases = []  # aliases
        symptoms = []  # symptoms
        parts = []  # body parts
        departments = []  # hospital departments
        complications = []  # complications
        drugs = []  # drugs

        # Disease properties: age, infection, insurance, checklist, treatment, period, rate, money
        diseases_infos = []
        # Relationships
        disease_to_symptom = []  # disease -> symptom
        disease_to_alias = []  # disease -> alias
        diseases_to_part = []  # disease -> body part
        disease_to_department = []  # disease -> department
        disease_to_complication = []  # disease -> complication
        disease_to_drug = []  # disease -> drug

        # NOTE(review): pandas reads empty cells as NaN by default and
        # str(NaN) is the truthy string 'nan', so the "未知" fallbacks below
        # only fire for genuinely empty strings -- confirm whether NaN
        # should count as missing.
        all_data = pd.read_csv(self.data_path, encoding='gb18030').loc[:, :].values
        for data in all_data:
            disease_dict = {}  # properties of this disease
            # Disease name
            disease = str(data[0]).replace("...", " ").strip()
            disease_dict["name"] = disease
            # Record the entity itself; without this the returned disease
            # set was always empty.
            diseases.append(disease)
            # Aliases: separators are normalised to spaces before splitting
            line = re.sub("[,、;,.;]", " ", str(data[1])) if str(data[1]) else "未知"
            for alias in line.strip().split():
                aliases.append(alias)
                disease_to_alias.append([disease, alias])
            # Body parts; the fallback is a one-element list so that it is
            # not iterated character by character
            part_list = str(data[2]).strip().split() if str(data[2]) else ["未知"]
            for part in part_list:
                parts.append(part)
                diseases_to_part.append([disease, part])
            # Age
            age = str(data[3]).strip()
            disease_dict["age"] = age
            # Infectiousness
            infect = str(data[4]).strip()
            disease_dict["infection"] = infect
            # Medical insurance
            insurance = str(data[5]).strip()
            disease_dict["insurance"] = insurance
            # Departments
            department_list = str(data[6]).strip().split()
            for department in department_list:
                departments.append(department)
                disease_to_department.append([disease, department])
            # Check items
            check = str(data[7]).strip()
            disease_dict["checklist"] = check
            # Symptoms; the last whitespace-separated token is dropped
            symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
            for symptom in symptom_list:
                symptoms.append(symptom)
                disease_to_symptom.append([disease, symptom])
            # Complications; the last token is dropped as well
            complication_list = str(data[9]).strip().split()[:-1] if str(data[9]) else ["未知"]
            for complication in complication_list:
                complications.append(complication)
                disease_to_complication.append([disease, complication])
            # Treatment; the final four characters are cut off
            treat = str(data[10]).strip()[:-4]
            disease_dict["treatment"] = treat
            # Drugs; the last token is dropped
            drug_string = str(data[11]).replace("...", " ").strip()
            for drug in drug_string.split()[:-1]:
                drugs.append(drug)
                disease_to_drug.append([disease, drug])
            # Recovery period
            period = str(data[12]).strip()
            disease_dict["period"] = period
            # Cure rate
            rate = str(data[13]).strip()
            disease_dict["rate"] = rate
            # Cost
            money = str(data[14]).strip() if str(data[14]) else "未知"
            disease_dict["money"] = money

            diseases_infos.append(disease_dict)

        return set(diseases), set(symptoms), set(aliases), set(parts), set(departments), set(complications), \
                set(drugs), disease_to_alias, disease_to_symptom, diseases_to_part, disease_to_department, \
                disease_to_complication, disease_to_drug, diseases_infos

    def create_node(self, label, nodes):
        """
        Create one node per name and print the running progress.

        :param label: node label
        :param nodes: collection of node names
        :return: None
        """
        for count, node_name in enumerate(nodes, start=1):
            node = Node(label, name=node_name)
            self.graph.create(node)
            print(count, len(nodes))
        return

    def create_diseases_nodes(self, disease_info):
        """
        Create the ``Disease`` nodes together with their properties.

        :param disease_info: list(Dict) as produced by ``read_file``
        :return: None
        """
        for count, disease_dict in enumerate(disease_info, start=1):
            node = Node("Disease", name=disease_dict['name'], age=disease_dict['age'],
                        infection=disease_dict['infection'], insurance=disease_dict['insurance'],
                        treatment=disease_dict['treatment'], checklist=disease_dict['checklist'],
                        period=disease_dict['period'], rate=disease_dict['rate'],
                        money=disease_dict['money'])
            self.graph.create(node)
            print(count)
        return

    def create_graphNodes(self):
        """
        Create every entity node of the knowledge graph.

        :return: None
        """
        disease, symptom, alias, part, department, complication, drug, rel_alias, rel_symptom, rel_part, \
        rel_department, rel_complication, rel_drug, rel_infos = self.read_file()
        self.create_diseases_nodes(rel_infos)
        self.create_node("Symptom", symptom)
        self.create_node("Alias", alias)
        self.create_node("Part", part)
        self.create_node("Department", department)
        self.create_node("Complication", complication)
        self.create_node("Drug", drug)

        return

    def create_graphRels(self):
        """Create every relationship of the knowledge graph (re-reads the CSV)."""
        disease, symptom, alias, part, department, complication, drug, rel_alias, rel_symptom, rel_part, \
        rel_department, rel_complication, rel_drug, rel_infos = self.read_file()

        self.create_relationship("Disease", "Alias", rel_alias, "ALIAS_IS", "别名")
        self.create_relationship("Disease", "Symptom", rel_symptom, "HAS_SYMPTOM", "症状")
        self.create_relationship("Disease", "Part", rel_part, "PART_IS", "发病部位")
        self.create_relationship("Disease", "Department", rel_department, "DEPARTMENT_IS", "所属科室")
        self.create_relationship("Disease", "Complication", rel_complication, "HAS_COMPLICATION", "并发症")
        self.create_relationship("Disease", "Drug", rel_drug, "HAS_DRUG", "药品")

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """
        Create the edges between already existing entity nodes.

        Duplicate pairs are created once. A failing query is printed and
        skipped so that one bad edge does not stop the import.

        :param start_node: label of the start nodes
        :param end_node: label of the end nodes
        :param edges: iterable of ``[start_name, end_name]`` pairs
        :param rel_type: relationship type
        :param rel_name: value stored in the relationship's ``name`` property
        :return: None
        """
        # De-duplicate, keeping the first-seen order
        unique_edges = list(dict.fromkeys((edge[0], edge[1]) for edge in edges))
        total = len(unique_edges)
        # Labels and the relationship type cannot be Cypher parameters, but
        # the names can -- so quotes inside a name no longer break the query.
        query = ("match(p:%s),(q:%s) where p.name=$p_name and q.name=$q_name "
                 "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type))
        count = 0
        for p_name, q_name in unique_edges:
            try:
                self.graph.run(query, p_name=p_name, q_name=q_name, rel_name=rel_name)
                count += 1
                print(rel_type, count, total)
            except Exception as e:
                print(e)
        return
MATCH (p:Person)
where p.name="张柏芝"
return p
'''
#清空数据库
#data = graph.run('MATCH (n) OPTIONAL MATCH (n)-[r]-() DELETE n,r')
data = graph.run(cql)
print(list(data)[0]['p']["biography"])
"""

# Import nodes: movie genres == mind the type conversion (gid is cast with toInteger)
cql = '''
LOAD CSV WITH HEADERS  FROM "file:///genre.csv" AS line
MERGE (p:Genre{gid:toInteger(line.gid),name:line.gname})
'''
result = graph.run(cql)
print(result, "电影类型 存储成功")

# Import nodes: actor/person information (pid is cast with toInteger)
cql = '''
LOAD CSV WITH HEADERS FROM 'file:///person.csv' AS line
MERGE (p:Person { pid:toInteger(line.pid),birth:line.birth,
death:line.death,name:line.name,
biography:line.biography,
birthplace:line.birthplace})
'''
result = graph.run(cql)
print(result, "演员信息 存储成功")

#导入节点 电影信息
cql = '''
#         # source_type: 13 selected
#         # allSourcesFlag: 1
#         # searchType : 2
#     # output format: TEXT

# ============================================= ADD RXCUI to node Prescription ============================================
    # save the returned text file in the import directory
    #====== in bash: extract rows with rxcui  =======#
    # cd to db/import directory
    # $awk '/\|RXCUI\|/' 4d0f0dc897a98b673797bcddf13c1db3.text > rxcui_pc.txt

    pc_rxcui = '/Users/yaqi/Documents/Neo4j/load_pc_drug_df copy/import/rxcui_pc.txt'
    pc_rxcui_df = pd.read_csv(pc_rxcui, sep = '|',  header=None)

    idx = '''CREATE INDEX ON :Prescription(rxcui) '''
    g.run(idx)

    pc_rxcui_df.apply(add_rxcui_Prescription,args=(g,), axis=1)
    print("finish adding rxcui to :Prescription")

    # print(genericName.shape) (23584079,)

# ============================================= Extract GenericName from GenericDrug============================================
#     q1 = '''
#     MATCH (gd:GenericDrug) RETURN gd.genericName'''
#
#     names = g.run(q1)
#     with open('/Users/yaqi/Documents/Neo4j/load_pc_drug_df/import/drug_genericName.txt', 'w') as text_file:
#         for name in names:
#
#             line = name['gd.genericName']+ '\n'
class CamaraDosDeputados:
    """Load Brazilian Chamber of Deputies open data into Neo4j and query it.

    The ``init_*`` methods pull JSON from the dadosabertos.camara.leg.br
    API through ``apoc.load.json`` inside Cypher; the ``get_*`` methods
    read the resulting graph.
    """

    def __init__(self):
        """Open the graph: bolt URI first, then (username, password)."""
        self.graph = Graph("bolt://localhost:7687", auth=('neo4j', 'abc'))

    def init_db(self):
        """Wipe the graph and reload deputies, expenses, bodies and parties."""
        self.delete_all()
        self.create_constraints()

        self.depIds = self.get_dep_ids()
        self.partyIds = self.get_party_ids()

        self.init_deputados()
        self.init_despesas()
        self.init_orgaos()
        self.init_partidos()

    def delete_all(self):
        """Delete every node and relationship in the graph."""
        self.graph.run("MATCH(n) DETACH DELETE n")

    def create_constraints(self):
        """Currently a no-op; the uniqueness constraints below are disabled."""
        # self.graph.run("CREATE CONSTRAINT ON (d:Deputado) ASSERT d.nome is UNIQUE;")
        # self.graph.run("CREATE CONSTRAINT ON (d:Deputado) ASSERT d.id is UNIQUE;")
        # self.graph.run("CREATE CONSTRAINT ON (p:Partido) ASSERT p.sigla is UNIQUE;")
        # self.graph.run("CREATE CONSTRAINT ON (m:Municipio) ASSERT m.nome is UNIQUE;")
        # self.graph.run("CREATE CONSTRAINT ON (uf:UnidadeFederativa) ASSERT uf.sigla is UNIQUE;")
        # self.graph.run("CREATE CONSTRAINT ON (o:Orgao) ASSERT o.idOrgao is UNIQUE;")
        pass

    def get_dep_ids(self):
        """Return the deputy ids listed by the API (ordered by name)."""
        get_depIds_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados?ordem=ASC&ordenarPor=nome' AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as dados
            RETURN dados.id
        """
        return [r['dados.id'] for r in self.graph.run(get_depIds_query)]

    def get_party_ids(self):
        """Return the party ids listed by the API (ordered by acronym)."""
        get_partyIds_query = """
            WITH 'https://dadosabertos.camara.leg.br/api/v2/partidos?itens=10000&ordem=ASC&ordenarPor=sigla' AS url
            CALL apoc.load.json(url) YIELD value
            UNWIND value.dados as dados
            RETURN dados.id
        """
        return [r['dados.id'] for r in self.graph.run(get_partyIds_query)]

    def init_deputados(self):
        """Merge one Deputado per id in ``self.depIds`` with party and birthplace."""
        for dep_id in self.depIds:
            init_deputado_query = """
                WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}'""".format(
                id=dep_id) + """ AS url
                CALL apoc.load.json(url) YIELD value
                UNWIND value.dados as dados

                MERGE(d:Deputado {id : toInteger(dados.id), nomeCivil : dados.nomeCivil})
                    ON CREATE SET d.nome = dados.ultimoStatus.nome, d.idLegislatura = dados.ultimoStatus.idLegislatura, d.uri = dados.uri, d.urlFoto = dados.ultimoStatus.urlFoto,
                    d.sexo = dados.sexo, d.nascimento = DATE(dados.dataNascimento), d.cpf = dados.cpf, d.email = dados.ultimoStatus.gabinete.email, d.escolaridade = dados.escolaridade

                MERGE(p:Partido {sigla : dados.ultimoStatus.siglaPartido})
                    ON CREATE SET p.uri = dados.ultimoStatus.uriPartido

                MERGE (d)-[:FILIADO]-(p)

                FOREACH(t IN CASE WHEN dados.ufNascimento IS NOT NULL THEN [1] else [] END |
                    MERGE(m:Municipio {nome: dados.municipioNascimento})
                    MERGE(uf: UnidadeFederativa {sigla: dados.ufNascimento})
                    MERGE (d)-[:ORIGEM]->(m)
                    MERGE (m)-[:SITUADO]-(uf)
                )
            """
            self.graph.run(init_deputado_query)

    def init_despesas(self):
        """Create the 2019/2020 expense nodes of every deputy in ``self.depIds``."""
        for dep_id in self.depIds:
            init_despesas_query = """
                WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}/despesas?ano=2019&ano=2020&itens=100000&ordem=ASC&ordenarPor=mes'""".format(
                id=dep_id) + """ AS url
                CALL apoc.load.json(url) YIELD value
                UNWIND value.dados as despesas
                """ + "MATCH (dep:Deputado {id:" + str(dep_id) + "})" + """
                FOREACH(dados in despesas | 
                    MERGE (t:TipoDespesa {tipo: dados.tipoDespesa})
                    CREATE (des:Despesa {valorDocumento: dados.valorDocumento, codDocumento: dados.codDocumento, nomeFornecedor: dados.nomeFornecedor, urlDocumento: dados.urlDocumento, tipo: dados.tipoDocumento})
                    CREATE (des)-[:TIPODESPESA]->(t)
                    CREATE (dep)-[:GASTOU {data: DATE(dados.dataDocumento)}]->(des)
                )
            """
            self.graph.run(init_despesas_query)

    def init_orgaos(self):
        """Link every deputy in ``self.depIds`` to the bodies joined since 2019-01-01."""
        for dep_id in self.depIds:
            init_orgaos_query = """
                WITH 'https://dadosabertos.camara.leg.br/api/v2/deputados/{id}/orgaos?dataInicio=2019-01-01&itens=100000&ordem=ASC&ordenarPor=dataInicio'""".format(
                id=dep_id) + """ AS url
                CALL apoc.load.json(url) YIELD value
                UNWIND value.dados as orgaos
                """ + "MATCH (dep:Deputado {id:" + str(dep_id) + "})" + """
                FOREACH(orgao in orgaos |
                    MERGE (o:Orgao {idOrgao: toInteger(orgao.idOrgao)})
                        ON CREATE SET o.uriOrgao = orgao.uriOrgao, o.siglaOrgao = orgao.siglaOrgao, o.nomeOrgao = orgao.nomeOrgao

                    CREATE (dep)-[:PARTICIPA {titulo: orgao.titulo, dataInicio: DATE(left(orgao.dataInicio,10)), dataFim: DATE(left(orgao.dataFim,10))}]->(o)
                )
            """
            self.graph.run(init_orgaos_query)

    def init_partidos(self):
        """Complete each party in ``self.partyIds`` and link it to its leader."""
        for party_id in self.partyIds:
            init_party_query = """
                WITH 'https://dadosabertos.camara.leg.br/api/v2/partidos/{id}'""".format(
                id=party_id) + """ AS url
                CALL apoc.load.json(url) YIELD value
                UNWIND value.dados as dados

                WITH dados
                MATCH (p:Partido {sigla: dados.sigla})
                MATCH (d:Deputado {nome: dados.status.lider.nome})
                MERGE (d)-[:LIDER]-(p)
                SET p.id = toInteger(dados.id), p.nome = dados.nome, p.situacao = dados.status.situacao,
                p.totalMembros = toInteger(dados.status.totalMembros), p.urlLogo = dados.urlLogo
            """
            self.graph.run(init_party_query)

    def get_all_query(self):
        """Return the double-quoted Cypher text matching deputy-party paths."""
        return "\"MATCH a=(:Deputado)-[]-(:Partido) RETURN a\""

    def get_deputados_query(self):
        """Return the double-quoted Cypher text matching every deputy."""
        return "\"MATCH (d:Deputado) RETURN d\""

    def get_partidos_query(self):
        """Return the double-quoted Cypher text matching every party."""
        return "\"MATCH (p:Partido) RETURN p\""

    def get_orgaos_query(self):
        """Return the double-quoted Cypher text matching every body."""
        return "\"MATCH (o:Orgao) RETURN o\""

    def get_deputados(self):
        """Return the deputy names in alphabetical order."""
        query = """
            MATCH(dep:Deputado)
            RETURN dep.nome
            ORDER BY dep.nome
        """
        return [record["dep.nome"] for record in self.graph.run(query)]

    def get_partidos(self):
        """Return the party acronyms, largest membership first."""
        query = """
            MATCH(p:Partido)
            RETURN p.sigla
            ORDER BY p.totalMembros DESC
        """
        return [record["p.sigla"] for record in self.graph.run(query)]

    def get_orgaos(self):
        """Return the body acronyms in alphabetical order."""
        query = """
            MATCH(o:Orgao)
            RETURN o.siglaOrgao
            ORDER BY o.siglaOrgao
        """
        return [record["o.siglaOrgao"] for record in self.graph.run(query)]

    def _first_record(self, query, parameters):
        """Run *query* with *parameters*; return its first record or None."""
        for record in self.graph.run(query, parameters):
            return record
        return None

    def get_deputado_info(self, deputado_name):
        """Return the first record for the deputy named *deputado_name*, or None.

        The name is passed as a Cypher parameter, so quotes inside it
        cannot break or alter the query.
        """
        query = """
            MATCH (d:Deputado)
            WHERE d.nome = $nome
            RETURN d.nascimento, d.nomeCivil, d.urlFoto, d.cpf, d.escolaridade, d.sexo, d.nome, d.idLegislatura, d.id
        """
        return self._first_record(query, {"nome": deputado_name})

    def get_deputado_relations_query(self, deputado_name):
        """Return the double-quoted Cypher text matching a deputy's relations.

        NOTE(review): the name is embedded in the returned text, so a quote
        character inside it produces a malformed query -- confirm how the
        caller consumes this string before escaping here.
        """
        query = "\"MATCH a=(d:Deputado)-[]-() WHERE d.nome = '" + deputado_name + "' RETURN a\""
        return query

    def get_partido_info(self, partido_name):
        """Return the first record for the party with acronym *partido_name*, or None."""
        query = """
            MATCH (p:Partido)
            WHERE p.sigla = $sigla
            RETURN p.totalMembros, p.sigla, p.situacao, p.nome, p.id, p.urlLogo
        """
        return self._first_record(query, {"sigla": partido_name})

    def get_orgao_info(self, orgao_name):
        """Return the first record for the body with acronym *orgao_name*, or None."""
        query = """
            MATCH (o:Orgao)
            WHERE o.siglaOrgao = $sigla
            RETURN o.nomeOrgao, o.siglaOrgao, o.idOrgao
        """
        return self._first_record(query, {"sigla": orgao_name})
if __name__ == "__main__":
    # The Neo4j password is read from the NEO4J_PASS environment variable.
    pw = os.environ.get('NEO4J_PASS')
    g = Graph("http://localhost:7474/", password=pw)  ## readme need to document setting environment variable in pycharm
    tx = g.begin()

    q1 = '''
    MATCH (cl: Client)
    RETURN id(cl), cl.clientName
    '''

    q2 = '''
    MATCH (df:DrugFirm)
    RETURN id(df), df.firmName'''

    client_obj = g.run(q1)
    df_obj = g.run(q2)

    # One {'id', 'clientName'} dict per Client node.
    client_lst = [
        {'id': client['id(cl)'], 'clientName': client['cl.clientName']}
        for client in client_obj
    ]

    # One {'id', 'firmName'} dict per DrugFirm node. The loop variable is no
    # longer called `object`, which used to shadow the builtin for the rest
    # of the module.
    df_lst = [
        {'id': firm['id(df)'], 'firmName': firm['df.firmName']}
        for firm in df_obj
    ]
# Remaining relationships; the nodes and r1..r11 are defined earlier in the script.
r12 = Relationship(node_6, '助班', node_7)
r13 = Relationship(node_8, '班主任', node_1)
r14 = Relationship(node_8, '班主任', node_2)
r15 = Relationship(node_8, '班主任', node_3)
r16 = Relationship(node_8, '班主任', node_4)
r17 = Relationship(node_8, '班主任', node_7)

# Combine all nodes (S) and all relationships (s) with | and create each
# combination in one call.
# NOTE(review): r1 is listed twice in the relationship union below.
S = node_1 | node_2 | node_3 | node_4 | node_5 | node_6 | node_7 | node_8 | node_9 | node_10
s = r1 | r2 | r3 | r4 | r5 | r6 | r7 | r8 | r9 | r10 | r11 | r12 | r13 | r14 | r1 | r15 | r16 | r17
graph.create(S)
graph.create(s)
# Query the Person nodes whose sex property is '女'.
A = graph.data("Match(female:Person) where female.sex = '女' return female")
print("\t查询数据库中的女性信息:")
for a in A:
    print(a)

# Query every Person that is the target of a 班主任 relationship.
B = graph.data("MATCH(n:Person)-[:班主任]->(student:Person) return student")
print("\n\t输出覃老师的学生信息:")
for b in B:
    print(b)

# Look up one Person by name, change the age locally and push it to the database.
node = graph.find_one(label='Person', property_key='name', property_value="覃晓")
node['age'] = 30
graph.push(node)
# Read the same node again to show the stored values.
Data = graph.find_one(label='Person', property_key='name', property_value="覃晓")
print("\n\t输出覃老师修改后的信息:")
print(Data)

# Delete the Person named 刘旭鹏 together with its relationships
# (the pattern only matches when the node has at least one relationship).
graph.run('MATCH (p:Person{name:"刘旭鹏"})-[r]-() detach delete r,p ')
class Neo4j():
    """Query helper for a Neo4j knowledge graph reached through py2neo.

    ``connectDB`` must be called before any query method; until then
    ``graph`` is ``None``.  Every caller-supplied value is converted with
    ``str()`` and sent as a Cypher parameter instead of being spliced into
    the query text, so quotes in a value cannot break or alter a query.
    """
    graph = None

    def __init__(self):
        print("create neo4j class ...")

    def connectDB(self):
        """Open the connection to the local Neo4j HTTP endpoint."""
        self.graph = Graph("http://localhost:7474",
                           username="******",
                           password="******")

    def matchItembyTitle(self, value):
        """Return the ``Item`` nodes whose title equals *value*."""
        sql = "MATCH (n:Item { title: $title }) return n;"
        return self.graph.run(sql, title=str(value)).data()

    # Return the HudongItem nodes matching a title
    def matchHudongItembyTitle(self, value):
        """Return the ``HudongItem`` nodes whose title equals *value*.

        A failing query is reported on stdout and yields an empty list
        (previously the method then crashed with UnboundLocalError).
        """
        sql = "MATCH (n:HudongItem { title: $title }) return n;"
        try:
            return self.graph.run(sql, title=str(value)).data()
        except Exception:
            print(sql, str(value))
            return []

    # Return the relationships of an entity, looked up by its name
    def getEntityRelationbyEntity(self, value):
        """Return ``rel``/``entity2`` rows for relationships leaving *value*."""
        return self.graph.run(
            "MATCH (entity1) - [rel] -> (entity2)  WHERE entity1.name = $name"
            " RETURN rel,entity2",
            name=str(value)).data()

    # ---- Relationship queries below ----

    # Like getEntityRelationbyEntity, but the start node is returned as well
    def findRelationByEntity(self, entity1):
        """Return ``n1``/``rel``/``n2`` rows for relationships leaving *entity1*."""
        return self.graph.run(
            "MATCH (n1 {name:$name})- [rel] -> (n2) RETURN n1,rel,n2",
            name=str(entity1)).data()

    # Relationships that point at the given entity
    def findRelationByEntity2(self, entity1):
        """Return ``n1``/``rel``/``n2`` rows for relationships entering *entity1*."""
        return self.graph.run(
            "MATCH (n1)- [rel] -> (n2 {name:$name}) RETURN n1,rel,n2",
            name=str(entity1)).data()

    # Find entity2 from entity1 and the relationship name
    def findOtherEntities(self, entity, relation):
        """Return rows for relationships named *relation* leaving *entity*."""
        return self.graph.run(
            "MATCH (n1 {name:$name})- [rel {name:$relation}] -> (n2)"
            " RETURN n1,rel,n2",
            name=str(entity), relation=str(relation)).data()

    # Find entity1 from entity2 and the relationship name
    def findOtherEntities2(
            self, entity,
            relation):
        """Return rows for relationships named *relation* entering *entity*."""
        return self.graph.run(
            "MATCH (n1)- [rel {name:$relation}] -> (n2 {name:$name})"
            " RETURN n1,rel,n2",
            name=str(entity), relation=str(relation)).data()

    # Shortest path between two entities
    def findRelationByEntities(self, entity1, entity2):
        """Return the relationships on the shortest RELATION path between two diseases.

        Each list item is a dict with keys ``n1``, ``n2`` and ``rel``.  The
        list is empty when the query yields no row.  The former fallback
        queries repeated the same MATCH and therefore could not find
        anything the first query had missed, so they were removed.
        """
        answer = self.graph.run(
            "MATCH (p1:Disease {name:$name1}),(p2:Disease{name:$name2}),"
            "p=shortestpath((p1)-[rel:RELATION*]-(p2)) RETURN rel",
            name1=str(entity1), name2=str(entity2)).evaluate()

        relationDict = []
        if answer is not None:
            for x in answer:
                relationDict.append({
                    'n1': x.start_node,
                    'n2': x.end_node,
                    'rel': x,
                })
        return relationDict

    # Check whether an entity-relationship-entity triple exists
    def findEntityRelation(self, entity1, relation, entity2):
        """Return rows for the RELATION named *relation* from *entity1* to *entity2*.

        ``data()`` always returns a list, so the former ``is None`` retry
        branches could never run and were dropped.
        """
        return self.graph.run(
            "MATCH (n1:Disease {name:$name1})- [rel:RELATION {name:$relation}]"
            " -> (n2:Disease{name:$name2}) RETURN n1,rel,n2",
            name1=str(entity1), relation=str(relation),
            name2=str(entity2)).data()
from rest_framework import status
import threading
import json
import time

# Connect with neo4j db: retry every 3 seconds until a probe query succeeds.
connected = False
while not connected:
    neo4j_uri = settings.NEO4J_DB['URI']
    # The client is only (re)built when a URI is configured.
    # NOTE(review): with an empty/None URI and no earlier neo4j_client, the
    # probe below raises NameError, which the except turns into another retry.
    if neo4j_uri not in ('', None):
        credentials = (settings.NEO4J_DB['USER'], settings.NEO4J_DB['PASS'])
        neo4j_client = Graph(neo4j_uri, auth=credentials)

    try:
        neo4j_client.run("Match () Return 1 Limit 1")
        print('Neo4j connection established!')
        connected = True
    except Exception as e:
        print('Neo4j connection fail!', e)
        time.sleep(3)

# Pub/sub
# d = threading.Thread(name='daemon', target=topic_subscribe)
# d.setDaemon(True)
# d.start()
# d.join()


# Subscribe on topic
def topic_subscribe():