Esempio n. 1
1
class TwitterGraph(object):
    '''A class for interfacing with the Neo4j Twitter network database.

    Users are stored as ``User`` nodes identified by their ``id`` property.
    A ``FOLLOWS`` relationship points from the follower to the followed user
    and carries a ``rec`` property numbering it within the list it came from.
    '''

    # Initial setup and linking into the database
    def __init__(self, host_port, user, password):
        '''Makes connection to Neo4j database.

        host_port -- "host:port" string used for authentication and the URL
        user, password -- credentials handed to ``authenticate``
        '''
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except Exception:  # ConstraintViolationException
            print('Unique id on Node User already exists')

    # Functions to add data to the database
    def _merge_follows(self, user_id, other_ids, rec_count, outgoing, flag):
        '''Merges FOLLOWS relationships between one user and a list of others.

        outgoing -- True: user FOLLOWS each other id; False: the reverse
        flag -- name of the boolean property set on the user when finished
        '''
        user = Node('User', id=user_id)
        self.graph.merge(user)  # important to merge before doing anything
        # preserving the order of the list. rec_count + 1 = first entry
        for rec, other_id in enumerate(other_ids, rec_count + 1):
            other = Node('User', id=other_id)
            self.graph.merge(other)
            if outgoing:
                rel = Relationship(user, 'FOLLOWS', other, rec=rec)
            else:
                rel = Relationship(other, 'FOLLOWS', user, rec=rec)
            self.graph.merge(rel)
        user[flag] = True
        self.graph.push(user)

    def add_following(self, user_id, following_ids, rec_count):
        '''Given a unique user id, adds the relationship for who they follow.
        Adds a User Node with the id if it doesn't exist.

        rec_count is the number of relationships already recorded; the new
        ones are numbered from rec_count + 1 in list order.'''
        self._merge_follows(user_id, following_ids, rec_count,
                            outgoing=True, flag='following_added')

    def add_followers(self, user_id, follower_ids, rec_count):
        '''Given a unique user id, adds the relationship for who follows them.
        Adds a User Node with the id if it doesn't exist.

        rec_count is the number of relationships already recorded; the new
        ones are numbered from rec_count + 1 in list order.'''
        self._merge_follows(user_id, follower_ids, rec_count,
                            outgoing=False, flag='followers_added')

    def add_user_properties(self, user):
        '''Copies the whitelisted attributes of ``user`` onto its User Node.

        ``user`` is normally an object exposing ``id`` and the profile fields
        in its ``__dict__``.  If handling it that way fails, ``user`` is
        treated as a mapping with ``user_id`` and ``error`` keys and the node
        is marked with screen_name 'INVALID'.'''
        try:
            user_id = user.id
            existing_user = Node('User', id=user_id)
            clean_prop_dict = self.__clean_user_dict(user.__dict__)
            self.graph.merge(existing_user)
            for k, v in clean_prop_dict.items():
                existing_user[k] = v
            # add additional label to verified accounts
            if clean_prop_dict['verified']:
                existing_user.add_label('Verified')
        except Exception:
            # bad user id
            user_id = user['user_id']
            error = user['error']
            existing_user = Node('User', id=user_id)
            self.graph.merge(existing_user)
            existing_user['screen_name'] = 'INVALID'
            existing_user['error'] = error
            print('Found invalid user id')
        self.graph.push(existing_user)

    def __clean_user_dict(self, user_prop_dict):
        '''Given the attribute dict of a user object, returns the subset of
        properties that is stored on the node.

        The profile image URL loses its ``_normal`` size suffix, and
        ``created_at`` (a datetime) is replaced by a formatted string plus an
        ordinal day number in ``created_at_ord``.'''

        keep = ('contributors_enabled', 'created_at', 'default_profile',
                'default_profile_image', 'description', 'favourites_count',
                'followers_count', 'friends_count', 'geo_enabled', 'id',
                'id_str', 'is_translator', 'lang', 'listed_count', 'location',
                'name', 'profile_image_url_https', 'protected', 'screen_name',
                'statuses_count', 'time_zone', 'utc_offset', 'verified',
                'withheld_in_countries', 'withheld_scope')

        # only keep the above keys for inserting
        clean = {k: v for k, v in user_prop_dict.items() if k in keep}
        image, ext = os.path.splitext(clean['profile_image_url_https'])
        # str.rstrip() removes a *set of characters*, which also ate the end
        # of file names such as "moral_normal"; drop the exact suffix instead
        suffix = '_normal'
        if image.endswith(suffix):
            image = image[:-len(suffix)]
        clean['profile_image_url_https'] = image + ext
        # convert date time to string
        created = clean['created_at']
        clean['created_at_ord'] = created.toordinal()
        clean['created_at'] = created.strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # Functions to query database
    def get_nodes_missing_props(self, limit=100):
        '''Returns up to ``limit`` ids of nodes without user properties'''
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where("_.screen_name IS NULL").limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        '''Returns up to ``limit`` ids of nodes that follow BernieSanders and
        have no screen_name property yet'''
        # limit is coerced to int before being placed in the query text
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.screen_name)
                     RETURN n.id
                     LIMIT {:d};""".format(int(limit))
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        '''Returns ids missing the follower or following relationships.
        Valid inputs for rel is FOLLOWING or FOLLOWERS; anything else raises
        ValueError.'''
        flags = {'FOLLOWING': 'following_added',
                 'FOLLOWERS': 'followers_added'}
        if rel not in flags:
            raise ValueError(
                "rel must be 'FOLLOWING' or 'FOLLOWERS', got {!r}".format(rel))
        selector = NodeSelector(self.graph)
        condition = "_.{} IS NULL".format(flags[rel])
        selected = selector.select('User').where(condition).limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        '''Returns up to 100 ids of BernieSanders followers with at least
        1000 followers whose following list has not been added.
        ``rel`` is accepted but not used.'''
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User)
                                     WHERE n.followers_count >= 1000
                                     AND NOT EXISTS(n.following_added)
                                     AND m.screen_name = 'BernieSanders'
                                     RETURN n.id
                                     LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        '''Returns up to 100 ids of users followed by BernieSanders whose
        following list has not been added.  ``rel`` is accepted but not
        used.'''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        '''Same query as get_nodes_missing_rels_bfriends but fetches up to
        500 rows and returns only the last 100 of them.  ``rel`` is accepted
        but not used.'''
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 500;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()[-100:]]
Esempio n. 2
0
def neo_data_update_trigger(type_: str, data: dict):
    """
    Mirror a database update into the Neo4j copy of the same record.

    Meant to be triggered alongside the primary database update.

    Args:
        type_: node label to search under.
        data: properties to write onto the node; ``data["s_name"]`` selects
            the node. The payload may need a round of preprocessing first.

    Returns:
        True when exactly one node matched and was pushed, otherwise the
        string "failed, many data were queried".

    Raises:
        Neo4jUpdateFailedException: wraps any exception raised on the way.
    """
    try:
        neo_graph = Graph(f"bolt://{NEO_HOST}:7687", password="******")
        matcher = NodeMatcher(neo_graph)
        candidates = list(matcher.match(type_, s_name=data["s_name"]))
        # Anything other than a single hit is reported as a failure string.
        if len(candidates) != 1:
            return "failed, many data were queried"
        target = candidates[0]
        target.update(**data)
        neo_graph.push(target)
        return True
    except Exception as _e:
        raise Neo4jUpdateFailedException(_e)
Esempio n. 3
0
def dump_symptoms():
    """Crawl symptoms, index each one in Elasticsearch and mirror it in Neo4j.

    Every crawled symptom is saved as an ``EntitySymptom`` document. A
    ``Symptom`` graph node is pushed only when no node with that name exists
    yet; its ``id`` is the string form of the saved document's ``_id``.
    """
    import os
    import sys
    # Make the project root importable before pulling in project modules.
    sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../../../")
    from med_base.storage.neo4j.models import Symptom
    from med_base.storage.es.models import EntitySymptom
    from elasticsearch_dsl.connections import connections

    from py2neo import Graph
    from conf.settings import NEO4J_URI, ES_HOST
    graph = Graph(NEO4J_URI)
    connections.create_connection(hosts=[ES_HOST])

    crawler = CrawlerSymptoms()
    for name, describe, source_url in crawler.process():
        logging.debug("name={}, describe={}".format(name, describe))
        document = EntitySymptom(name=name, describe=describe, source_url=source_url)
        document.save()

        # Skip symptoms that already have a node in the graph.
        if Symptom.match(graph).where(name=name).first():
            continue

        new_symptom = Symptom()
        new_symptom.name = name
        new_symptom.id = str(document._id)
        graph.push(new_symptom)
Esempio n. 4
0
class NeoPipeline(object):
    """Item pipeline that stores each scraped article in Neo4j.

    Connection parameters come from the crawler settings ``NEO_URI``,
    ``NEO_USERNAME`` and ``NEO_PASSWORD``.
    """

    # Item keys copied one-to-one onto the Article object.
    _ARTICLE_FIELDS = ('articleId', 'title', 'summary', 'author', 'tag',
                       'url', 'date', 'star', 'score', 'views', 'comments',
                       'source')

    def __init__(self, neo_uri, neo_username, neo_password):
        """Remember the connection parameters; no connection is opened yet."""
        self.neo_uri = neo_uri
        self.neo_username = neo_username
        self.neo_password = neo_password

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from ``crawler.settings``."""
        settings = crawler.settings
        # The key used to be misspelled 'NeO_PASSWORD'. Scrapy only picks up
        # upper-case names from a settings module, so that spelling could
        # never match a normally declared setting. The old spelling is still
        # consulted as a fallback for settings injected some other way.
        password = settings.get('NEO_PASSWORD')
        if password is None:
            password = settings.get('NeO_PASSWORD')
        return cls(neo_uri=settings.get('NEO_URI'),
                   neo_username=settings.get('NEO_USERNAME'),
                   neo_password=password)

    def open_spider(self, spider):
        """Open the graph connection when the spider starts."""
        self.graph = Graph(self.neo_uri,
                           username=self.neo_username,
                           password=self.neo_password)

    def process_item(self, item, spider):
        """Copy the item's fields onto an Article, push it and return the item.

        Raises KeyError if the item lacks one of the expected fields.
        """
        fields = dict(item)
        art = Article()
        for name in self._ARTICLE_FIELDS:
            setattr(art, name, fields[name])
        self.graph.push(art)
        return item
Esempio n. 5
0
    def update(cls,
               experiment_id: str,
               graph: Graph,
               neighbours: List[Neighbour] = None,
               leaf: NearestLeaf = None) -> 'SampleNode':
        """Replace the neighbours and/or nearest leaf of an existing sample node.

        Each argument left as ``None`` leaves that relationship set untouched.
        Neighbours that do not exist in the graph are skipped silently; a
        leaf that does not exist raises ``NotFound`` before anything is
        pushed.

        Returns the node after it has been pushed to ``graph``.
        """
        sample_node = cls.get(experiment_id, graph)

        if neighbours is not None:
            sample_node.neighbours.clear()
            for entry in neighbours:
                candidate = SampleNode()
                candidate.experiment_id = entry.experiment_id
                if not candidate.exists(graph):
                    continue
                sample_node.neighbours.add(candidate, distance=entry.distance)

        if leaf is not None:
            sample_node.lineage.clear()
            leaf_node = LeafNode()
            leaf_node.leaf_id = leaf.leaf_id
            if not leaf_node.exists(graph):
                raise NotFound
            sample_node.lineage.add(leaf_node, distance=leaf.distance)

        graph.push(sample_node)
        return sample_node
Esempio n. 6
0
def store_in_neo4j(triple):
	"""Store pickled entities and their triples in Neo4j.

	Every entity found in ``entity.pkl`` becomes an ``Entity`` node. Then, for
	each ``entity -> [(predicate, value), ...]`` entry of ``triple``: when
	``value`` is itself a key of ``triple`` a relationship named ``predicate``
	is created between the two entity nodes, otherwise ``value`` is stored as
	property ``predicate`` on the entity's node.
	"""
	from py2neo import Graph, Node, Relationship ,NodeSelector
	graph = Graph('http://52.83.213.55:7474',user ='******',password = '******')
	selector = NodeSelector(graph)
	# Load the entities. NOTE: unpickling runs arbitrary code if the file is
	# not trusted.
	with open('entity.pkl','rb') as handle:
		loaded = pickle.load(handle)
	# Add every entity as a node.
	for entity_name in list(flatten(loaded)):
		graph.create(Node('Entity',name= entity_name))
	# Walk the triples, adding node properties and relationships.
	for subject, pairs in triple.items():
		subject_node = selector.select('Entity').where(name = subject).first()
		for pair in pairs:
			predicate, value = pair[0], pair[1]
			if value not in triple:
				subject_node[predicate] = value
				graph.push(subject_node)
				continue
			object_node = selector.select('Entity').where(name = value).first()
			graph.create(Relationship(subject_node, predicate, object_node))
	print('数据存储完毕')
Esempio n. 7
0
def dump_parts():
    """Crawl body parts and departments and store them in Neo4j.

    Each crawled row names a parent and a child entry. Both are built as
    objects of the same model, the child is linked to the parent through
    ``partof`` and the child is pushed to the graph.
    """
    import os
    import sys
    # Make the project root importable before pulling in project modules.
    sys.path.append(os.path.dirname(os.path.abspath(__file__))+"/../../../../")
    from med_base.storage.neo4j.models import Bodypart, Department
    from conf.settings import NEO4J_URI
    from py2neo import Graph
    graph = Graph(NEO4J_URI)

    def _store_pairs(model, rows):
        # One (parent_name, parent_url, child_name, child_url) tuple per row.
        for parent_name, parent_url, child_name, child_url in rows:
            print(parent_name, parent_url, child_name, child_url)
            parent = model()
            parent.name = parent_name
            parent.id = to_uuid(parent_url)

            child = model()
            child.name = child_name
            child.id = to_uuid(child_url)

            child.partof.add(parent)
            graph.push(child)

    cp = CrawlerPart()
    _store_pairs(Bodypart, cp.crawler_bodyparts())
    _store_pairs(Department, cp.crawler_departments())
Esempio n. 8
0
def insertDiseaseAndDrug(disease_name, drug_name):
    """Record one co-occurrence of a disease and a drug in the graph.

    Nodes are looked up by their ``name`` and ``label`` *properties* and
    created when absent. If a relationship from the disease to the drug
    already exists its ``count`` is incremented; otherwise a ``CALL``
    relationship with ``count`` 1 is created.
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")
    lookup = "MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *"

    # test if the disease exists
    disease = graph.run(lookup, x=disease_name, y="Disease").evaluate()
    if disease is None:
        disease = Node(label="Disease", name=disease_name)
        graph.create(disease)

    # test if the drug exists
    drug = graph.run(lookup, x=drug_name, y="Drug").evaluate()
    if drug is None:
        drug = Node(label="Drug", name=drug_name)
        graph.create(drug)

    disease2drug = graph.match_one(start_node=disease, end_node=drug)
    if disease2drug is None:
        disease2drug = Relationship(disease, "CALL", drug)
        disease2drug['count'] = 1
        graph.create(disease2drug)
    else:
        # A relationship without a count yet reads back as None; start at 0
        # instead of failing on None + 1.
        disease2drug['count'] = (disease2drug['count'] or 0) + 1
        graph.push(disease2drug)
Esempio n. 9
0
def insertSympAndDisease(symptom_name, disease_name):
    """Record one co-occurrence of a symptom and a disease in the graph.

    Nodes are looked up by their ``name`` and ``label`` *properties* and
    created when absent. If a relationship from the symptom to the disease
    already exists its ``count`` is incremented; otherwise a ``CALL``
    relationship with ``count`` 1 is created.
    """
    graph = Graph(host="52.15.135.11", username="******", password="******")
    lookup = "MATCH (a) WHERE a.name={x} AND a.label={y} RETURN *"

    # test if the symptom exists
    symptom = graph.run(lookup, x=symptom_name, y="Symptom").evaluate()
    if symptom is None:
        symptom = Node(label="Symptom", name=symptom_name)
        graph.create(symptom)

    # test if the disease exists
    disease = graph.run(lookup, x=disease_name, y="Disease").evaluate()
    if disease is None:
        disease = Node(label="Disease", name=disease_name)
        graph.create(disease)

    symptom2disease = graph.match_one(start_node=symptom, end_node=disease)
    if symptom2disease is None:
        symptom2disease = Relationship(symptom, "CALL", disease)
        symptom2disease['count'] = 1
        graph.create(symptom2disease)
    else:
        # A relationship without a count yet reads back as None; start at 0
        # instead of failing on None + 1.
        symptom2disease['count'] = (symptom2disease['count'] or 0) + 1
        graph.push(symptom2disease)
Esempio n. 10
0
    def save_node(self, label, properties_dict, unique=True):
        '''Create a node with the given label and properties, or update the
        existing node that has the same name.

        Only ``unique == True`` is supported; any other value raises
        Exception("do not allow duplicate named nodes").
        '''
        # Deliberately an equality test against True, as before.
        if not unique == True:
            raise Exception("do not allow duplicate named nodes")

        match_count, matches = self.exists_node(label, properties_dict['name'])
        already_there = match_count > 0
        graph = Graph(password=self.password)

        if already_there:
            # exists: overwrite the listed properties on the first match
            current = matches[0]
            graph.merge(current)
            for prop_name, prop_value in properties_dict.items():
                current[prop_name] = prop_value
            graph.push(current)
            return

        # does not exist: insert a new node in its own transaction
        transaction = graph.begin()
        transaction.create(Node(label, **properties_dict))
        transaction.commit()
def write_symptom_info(symptom_info):
    """Write one crawled symptom record and its relations into Neo4j.

    ``symptom_info`` is a mapping keyed by the crawler's (Chinese) field
    names. The symptom node is looked up by name and created when absent,
    the text fields are copied onto it, and relationships are created to
    every listed disease and related symptom (creating those nodes when they
    do not exist).

    Returns True on success. On any failure, including a record without a
    symptom name, the error is appended to ``error.txt`` and False is
    returned.
    """
    graph = Graph("bolt://localhost:7687", username="******", password='******')
    node_matcher = NodeMatcher(graph)
    # record key -> node property
    text_fields = (
        ('概述', 'brief'),      # overview
        ('病因', 'cause'),      # cause
        ('检查', 'check'),      # examination
        ('诊断', 'diagnose'),   # diagnosis
        ('预防', 'prevent'),    # prevention
    )
    # record key -> (label of the related node, relationship type)
    link_fields = (
        ('可能患有的疾病', 'disease', 'symptom_disease'),  # possible diseases
        ('常见症状', 'symptom', 'r_symptom'),              # common symptoms
    )
    try:
        name = symptom_info.get('症状')
        if name is None:
            # Nothing below can work without the node to attach data to.
            raise ValueError("record has no symptom name")

        # Match on the property value instead of formatting it into the
        # WHERE clause: a quote inside a name used to break the query.
        symptom = node_matcher.match('symptom', name=name).first()
        if symptom is None:
            # this node has not been created yet
            symptom = Node('symptom', name=name)
            graph.create(symptom)

        changed = False
        for key, prop in text_fields:
            value = symptom_info.get(key)
            if value is None:
                continue
            if key == '概述':
                print(value)
            symptom[prop] = value
            changed = True
        if changed:
            graph.push(symptom)

        for key, label, rel_type in link_fields:
            related_names = symptom_info.get(key)
            if related_names is None:
                continue
            for related_name in related_names:
                related = node_matcher.match(label, name=related_name).first()
                if related is None:
                    related = Node(label, name=related_name)
                graph.create(Relationship(symptom, rel_type, related))
        return True
    except Exception as e:
        with open('error.txt', 'a') as f:
            # .get(): the record may be the very thing missing its name
            f.write(f"write_symptom_info:{symptom_info.get('症状')}\n{e}\n")
        return False
Esempio n. 12
0
def lambda_handler(event, context):
    """Merge a ``User`` node for ``event['id']`` and store ``event['datas']`` on it.

    The connection host and credentials are read from the environment
    variables NAME_NEO_DOMAIN, USER and PASSWORD. ``context`` is unused.
    """
    env = os.environ
    graph = Graph(host=env["NAME_NEO_DOMAIN"],
                  user=env["USER"],
                  password=env["PASSWORD"])

    user_node = Node("User", id=event['id'])
    graph.merge(user_node)
    # Copy every supplied key/value pair onto the node before pushing.
    for prop_name, prop_value in event['datas'].items():
        user_node[prop_name] = prop_value
    graph.push(user_node)
Esempio n. 13
0
 def post(self, request):
     """Create a Disease from the request payload and return it as JSON.

     Reads ``name`` and ``mesh_code`` from ``request.data`` and assigns a
     fresh random UUID.
     """
     payload = request.data
     # NOTE(review): connection credentials are hard-coded here.
     graph = Graph("bolt://localhost:7687",
                   auth=("neo4j", "ElectricWizard113"))
     disease = Disease(name=payload['name'],
                       mesh_code=payload['mesh_code'],
                       uuid=str(uuid.uuid4()))
     graph.push(disease)
     body = json.dumps(disease.to_dict())
     return HttpResponse(body, content_type='application/json')
    def create(cls, model: Model, graph: Graph) -> 'BaseGraphObject':
        """Push a new node whose primary key is copied from ``model``.

        Raises ``AlreadyExisted`` when a node with that primary key is
        already in ``graph``; otherwise returns the pushed node.
        """
        key_value = getattr(model, cls.__primarykey__)

        fresh = cls()
        setattr(fresh, fresh.__primarykey__, key_value)

        if not fresh.exists(graph):
            graph.push(fresh)
            return fresh

        raise AlreadyExisted
Esempio n. 15
0
def CreateChemicalEntityAndRelationship(file='data/CTD_chemicals.csv'):
    """Load chemicals from a CSV file into Neo4j as ``Chemical`` nodes.

    Lines starting with ``#`` are skipped. Each remaining row is mapped onto
    the column names in ``head``. A node with the row's ``id`` is updated if
    it exists and created otherwise; a ``father`` relationship is then
    created from it to every id listed in the ``|``-separated ``ParentIDs``
    column, creating placeholder parent nodes where needed.

    Progress (the number of rows handled) is printed every 100 rows.
    """
    import csv  # only this loader needs it

    head = [
        "name", "id", "CasRN", "Definition", "ParentIDs", "TreeNumbers",
        "ParentTreeNumbers", "Synonyms", "DrugBankIDs"
    ]
    label = "Chemical"
    Cgraph = Graph("http://localhost:7474")
    nodecount = 0
    with open(file, mode='r', encoding='utf-8', newline='') as fr:
        data_lines = (line for line in fr if not line.startswith("#"))
        # csv.reader keeps quoted fields that contain commas in one piece,
        # which a plain split(',') does not.
        for fields in csv.reader(data_lines):
            if len(fields) < 2:
                # blank or truncated line: there is no id to key the node on
                continue
            line_dict = dict(zip(head, fields))
            # An empty ParentIDs column used to become [''] and produced a
            # parent node with an empty id; drop empty entries.
            raw_parents = line_dict.get("ParentIDs", "")
            parent_ids = [p for p in raw_parents.strip().split('|') if p]
            line_dict["ParentIDs"] = parent_ids

            # create or update the node
            currentNode = Cgraph.nodes.match(label, id=line_dict['id']).first()
            if currentNode is not None:
                for key, value in line_dict.items():
                    currentNode[key] = value
                Cgraph.push(currentNode)
            else:
                currentNode = Node(label, **line_dict)
                Cgraph.create(currentNode)

            # create relationships to the parents
            for parent_id in parent_ids:
                parent = Cgraph.nodes.match(label, id=parent_id).first()
                if parent is None:
                    parent = Node(label, id=parent_id)
                Cgraph.create(Relationship(currentNode, 'father', parent))

            nodecount += 1
            if nodecount % 100 == 0:
                print(nodecount)
Esempio n. 16
0
    def create(cls, sample: Sample, graph: Graph) -> 'SampleNode':
        """Create the node for ``sample`` and attach its known relations.

        The base ``create`` builds the node. The nearest leaf and each
        nearest neighbour are linked only if they already exist in
        ``graph``; missing ones are skipped. The node is pushed afterwards
        and returned.
        """
        node = super().create(sample, graph)

        nearest_leaf = sample.nearest_leaf_node
        if nearest_leaf:
            leaf_node = LeafNode()
            leaf_node.leaf_id = nearest_leaf.leaf_id
            if leaf_node.exists(graph):
                node.lineage.add(leaf_node, distance=nearest_leaf.distance)

        nearest = sample.nearest_neighbours
        if nearest:
            for entry in nearest:
                other = cls()
                other.experiment_id = entry.experiment_id
                if not other.exists(graph):
                    continue
                node.neighbours.add(other, distance=entry.distance)

        graph.push(node)

        return node
Esempio n. 17
0
    def get(self, params):
        """Push a sample Diagnosis linked to one Disease and print all diagnoses.

        Uses fixed sample values throughout; ``params`` is not read. Always
        responds with the JSON string "kk".
        """
        print('connecting')
        # NOTE(review): connection credentials are hard-coded here.
        graph = Graph("bolt://localhost:7687",
                      auth=("neo4j", "ElectricWizard113"))
        print('done')

        # Build the new diagnosis object
        diagnosis = Diagnosis()
        diagnosis.name = 'dfgdfgs'
        diagnosis.evaluation = True
        linked_disease = Disease.match(graph).where(mesh_code='D003424').first()
        diagnosis.diag_disease.add(linked_disease)
        print(linked_disease.to_dict())
        graph.push(diagnosis)

        # Dump every stored diagnosis to stdout
        for stored in list(Diagnosis.match(graph)):
            print(stored.to_dict())
        return HttpResponse(json.dumps('kk'), content_type='application/json')
Esempio n. 18
0
            print(key, ":",page['infobox'][key])
    except:
        print("no infobox")

    indiv_cat = []
    for i in range(1,len(page['parsetree'].split("[[Catégorie:"))):
        cat = page['parsetree'].split("[[Catégorie:")[i].split("]]")[0].split("|*")[0]
        indiv_cat.append(cat)
        if cat not in all_cat:
            all_cat.append(cat)
    wiki_n["categories"] = indiv_cat
    print(indiv_cat)
    wiki_n['wikibase'] = page['wikibase']
    print(page['wikibase'])
    try:
        graph.push(wiki_n)
    except:
        print("cannot push node :", wiki_n)
    print("**************************************************************************")

# Bare expressions: they show the two counts when the cell is run
# interactively and have no effect when the file runs as a plain script.
len(all_keys)
len(all_cat)

# Save both collections under `wikipath` as the text of their Python repr.
# all_cat holds the distinct category names gathered above; all_keys is
# presumably the set of infobox keys seen (populated outside this excerpt).
with open(wikipath + "all_infobox_keys.txt" , "w", encoding='utf-8') as file:
    file.write(str(all_keys))
with open(wikipath + "all_categories.txt" , "w", encoding='utf-8') as file:
    file.write(str(all_cat))

# %% import wikipedia Summary:

# Select every node labelled "Wikipedia" for the step that follows.
results = graph.nodes.match("Wikipedia")
Esempio n. 19
0
class NetworkGraph:
    """Helper around a local Neo4j graph of people and their relationships.

    Mixes plain py2neo nodes (``add_node_by_name`` and friends) with OGM
    objects (``Me``, ``Contact``, ``Misc``) defined elsewhere in the project.
    """

    def __init__(self):
        """Connect to Neo4j on localhost using ``neo4j_creds.json``.

        The file is read from ``ROOT_DIR`` and must contain ``username`` and
        ``password`` keys.
        """
        path = os.path.realpath(ROOT_DIR + '/neo4j_creds.json')
        with open(path) as f:
            data = json.load(f)
        username = data['username']
        password = data['password']
        self.graph = Graph(host="localhost",
                           username=username,
                           password=password)

    def add_node_by_name(self,
                         name,
                         age=None,
                         gender=None,
                         node_type="PERSON"):
        """Create and return a node labelled ``node_type``.

        The name 'USER' is special-cased: its node gets the label 'user'
        whatever ``node_type`` says.
        """
        if name == 'USER':
            node_type = 'user'

        node = Node(node_type, name=name, age=age, gender=gender)
        self.graph.create(node)

        return node

    def get_node_by_name(self, name):
        """Return the first node (of any label) with this name, or None."""
        matcher = NodeMatcher(self.graph)
        node = matcher.match(name=name).first()

        return node

    def add_relationship(self, node1, node2, rel_type='KNOWS'):
        """Create ``(node1)-[rel_type]->(node2)`` between two nodes given by name.

        Either node is created with ``add_node_by_name`` if no node with
        that name exists yet.
        """
        first_node = self.get_node_by_name(node1)
        second_node = self.get_node_by_name(node2)

        if first_node is None:
            first_node = self.add_node_by_name(node1)
        if second_node is None:
            second_node = self.add_node_by_name(node2)

        self.graph.create(Relationship(first_node, rel_type, second_node))

    def add_rel_tuple(self, ent1, ent2):
        """
        Stores two Misc entities that are related to each other
        :param ent1: name of the first entity
        :param ent2: name of the second entity
        :return: None
        """
        # define nodes
        node1 = Misc()
        node1.name = ent1

        node2 = Misc()
        node2.name = ent2

        # add relationship to nodes (in both directions)
        node1.related_ent.add(node2)
        node2.related_ent.add(node1)

        # save to neo4j
        self.graph.create(node1)
        self.graph.create(node2)

    def search_node_by_name(self, node_name):
        """
        Looks up a node by name after removing all spaces from the name
        :param node_name: name to search for
        :return: the stored name of the first match, or None
        """
        # replace white spaces
        _node_name = node_name.replace(" ", "")

        # The result is read under the key 'n.name', so the query has to
        # return exactly that expression (it used to return the whole node,
        # which made every successful lookup fail with KeyError).
        query = 'MATCH (n) WHERE n.name={node_name} RETURN n.name;'
        result = self.graph.run(
            query,
            node_name=_node_name,
        ).data()

        if result:
            node = result[0]['n.name']
        else:
            node = None

        return node

    def add_me_w_firstname(self, username, age="", gender=""):
        """
        Pushes a new central user 'Me' to the graph
        Gets a username, creates a Me object with it as first name
        (title-cased, empty last name) and pushes it to the graph
        :param username: string username
        :return: me object (see ogm pkg)
        """
        # OGM
        me = Me()
        me.firstname = username.title()
        me.lastname = ""
        me.age = age
        me.gender = gender

        self.graph.push(me)
        return me

    def add_me_w_lastname(self, username, age="", gender=""):
        """
        Pushes a new central user 'Me' to the graph
        Gets a username, creates a Me object with it as last name
        (title-cased, empty first name) and pushes it to the graph
        :param username: string username
        :return: me object (see ogm pkg)
        """
        # OGM
        me = Me()
        me.firstname = ""
        me.lastname = username.title()
        me.age = age
        me.gender = gender

        self.graph.push(me)
        return me

    def get_me_by_firstname(self, me_name):
        """
        return me object by firstname
        :param me_name: string with firstname of me (title-cased for lookup)
        :return: me object carrying only the firstname, or None
        """
        # The name is passed as a query parameter, not spliced into the text.
        result = self.graph.run(
            'MATCH (n:Me) WHERE n.firstname={firstname} RETURN n.firstname',
            firstname=me_name.title()).data()

        if not result:
            return None
        me = Me()
        me.firstname = result[0]['n.firstname']
        return me

    def get_me_by_lastname(self, me_name):
        """
        return me object by lastname
        :param me_name: string with lastname of me (title-cased for lookup)
        :return: me object, or None
        """
        # The name is passed as a query parameter, not spliced into the text.
        result = self.graph.run(
            'MATCH (n:Me) WHERE n.lastname={lastname} RETURN n.lastname',
            lastname=me_name.title()).data()

        if not result:
            return None
        me = Me()
        # NOTE(review): the matched last name is stored in `firstname`, as
        # before. This looks like a copy-paste slip; confirm before changing.
        me.firstname = result[0]['n.lastname']
        return me

    def add_contact(self, me_name, contactname, relationship):
        """
        adds a new contact to the central user i.e. 'Me' in graph
        :param me_name: first name of the central user
        :param contactname: string, becomes the contact's firstname
        :param relationship: key into the module-level `relationships` map
        :return: None
        """
        # select central user 'Me'
        me = self.get_me_by_firstname(me_name)

        contact = Contact()
        contact.firstname = contactname

        relationship = relationships[relationship]

        # friend / brother / sister are stored in both directions, the
        # remaining kinds only from 'Me' to the contact
        if relationship == 'freund':
            me.friend.add(contact)
            contact.friend.add(me)
        elif relationship == 'bruder':
            me.brother.add(contact)
            contact.brother.add(me)
        elif relationship == 'schwester':
            me.sister.add(contact)
            contact.sister.add(me)
        elif relationship == 'mutter':
            me.mother.add(contact)
        elif relationship == 'vater':
            me.father.add(contact)
        elif relationship == 'sohn':
            me.son.add(contact)
        # NOTE(review): 'tocher' may be a typo for 'tochter'; it has to
        # match the values of `relationships`, which are not visible here.
        elif relationship == 'tocher':
            me.daughter.add(contact)
            #TODO other relationships

        self.graph.push(me)

    def search_relationship_by_contactname(self, me_name, contact_name):
        """
        Finds how 'Me' is related to a contact; spaces are removed from
        both names before the lookup
        :return: relationship type of the first match, or None
        """
        mename = me_name.replace(" ", "")
        contactname = contact_name.replace(" ", "")

        query = 'MATCH (n:Me)-[r]->(c:Contact) WHERE n.firstname={me_name} AND c.firstname={contactname} RETURN type(r);'
        result = self.graph.run(query, me_name=mename,
                                contactname=contactname).data()
        if result:
            relationship = result[0]['type(r)']
        else:
            relationship = None

        return relationship

    def search_contactname_by_relationship(self, me_name, relationship):
        """
        Finds a contact that any 'Me' node has the given relationship to
        :param me_name: accepted but not used in the query
        :param relationship: key into the module-level `relationships` map
        :return: firstname of the first matching contact, or None
        """
        relationship = relationships[relationship]
        if relationship:
            # The relationship type is spliced into the query text; it
            # comes from the `relationships` map, not from the caller.
            result = self.graph.run('MATCH (u:Me)-[:' + relationship +
                                    ']->(c:Contact) RETURN c.firstname;',
                                    rel=relationship).data()
        else:
            return None

        if result:
            contactname = result[0]['c.firstname']
        else:
            contactname = None

        return contactname
Esempio n. 20
0
class Neo4jOperate(object):
    """Convenience wrapper around a py2neo ``Graph`` for nodes and relationships.

    Wherever a node is expected, callers may pass either a py2neo ``Node`` or
    a "search dict" of the form
    ``{"label": "Teacher", "search": {"id": 123}}``.
    """

    def __init__(self, host, user, password):
        """Connect to the graph at *host* and build a relationship matcher."""
        self.graph = Graph(host, auth=(user, password))

        self.relationship_matcher = RelationshipMatcher(self.graph)

    def create_one_relationship(self,
                                start_node=None,
                                target_node=None,
                                relationship=None,
                                **prop):
        """
        Create a relationship between two nodes, or update it if it exists.

        :param start_node: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param target_node: Node or dict ==> {"label": "Patent", "search":{"ipc_code": "CN102931914A"}}
        :param relationship: str, relationship type
        :param prop: relationship properties
        :return: True or False (False when either node cannot be found)
        """
        s_node = self.search_node(start_node)
        t_node = self.search_node(target_node)
        if s_node is None or t_node is None:
            return False

        relation = self.search_relationship(s_node, t_node, relationship)
        # Compare against None explicitly: existence must not depend on how a
        # relationship object evaluates in a boolean context.
        if relation is not None:
            return self.update_one_relationship(s_node, t_node, relation,
                                                **prop)
        return self.create_relation(start_node=s_node,
                                    relationship=relationship,
                                    target_node=t_node,
                                    **prop)

    def update_one_relationship(self,
                                start_node=None,
                                target_node=None,
                                relation=None,
                                cover=True,
                                **prop):
        """
        Update the properties of the relationship between two nodes.

        :param start_node: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param target_node: Node or dict ==> {"label": "Patent", "search":{"ipc_code": "CN102931914A"}}
        :param relation: relationship type (str) or a Relationship object
        :param cover: when True (default) new values overwrite existing ones;
            otherwise a new value is added (``+=``) to an existing one
        :param prop: relationship properties
        :return: True or False
        """
        if not isinstance(relation, Relationship):
            relation = self.search_relationship(start_node, target_node,
                                                relation)

        if relation is None:
            return False

        for key, value in prop.items():
            if cover is True or key not in relation:
                relation[key] = value
            else:
                relation[key] += value
        try:
            self.graph.push(relation)
        except Exception as e:
            print(e)
            return False
        return True

    def delete_one_relationship(self,
                                start_node=None,
                                target_node=None,
                                relationship=None,
                                properety=None):
        """
        Delete one relationship between two nodes.

        :param start_node: dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param target_node: dict ==> {"label": "Patent", "search":{"ipc_code": "CN102931914A"}}
        :param relationship: relationship type
        :param properety: optional collection of property names, e.g.
            {"weight", "paper"}; when non-empty, the values those properties
            had on the deleted relationship are returned
        :return: True when nothing was found or the relationship was deleted
            without requested properties; a dict of the requested properties
            when ``properety`` is non-empty; False when deletion failed
        """
        relation = self.search_relationship(start_node, target_node,
                                            relationship)
        if relation is None:
            return True

        wanted = set(properety) if properety else set()
        # Read the values before separating; previously the method returned
        # them right away and never deleted the relationship.
        back = {key: relation[key] for key in wanted if key in relation}
        try:
            self.graph.separate(relation)
        except Exception as e:
            print(e)
            return False
        return back if wanted else True

    def migrate_relationship(self,
                             source_node=None,
                             self_node=None,
                             target_node=None,
                             r_type=None,
                             **property):
        """
        Move a relationship from ``source_node -> self_node`` to
        ``target_node -> self_node``, carrying its properties along.

        :param source_node: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param self_node: Node or dict
        :param target_node: Node or dict
        :param r_type: relationship type
        :param property: names of the properties to carry over (only the
            keyword names are used); when none are given every property of
            the old relationship is carried over
        :return: True or False (False when the source relationship does not
            exist, cannot be removed, or the new one cannot be written)
        """
        relation = self.search_relationship(source_node, self_node, r_type)
        if relation is None:
            return False

        wanted = set(property)
        carried = {
            key: value
            for key, value in dict(relation).items()
            if not wanted or key in wanted
        }
        try:
            self.graph.separate(relation)
        except Exception as e:
            print(e)
            return False
        return self.create_one_relationship(target_node, self_node, r_type,
                                            **carried)

    def search_relationship(self,
                            start_node=None,
                            target_node=None,
                            relationship=None):
        """
        Find the relationship between two nodes.

        :param start_node: dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param target_node:  dict ==> {"label": "Patent", "search":{"ipc_code": "CN102931914A"}}
        :param relationship: relationship type
        :return: Relationship or None
        """
        # Resolve both end points first; without them there is nothing to match.
        start_node = self.search_node(start_node)
        if start_node is None:
            return None
        target_node = self.search_node(target_node)
        if target_node is None:
            return None

        return self.relationship_matcher.match(nodes=[start_node, target_node],
                                               r_type=relationship).first()

    def search_node(self, search_dict):
        """
        Resolve a node from a Node or a search dict.

        :param search_dict: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :return: Node, or None when the argument is neither a Node nor a dict
            holding both "label" and "search", or when nothing matches
        """
        if isinstance(search_dict, Node):
            return search_dict

        if (isinstance(search_dict, dict) and "label" in search_dict
                and "search" in search_dict):
            return self.graph.nodes.match(search_dict["label"]).where(
                **search_dict["search"]).first()
        return None

    def update_node(self, search_dict, **prop):
        """
        Set properties on an existing node.

        :param search_dict: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param prop: node properties
        :return: True or False (False when the node does not exist)
        """
        node = self.get_node(search_dict=search_dict,
                             create_if_not_exist=False)
        if node is None:
            return False
        for key, value in prop.items():
            node[key] = value
        try:
            self.graph.push(node)
        except Exception as e:
            print(e)
            return False
        return True

    def get_node(self, search_dict, create_if_not_exist=True, **prop):
        """
        Look a node up and, optionally, create it when it is missing.

        :param search_dict: Node or dict ==> {"label": "Teacher", "search":{"id": 123}}
        :param create_if_not_exist: bool, create the node when the lookup
            finds nothing (default True)
        :param prop: properties for a newly created node
        :return: Node, or None when the node is missing / creation failed
        """
        node = self.search_node(search_dict)
        if node is None and create_if_not_exist:
            return self.create_node(search_dict=search_dict, **prop)
        return node

    def create_node(self, search_dict, **prop):
        """
        Create a node labelled ``search_dict["label"]`` with properties *prop*.

        :return: the result of looking ``search_dict`` up again after the
            create, or None when ``search_dict`` has no "label" or the
            create fails
        """
        if not isinstance(search_dict, dict) or "label" not in search_dict:
            return None
        node = Node(search_dict["label"], **prop)
        try:
            self.graph.create(node)
            return self.search_node(search_dict)
        except Exception as e:
            print(e)
            return None

    def create_relation(self, start_node, relationship, target_node, **prop):
        """Create ``start_node -[relationship]-> target_node`` with *prop*.

        :return: True on success, False when the create raises
        """
        relation = Relationship(start_node, relationship, target_node)
        for key, value in prop.items():
            relation[key] = value

        try:
            self.graph.create(relation)
        except Exception as e:
            print(e)
            return False
        return True

    def run(self, cql):
        """Run a Cypher statement and return the ``.data()`` of its result."""
        return self.graph.run(cql).data()

    def run_without_data(self, cql):
        """Run a Cypher statement and return the raw result object."""
        return self.graph.run(cql)
Esempio n. 21
0
node_2_call_node_1 = Relationship(test_node_2, 'CALL', test_node_1)
node_2_call_node_1['count'] = 2
test_graph.create(node_1_call_node_2)
test_graph.create(node_2_call_node_1)
# The code above creates two relationships of type "CALL": one from
# test_node_1 to test_node_2 and one from test_node_2 to test_node_1. Both
# carry a `count` property (set to 2 for node_2_call_node_1 above).

# Worth noting: if the start or end node does not exist yet when a relationship
# is created, that node is created together with the relationship.

# 4. Assigning and updating properties of nodes/relationships

# Initial property assignment was already shown where the nodes and
# relationships were created; this section shows how to update a property
# value on an existing node/relationship.

# Taking node_1_call_node_2 from the relationship section as the example:
# increment its count by 1 and write the change back to the graph database.

node_1_call_node_2['count'] += 1
test_graph.push(node_1_call_node_2)
# Updating a property value only takes a call to push().

# 5. Finding nodes and relationships by property value (find, find_one)

# find and find_one look nodes/relationships up by label, property key and
# property value.

find_code_1 = test_graph.find_one(
		label="Person",
		property_key="name",
		property_value="test_node_1"
)
print(find_code_1['name'])
# Difference between find and find_one:
# find_one returns one concrete node/relationship whose properties and values
# can be read directly; it returns None when nothing matches.
# find returns a cursor that can be iterated to get every match.
Esempio n. 22
0
                if to_node:
                    from_node.related_exam.add(to_node)
            elif rel_type == "related_drug":
                to_node = get_node_by_id("drug", to_id)
                if to_node:
                    from_node.related_drug.add(to_node)
            elif rel_type == "related_operation":
                to_node = get_node_by_id("operation", to_id)
                if to_node:
                    from_node.related_operation.add(to_node)

        elif from_type == "exam":
            from_node = get_node_by_id("exam", from_id)
            if rel_type == "related_bodypart":
                to_node = get_node_by_id("bodypart", to_id)
                if to_node:
                    from_node.related_bodypart.add(to_node)

        elif from_type == "operation":
            from_node = get_node_by_id("operation", from_id)
            if rel_type == "related_depart":
                to_node = get_node_by_id("depart", to_id)
                if to_node:
                    from_node.related_depart.add(to_node)
            elif rel_type == "related_bodypart":
                to_node = get_node_by_id("bodypart", to_id)
                if to_node:
                    from_node.related_bodypart.add(to_node)

        graph.push(from_node)
Esempio n. 23
0
class Graph(object):
    """Application-level facade over the Neo4j graph: users, rankings and maps.

    ``nodes`` and ``links`` are lookup helpers built on the same underlying
    graph connection as ``graph``.
    """

    def __init__(self, neo4j_uri):
        """Open the graph at *neo4j_uri* and create the node/link helpers."""
        self.graph = NeoGraph(neo4j_uri)
        self.nodes = Nodes(self.graph)
        self.links = Links(self.graph)

    def create_user(self, args):
        """Create a ``User`` node unless one with this username exists.

        :param args: mapping with "username", "password", "name" and "city"
        :return: ``(node, created)`` where *created* is True only when a new
            node was written
        """
        node = self.nodes.find("User", args["username"])
        if not node:
            passhash = Authenticate.hashgen(args["username"], args["password"])
            properties = dict(
                node_id=args["username"],
                name=args["name"],
                city=args["city"],
                passhash=passhash
            )
            node = Node("User", **properties)
            self.graph.create(node)
            return node, True
        return node, False

    def user_rank(self, args, node_type):
        """Record a user's rank for a node of type *node_type*.

        :param args: mapping with "user_id", "node_id", "rank" and optionally
            "issue_id"
        :return: ``(True, "")`` on success, ``(False, message)`` when the
            user or the node cannot be found
        """
        user = self.nodes.find("User", args["user_id"])
        if not user:
            return False, "invalid user_id"

        node = self.nodes.find(node_type, args["node_id"])
        if not node:
            return False, "invalid node_id"

        link = self.links.find(user, node, "RANKS")
        # Reuse the existing link only when no issue is given or it belongs to
        # the same issue; otherwise a separate RANKS link is created.
        if link and ("issue_id" not in args or
                     link.properties["issue_id"] == args["issue_id"]):
            link.properties["rank"] = args["rank"]
            link.push()
        else:
            properties = {"rank": args["rank"]}
            if "issue_id" in args:
                properties["issue_id"] = args["issue_id"]
            self.graph.create(Relationship(user, "RANKS", node, **properties))

        return True, ""

    def user_map(self, args, src_node, dst_node):
        """Record how strongly a user maps a source node onto a destination node.

        :param args: mapping with "user_id", "src_id", "dst_id" and "strength"
        :param src_node: label of the source node
        :param dst_node: label of the destination node
        :return: ``(True, "")`` on success, otherwise ``(False, message)``
            with all validation errors joined by ", "
        """
        # TODO refactor this into smaller units

        errors = []

        # retrieve nodes and existing links
        user = self.nodes.find("User", args["user_id"])
        if not user:
            errors.append("invalid user_id")
        src = self.nodes.find(src_node, args["src_id"])
        if not src:
            errors.append("invalid src_id")
        dst = self.nodes.find(dst_node, args["dst_id"])
        if not dst:
            errors.append("invalid dst_id")
        src_link = self.links.find(user, src, "RANKS")
        if not src_link:
            errors.append("user has not ranked src_node")
        dst_link = self.links.find(user, dst, "RANKS")
        if not dst_link:
            errors.append("user has not ranked dst_node")
        if errors:
            return False, ", ".join(errors)

        src_rank = src_link.properties["rank"]
        dst_rank = dst_link.properties["rank"]

        # fetch map node or create if it doesn't exist
        map_id = "{0}-{1}".format(args["src_id"], args["dst_id"])
        map_node = self.nodes.find("Map", map_id)
        if not map_node:
            map_node = Node("Map", node_id=map_id)
            self.graph.create(map_node)
            self.graph.create(Relationship(src, "MAPS", map_node))
            self.graph.create(Relationship(map_node, "MAPS", dst))

        user_map_link = self.links.find(user, map_node, "MAPS")
        if user_map_link:
            # link already exists, update strength
            user_map_link.properties["strength"] = args["strength"]
            user_map_link.properties["src_rank"] = src_rank
            user_map_link.properties["dst_rank"] = dst_rank
            # Push the link itself, as user_rank does; the previous
            # self.graph.push() call named no entity to write.
            user_map_link.push()
        else:
            # create new link from user to map node
            properties = dict(
                strength=args["strength"],
                src_rank=src_rank,
                dst_rank=dst_rank
            )
            self.graph.create(Relationship(user, "MAPS", map_node, **properties))

        return True, ""

    def get_summary(self, issue_id, node_type):
        """Count how many users gave each rank to every *node_type* node.

        :return: ``(False, message, [])`` when the issue does not exist;
            otherwise ``(True, nodes, invalid)`` where ``nodes`` maps each
            node_id to five counts for ranks -2..2 and ``invalid`` lists the
            rank values found outside that range
        """
        issue = self.nodes.find("Issue", issue_id)
        if not issue:
            return False, "issue <{0}> does not exist".format(issue_id), []

        # TODO only grab nodes that are connected to issue node
        cypher = self.graph.cypher
        # NOTE(review): node_type is formatted into the query text; confirm
        # callers only pass known label names.
        query = """
            MATCH (u:User)-[r:RANKS]-(v:`{0}`)
            RETURN
                r.rank AS rank,
                v.node_id AS node_id,
                count(u.node_id) AS count
            ORDER BY
                node_id, rank
        """.format(node_type)
        results = cypher.execute(query)
        valid_ranks = range(-2, 3)
        nodes = {}
        invalid = []
        for row in results:
            if row.node_id not in nodes:
                nodes[row.node_id] = [0, 0, 0, 0, 0]
            if row.rank in valid_ranks:
                # ranks -2..2 map onto list positions 0..4
                nodes[row.node_id][row.rank + 2] = row.count
            else:
                invalid.append(row.rank)
        return True, nodes, invalid
# encoding=utf8

# Normalizes the parameter format of the stored commands (hadoop)

from py2neo import Graph, Node, Relationship

# Connect to the Neo4j server at localhost:7474.
graph = Graph("http://localhost:7474", auth=("neo4j", "12345"))

# Match every node labelled "命令" ("command") whose platform is 'hadoop'.
com_node = graph.nodes.match("命令", platform='hadoop')

for ele in com_node:
    parameter = ele['parameter']
    # Nodes without a (non-empty) parameter property are left untouched.
    if parameter:
        # Replace every '| ' with '||| ' in the stringified value.
        parameter2 = str(parameter).replace('| ', '||| ')
        print(parameter2)
        # Store the rewritten value on the node and write it back.
        ele.update({'parameter': parameter2})
        graph.push(ele)
Esempio n. 25
0
class Command(BaseCommand):
    """Management command that copies group and person rows from SQL Server to Neo4j."""

    help = 'port group data from sql server to neo4j.'

    def __init__(self, *args, **kwargs):
        """Open the SQL Server connection and the Neo4j graph."""
        super(Command, self).__init__(*args, **kwargs)

        self._sql_server_conn = pymssql.connect(server='SX-DEV')

        self._init_graph()

    def handle(self, *args, **options):
        """Entry point of the command: run the whole import."""
        self._start_import()

    def _init_graph(self):
        """(Re)create the Neo4j connection from ``settings.NEO4J``."""
        self._graph = Graph(host=settings.NEO4J['HOST'],
                            http_port=settings.NEO4J['PORT'],
                            user=settings.NEO4J['USER'],
                            password=settings.NEO4J['PWD'])

    def _start_import(self):
        """Import all groups first, then all persons and their memberships."""
        self.stdout.write('Start to migrate data from sql server to neo4j')

        group_db, person_db = self._get_databases()

        # create all the group nodes
        for db in group_db:
            for table in self._get_db_tables(db):
                self._create_group(db, table)

        # create all group users nodes and build relations
        for db in person_db:
            for table in self._get_db_tables(db):
                self._create_person(db, table)

        self._close_mssql_conn()

        self.stdout.write(
            self.style.SUCCESS(
                'Successfully imported all data to neo4j server'))

    def _close_mssql_conn(self):
        """Close the SQL Server connection."""
        self._sql_server_conn.close()

    def _get_databases(self):
        """Return ``(group_db, person_db)`` lists of database names.

        Databases whose name contains 'QunInfo' hold groups; those whose name
        contains 'GroupData' hold persons.
        """
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT name FROM sys.databases;')

        group_db = []
        person_db = []
        for db in cursor.fetchall():
            db_name = db[0]
            if 'GroupData' in db_name:
                person_db.append(db_name)
            elif 'QunInfo' in db_name:
                group_db.append(db_name)

        return group_db, person_db

    def _get_db_tables(self, db_name):
        """Return the base tables of *db_name* whose name contains 'QunList' or 'Group'."""
        cursor = self._sql_server_conn.cursor()
        cursor.execute(
            "SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';"
            % db_name)
        return [
            tb[0] for tb in cursor.fetchall()
            if 'QunList' in tb[0] or 'Group' in tb[0]
        ]

    def _open_table_scan(self, db_name, table_name, start_id):
        """Return ``(total, cursor)`` for the rows of a table with ``id > start_id``.

        *total* is the row count used to size the progress bar; *cursor*
        yields the rows ordered by id.
        """
        counter = self._sql_server_conn.cursor()
        counter.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                        (db_name, table_name, start_id))
        total = counter.fetchall()[0][0]

        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))
        return total, cursor

    def _create_group(self, db_name, table_name, start_id=0):
        """Merge one ``Group`` node per row of the table with ``id > start_id``.

        On an error the graph connection is re-created and the import resumes
        just before the row being processed when the error occurred.
        """
        curr_id = start_id
        total, cursor = self._open_table_scan(db_name, table_name, start_id)

        pbar = tqdm(desc='Creating Group Nodes from [%s.%s]' %
                    (db_name, table_name),
                    total=total)
        try:
            g = cursor.fetchone()
            while g:
                curr_id = g[0]
                group = Group()
                group.number = g[1]
                group.mastqq = g[2]
                group.date = g[3]
                group.title = g[4]
                group.groupclass = g[5]
                group.intro = g[6]
                self._graph.merge(group)
                pbar.update(1)
                g = cursor.fetchone()
        except Exception as exc:
            # Only ordinary errors trigger a resume; KeyboardInterrupt and
            # SystemExit now propagate so the import can be stopped.
            # NOTE(review): a row that fails every time makes this recurse
            # without bound -- consider a retry limit.
            print('Catch an Exception, resume group creating from id: %d' %
                  (curr_id - 1))
            print(exc)
            pbar.close()
            self._init_graph()
            self._create_group(db_name, table_name, curr_id - 1)
        pbar.close()

    def _create_person(self, db_name, table_name, start_id=0):
        """Merge one ``Person`` node per row and link it to its group.

        Rows with ``id > start_id`` are processed in id order. On an error
        the graph connection is re-created and the import resumes just before
        the row being processed when the error occurred.
        """
        curr_id = start_id
        total, cursor = self._open_table_scan(db_name, table_name, start_id)

        pbar = tqdm(desc='Creating Person Nodes and Relations from [%s.%s]' %
                    (db_name, table_name),
                    total=total)
        try:
            p = cursor.fetchone()
            while p:
                curr_id = p[0]
                person = Person()
                person.qq = p[1]
                person.nick = p[2]
                person.age = p[3]
                person.gender = p[4]
                person.auth = p[5]
                group_number = p[6]
                # get group node
                group = Group.select(self._graph, group_number).first()
                if group:
                    # build relations
                    person.groups.add(group)
                    group.members.add(person)
                    # update group node
                    self._graph.push(group)
                self._graph.merge(person)
                pbar.update(1)
                p = cursor.fetchone()
        except Exception as exc:
            # See _create_group: interrupts propagate, ordinary errors resume.
            print('Catch an Exception, resume person creating from id: %d' %
                  (curr_id - 1))
            print(exc)
            pbar.close()
            self._init_graph()
            self._create_person(db_name, table_name, curr_id - 1)
        pbar.close()
Esempio n. 26
0
entity_out.comment = e_out.comment

# Describe the processing run as a pyproms activity that used e1 and e2 and
# generated e_out.
a = pyproms.ProvActivity('NEXIS processing',
                         startedAtTime=startedAtTime,
                         endedAtTime=endedAtTime,
                         wasAssociatedWith=rs,
                         used_entities=[e1, e2],
                         generated_entities=[e_out])
# Copy the activity's fields onto an Activity object that is pushed below.
actity_a = Activity()
actity_a.uri = a.uri
actity_a.startedAtTime = a.startedAtTime
actity_a.endedAtTime = a.endedAtTime
actity_a.wasAssociatedWith = a.wasAssociatedWith
actity_a.wasInformedBy = a.wasInformedBy

# Push the activity, the entities and the agent through gh.
gh.push(actity_a)
gh.push(entity_out)
gh.push(entity_2)
gh.push(entity_)
gh.push(agent_)
# Report generation
r = pyproms.PromsExternalReport(
    'NEXIS Report',
    wasReportedBy=pyproms.PromsReportingSystem('Fake RS'),
    nativeId='NEXIS run #34',
    reportActivity=a,
    generatedAtTime=endedAtTime)  #time.ctime())

# Serialize the report graph as Turtle and write it to disk.
# NOTE(review): the file is opened in binary mode, so serialize() must return
# bytes here -- confirm for the library version in use.
with open('report.1908.ttl', 'wb') as f:
    f.write(r.get_graph().serialize(format='turtle'))
Esempio n. 27
0
# Enforce unique slugs on Album nodes (the Word/Genre constraints are disabled).
graph.schema.create_uniqueness_constraint("Album", "slug")
# graph.schema.create_uniqueness_constraint("Word", "slug")
# graph.schema.create_uniqueness_constraint("Genre", "slug")

# One Album node per DataFrame row, linked to its genres and words.
for i,r in df.iterrows():

    s_id = r['spotify_id']
    cover = r['albumcover']
    artist = r['artist_column']
    album = r['album_column']
    slug = r['slug_column']
    print [album]
    # `album` is rebound from the album title to the Node built from it.
    album = Node('Album', name = album, artist = artist, albumcover = cover, s_id = s_id, slug = slugify(slug))
    graph.create(album)
    # NOTE(review): push() is called without any entity here -- confirm it has
    # an effect.
    graph.push()

    # Each genre entry is indexed as each[0] (name) and each[1] (stored as `dist`).
    if pd.Series(r['genre_column']).any():
        for each in r['genre_column']:
            # Reuse the Genre node with this slug if there is one.
            genre = graph.find_one('Genre', 'slug', slugify(each[0]))
            if not genre:
                genre = Node('Genre', name=each[0], slug=slugify(each[0]))
            rel = Relationship(album, 'Performs', genre, dist=each[1])
            graph.create(rel)
    # Columns from position 5 onward are paired as (column name, value); every
    # value other than 1 yields a Word -[Describes]-> Album relationship.
    for value in zip(r.index[5:],r[5:]):
        if value[1] != 1:
            word = graph.find_one('Word', 'slug', slugify(value[0]))
            if not word:
                word = Node('Word', name=value[0], slug=slugify(value[0]))
            rel = Relationship(word, 'Describes', album, dist=value[1])
            graph.create(rel)
Esempio n. 28
0
# Create nodes (each can be assigned straight to a variable)
a = Node('User', name='yaim')  # 1st argument is the node label, the keyword arguments are property names and values
b = Node('User', name='fyl')
# Create a relationship
r = Relationship(a, 'SAME', b)
# Add the nodes and the relationship to the database
s = a | b | r
graph.create(s)

# Query nodes
print(graph.nodes[0])  # return the node with the given node id
print(graph.nodes.get(0))  # same as above
print(list(graph.nodes.match('User')))  # return the list of nodes matching the condition
print(graph.nodes.match('User', name='yaim').first())  # return the first node matching the condition
# Query relationships
rel_matcher = RelationshipMatcher(graph)
rel_all = list(rel_matcher.match())  # get all relationships as a list
rel_this = list(rel_matcher.match(r_type='SAME'))  # get the relationships of one type as a list

# Delete a node
node_del = graph.nodes.match('User', name='temp').first()  # first look the node up
graph.delete(node_del)  # then delete it
# *** Delete a relationship
rel_del = rel_matcher.match(r_type='SAME').first()  # first look the relationship up
graph.delete(rel_del)  # then delete it (*does this also delete its nodes?*)

# Update a node
node_update = graph.nodes.match('User', name='yaim').first()  # first look the node up
node_update['name'] = 'yaim_new'  # change one property value on the node
graph.push(node_update)  # submit the update
# *** Update a relationship
Esempio n. 29
0
__author__ = "Andrei"

from py2neo import Graph
from py2neo import Node, Relationship


# Connect to the local Neo4j REST endpoint (credentials are part of the URI).
graph = Graph("http://*****:*****@localhost:7474/db/data")
alice = Node("Person", name="Alice")
bob = Node("Person", name="Bob")
# The end node must be the `bob` Node object; the string "bob" that was passed
# before does not refer to the node created above.
alice_knows_bob = Relationship(alice, "KNOWS", bob)
# Creating the relationship also writes both of its end nodes.
graph.create(alice_knows_bob)

alice.properties["age"] = 33
bob.properties["age"] = 44

# Send the locally changed properties of both nodes to the server.
graph.push(alice, bob)
Esempio n. 30
0
class Graph(object):
    """Application-level facade over the Neo4j graph: users, issues, rankings, maps.

    ``nodes`` and ``links`` are lookup helpers built on the same underlying
    graph connection as ``graph``.
    """

    def __init__(self, neo4j_uri):
        """Open the graph at *neo4j_uri* and create the node/link helpers."""
        self.graph = NeoGraph(neo4j_uri)
        self.nodes = Nodes(self.graph)
        self.links = Links(self.graph)

    def execute_raw(self, cqlfile):
        """Execute the Cypher text stored in the file *cqlfile* and return the result."""
        cypher = self.graph.cypher
        with open(cqlfile, 'r') as query:
            return cypher.execute(query.read())

    def create_user(self, args):
        """Create a ``User`` node unless one with this username exists.

        :param args: mapping with "username", "password", "name" and "city"
        :return: ``(node, created)`` where *created* is True only when a new
            node was written
        """
        node = self.nodes.find("User", args["username"])
        if not node:
            passhash = Authenticate.hashgen(args["username"], args["password"])
            properties = dict(
                node_id=args["username"],
                name=args["name"],
                city=args["city"],
                passhash=passhash
            )
            node = Node("User", **properties)
            self.graph.create(node)
            return node, True
        return node, False

    def create_issue_nodes(
            self, parent, names, node_type, link_type="HAS", link_prop=None):
        """Create one *node_type* node per name and link each one from *parent*.

        Support function for ``create_issue``: creates nodes of a single type
        (value/objective/policy), each with a fresh uuid4 ``node_id``, and
        links them to *parent* with a *link_type* relationship.

        :param link_prop: optional mapping of properties for every link
        :return: the list of created nodes, in the order of *names*
        """
        # A fresh dict per call; a shared mutable default could leak
        # properties from one call into the next.
        link_prop = {} if link_prop is None else link_prop
        nodes = []
        for name in names:
            properties = dict(
                node_id=str(uuid.uuid4()),
                name=name
            )
            node = Node(node_type, **properties)
            self.graph.create(node)
            self.graph.create(Relationship(parent, link_type, node, **link_prop))
            nodes.append(node)
        return nodes

    def create_issue(self, args):
        """Create an ``Issue`` node together with its values, objectives and policies.

        :param args: mapping with "issue_name", "desc", "values",
            "objectives" and "policies"
        :return: the ``node_id`` (a uuid4 string) assigned to the new issue
        """
        # create a new issue Node with a random node_id
        issue_properties = dict(
            node_id=str(uuid.uuid4()),
            name=args["issue_name"],
            desc=args["desc"]
        )
        issue_node = Node("Issue", **issue_properties)
        self.graph.create(issue_node)

        # create new nodes and links for values/objectives/policies
        # associated with the new issue
        self.create_issue_nodes(issue_node, args["values"], "Value")
        self.create_issue_nodes(issue_node, args["objectives"], "Objective")
        self.create_issue_nodes(issue_node, args["policies"], "Policy")
        return issue_properties["node_id"]

    def user_rank(self, args, node_type):
        """Record a user's rank for a node of type *node_type*.

        :param args: mapping with "user_id", "node_id", "rank" and optionally
            "issue_id" (only stored when a new link is created)
        :return: ``(True, "")`` on success, ``(False, message)`` when the
            user or the node cannot be found
        """
        user = self.nodes.find("User", args["user_id"])
        if not user:
            return False, "invalid user_id"

        node = self.nodes.find(node_type, args["node_id"])
        if not node:
            return False, "invalid node_id"

        link = self.links.find(user, node, "RANKS")
        if link:
            link.properties["rank"] = args["rank"]
            link.push()
        else:
            properties = {"rank": args["rank"]}
            if "issue_id" in args:
                properties["issue_id"] = args["issue_id"]
            self.graph.create(Relationship(user, "RANKS", node, **properties))
        return True, ""

    def user_map(self, args, src_node, dst_node):
        """Record how strongly a user maps a source node onto a destination node.

        :param args: mapping with "user_id", "src_id", "dst_id" and "strength"
        :param src_node: label of the source node
        :param dst_node: label of the destination node
        :return: ``(True, "")`` on success, otherwise ``(False, message)``
            with all validation errors joined by ", "
        """
        # TODO refactor this into smaller units

        errors = []

        # retrieve nodes and existing links
        user = self.nodes.find("User", args["user_id"])
        if not user:
            errors.append("invalid user_id")
        src = self.nodes.find(src_node, args["src_id"])
        if not src:
            errors.append("invalid src_id")
        dst = self.nodes.find(dst_node, args["dst_id"])
        if not dst:
            errors.append("invalid dst_id")
        src_link = self.links.find(user, src, "RANKS")
        if not src_link:
            errors.append("user has not ranked src_node")
        dst_link = self.links.find(user, dst, "RANKS")
        if not dst_link:
            errors.append("user has not ranked dst_node")
        if errors:
            return False, ", ".join(errors)

        src_rank = src_link.properties["rank"]
        dst_rank = dst_link.properties["rank"]

        # fetch map node or create if it doesn't exist
        map_id = "{0}-{1}".format(args["src_id"], args["dst_id"])
        map_node = self.nodes.find("Map", map_id)
        if not map_node:
            map_node = Node("Map", node_id=map_id)
            self.graph.create(map_node)
            self.graph.create(Relationship(src, "MAPS", map_node))
            self.graph.create(Relationship(map_node, "MAPS", dst))

        user_map_link = self.links.find(user, map_node, "MAPS")
        if user_map_link:
            # link already exists, update strength
            user_map_link.properties["strength"] = args["strength"]
            user_map_link.properties["src_rank"] = src_rank
            user_map_link.properties["dst_rank"] = dst_rank
            # Push the link itself, as user_rank does; the previous
            # self.graph.push() call named no entity to write.
            user_map_link.push()
        else:
            # create new link from user to map node
            properties = dict(
                strength=args["strength"],
                src_rank=src_rank,
                dst_rank=dst_rank
            )
            self.graph.create(Relationship(user, "MAPS", map_node, **properties))
        return True, ""

    def get_summary(self, issue_id, node_type):
        """Count how many users gave each rank to every *node_type* node.

        :return: ``(False, message, [])`` when the issue does not exist;
            otherwise ``(True, nodes, invalid)`` where ``nodes`` maps each
            node_id to ``{"name": ..., "data": [...]}`` with five counts for
            ranks -2..2, and ``invalid`` lists the rank values found outside
            that range
        """
        issue = self.nodes.find("Issue", issue_id)
        if not issue:
            return False, "issue <{0}> does not exist".format(issue_id), []

        # TODO only grab nodes that are connected to issue node
        cypher = self.graph.cypher
        # NOTE(review): node_type is formatted into the query text; confirm
        # callers only pass known label names.
        query = """
            MATCH (u:User)-[r:RANKS]-(v:`{0}`)
            RETURN
                r.rank AS rank,
                v.node_id AS node_id,
                v.name AS name,
                count(u.node_id) AS count
            ORDER BY
                node_id, rank
        """.format(node_type)
        results = cypher.execute(query)
        valid_ranks = range(-2, 3)
        nodes = {}
        invalid = []
        for row in results:
            if row.node_id not in nodes:
                nodes[row.node_id] = dict(name=row.name, data=[0, 0, 0, 0, 0])
            if row.rank in valid_ranks:
                # ranks -2..2 map onto list positions 0..4
                nodes[row.node_id]["data"][row.rank + 2] = row.count
            else:
                invalid.append(row.rank)
        return True, nodes, invalid
                        n['p'][
                            'int_ratio2'] = int_ratio2  # ratio of numeric matches in attribute 2
                        n['p'][
                            'str_ratio2'] = str_ratio2  # ratio of string matches in attribute 2
                        n['p'][
                            'date_ratio2'] = date_ratio2  # ratio of date matches in attribute 2
                        n['p'][
                            'no_unique_values1'] = no_unique_values1  # number of unique values in attribute 1
                        n['p'][
                            'no_unique_values2'] = no_unique_values2  # number of unique values in attribute 2
                        n['p'][
                            'top_value1'] = top_value1  # most frequently occuring value in attribute 1
                        n['p'][
                            'top_value2'] = top_value2  # most frequently occuring value in attribute 2
                        n['p']['values_update_timestamp'] = get_timestamp()
                        graph.push(n['p'])
                        newly_computed_count += 1

                        print()
                        print()
                        print('NEWLY CALCULATED')
                        print('--------------------------------------------')
                        print('Attribute 1: ' + facet1)
                        print('Attribute 2:' + facet2)
                        print('--------------------------------------------')
                        print('Exact Score:', exact_score)
                        print('Type Match:', type_match)
                        print('Magnitude Difference:', magnitude_difference)
                        print('Jaro Score:', jaro_score)
                        print()
                        print('No. of missing pairs so far: ', missing_count)
len(graph.nodes.match("Website"))
len(graph.relationships.match())


********************************************************************************

# Import home page / start pages from 202005Websites01_D0.csv onto the nodes
# whose D0_id matches the CSV 'ID' column.
WebD0_df=pd.read_csv("C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\HypheExport20200520\\202005Websites01_D0.csv")

for index, row in WebD0_df.iterrows():
    node = graph.nodes.match(D0_id = str(row['ID'])).first()
    if node is None:
        # No node carries this D0_id: report the id and move on.
        print(row['ID'])
        continue
    try:
        node['D0_home_page'] = row['HOME PAGE']
        node['D0_start_pages'] = row['START PAGES']
        graph.push(node)
    except Exception:
        # Best effort: report the id of the row that could not be written.
        # Unlike a bare except, this lets Ctrl-C stop the import.
        print(row['ID'])

# This imports crawl data :
Crawl_D0_df = pd.read_csv("C:\\Users\\Jo\\Documents\\Tech\\Atom_prj\\MyMedia-FillDB\\data\\HypheExport20200520\\202005Websites01_D0_crawls.csv")

for index, row in Crawl_D0_df.iterrows():
    node = graph.nodes.match(D0_id = str(row['webentity_id'])).first()
    try:
        node['D0_max_depth']=row['max_depth']
        node['D0_nb_pages']=row['nb_pages']
        node['D0_nb_crawled_pages']=row['nb_crawled_pages']
        node['D0_nb_pages_indexed']=row['nb_pages_indexed']
        node['D0_nb_unindexed_pages']=row['nb_unindexed_pages']
        node['D0_nb_links']=row['nb_links']
Esempio n. 33
0
    customers = rds.smembers(key)
    c_s_relatiions[key.split(':')[2]] = customers
'''
sid is the key of c_s_relatiions, for example S0000003
'''
# Selector used below to look existing nodes up by label and property.
selector = NodeSelector(graph)
for sid in c_s_relatiions:
    c_set = c_s_relatiions[sid]
    #    if sid != 'S0000003':
    #        continue
    # Existing Servicer node for this id, if any; _node is a fresh candidate
    # that is only written when no node exists yet.
    _snode = selector.select('Servicer', servicer_id=sid).first()
    _node = Node('Servicer', servicer_id=sid)
    # Servicer attributes read from the hash stored under 'qxy:servicer:<sid>'
    # (rds is presumably a Redis client -- confirm).
    _servicer = rds.hgetall('qxy:servicer:' + sid)
    if _snode:
        # Node exists: refresh its properties and write them back.
        _snode.update(_servicer)
        graph.push(_snode)
    else:
        # Node missing: create it with the attributes just read.
        _node.update(_servicer)
        graph.create(_node)
        _snode = _node

    for c in c_set:
        #        if c != '13551472168':
        #            continue
        # Get or create the Customer node identified by pid.
        _cnode = selector.select('Customer', pid=c).first()
        if not _cnode:
            _node = Node('Customer', pid=c)
            graph.create(_node)
            _cnode = _node

        _msg_keys = []  # msg:sid:pid:*
Esempio n. 34
0
class Database():
    """Manage Database.
    管理数据库。

    It support python command line parameter processing of relational database
    and graph database.
    You can view all the features by 'python xxx.py -h'.
    支持关系数据库和图形数据库的python命令行参数处理。
    可以通过'python xxx.py -h'查看所有功能。

    Public attributes:
    - rdb: Relational database. 关系数据库。
    - graph: Graph database. 图数据库。
    """
    def __init__(self, password="******", userid="userid", is_admin=True):
        """Connect to the graph database and process the command line.

        Opens the graph at http://localhost:7474/db/data, loads the "User"
        node whose userid matches, builds the option parser, parses the
        command line and, when the delete option was given, deletes every
        label listed in the positional arguments.

        Args:
            password: Password passed to Graph.
            userid: userid of the User node stored in self.gconfig.
            is_admin: Stored in self.is_admin; read by handle_excel.
        """
        self.is_admin = is_admin
        self.rdb = None
        self.graph = Graph("http://localhost:7474/db/data", password=password)
        self.selector = NodeSelector(self.graph)
        # Graph.find_one is deprecated (DeprecationWarning, 2017-5-18), so the
        # user node is fetched through NodeSelector instead. Usage:
        #   1: selector.select("Label", property=value)
        #   2: selector.select("Person").where("_.name =~ 'J.*'", "1960 <= _.born < 1970")
        self.gconfig = self.selector.select("User", userid=userid).first()

        self.usage = "usage: python %prog [options] arg"
        self.version = "%prog 1.0"
        self.parser = OptionParser(usage=self.usage, version=self.version)
        add_option = self.parser.add_option
        add_option("-v", "--verbose", action="store_true", dest="verbose")
        add_option("-q", "--quiet", action="store_false", dest="verbose")
        add_option("-b", "--batch", dest="batch", action="store_true",
                   help="batch processing of graph database")
        # Options that only store the value given on the command line.
        for short_flag, long_flag, dest, help_text in (
                ("-f", "--file", "filename", "read data from filename"),
                ("-p", "--path", "filepath", "read data from filepath"),
                ("-a", "--add", "add", "add subgraph to graph database"),
                ("-d", "--delete", "delete", "delete subgraph of graph database"),
                ("-e", "--edit", "edit", "edit subgraph of graph database"),
                ("-s", "--search", "search", "search subgraph of graph database"),
        ):
            add_option(short_flag, long_flag, dest=dest, help=help_text)
        self.options, self.args = self.parser.parse_args()

        if self.options.verbose:
            print("reading %s..." % self.options.filename)
        if self.options.delete:
            # Each positional argument is a label to delete with that pattern.
            for label in self.args:
                self.delete(pattern=self.options.delete, label=label)

    def delete(self, pattern="n", label=None):
        """Delete data or subgraphs from the graph database in bulk.

        Args:
            pattern: What to delete. "all" wipes the whole graph; "n" deletes
                the nodes labelled ``label``; "r", "nr", "rm" and "nrm" match
                relationships of type ``label`` and delete the relationship
                plus, respectively, nothing else, node n, node m, or both.
                Any other value does nothing.
            label: Node label (pattern "n") or relationship type (the other
                patterns). Not used for "all".
        """
        if pattern == "all":
            self.graph.delete_all()
            return
        # Cypher text placed before and after the label, per pattern.
        statements = {
            "n": ("MATCH(n:", ") DETACH DELETE n"),
            "r": ("MATCH (n)-[r:", "]-(m) DETACH DELETE r"),
            "nr": ("MATCH (n)<-[r:", "]-(m) DETACH DELETE r DELETE n"),
            "rm": ("MATCH (n)-[r:", "]->(m) DETACH DELETE r DELETE m"),
            "nrm": ("MATCH (n)-[r:", "]-(m) DETACH DELETE r DELETE n DELETE m"),
        }
        if pattern in statements:
            head, tail = statements[pattern]
            self.graph.run(head + label + tail)

    def reset(self, pattern="n", label=None, filename=None):
        """Delete all "NluCell" nodes and re-import them from an Excel file.

        Args:
            pattern: Accepted but not used; the deletion always uses
                pattern "n".
            label: Accepted but not used; the deletion always targets the
                label "NluCell".
            filename: Path of the Excel file handed to handle_excel.

        Raises:
            AssertionError: If ``filename`` is None.
        """
        assert filename is not None, "filename can not be None."
        self.delete(pattern="n", label="NluCell")
        print("Delete successfully!")
        if not os.path.exists(filename):
            # Nothing could be re-imported, so do not claim the reset worked.
            print("You can set 'filename=<filepath>' when you call 'Database.reset.'")
            return
        self.handle_excel(filename)
        print("Reset successfully!")

    def reset_ts(self, pattern="n", label="TestStandard", filename=None):
        """Delete the test-standard nodes and re-import them from an Excel file.

        Args:
            pattern: Accepted but not used; the deletion always uses
                pattern "n".
            label: Label of the nodes to delete before the import.
            filename: Path of the Excel file handed to handle_ts.

        Raises:
            AssertionError: If ``filename`` is None.
        """
        assert filename is not None, "filename can not be None."
        self.delete(pattern="n", label=label)
        print("Delete test standard successfully!")
        if not os.path.exists(filename):
            # Nothing could be re-imported, so do not claim the reset worked.
            print("You can set 'filename=<filepath>' when you call 'Database.reset.'")
            return
        self.handle_ts(filename)
        print("Reset test standard successfully!")

    def add_qa(self, label="NluCell", name=None, content=None, topic="",
               behavior="", parameter="", url="", tag="", keywords="", api="",
               txt="", img="", chart="", delimiter=None):
        """Create one question/answer node per question contained in ``name``.

        ``name`` is split on ``delimiter`` and every non-empty piece becomes a
        node labelled ``label`` that shares the remaining fields. The stored
        tag is always the result of ``get_tag(question, self.gconfig)``; the
        ``tag`` argument itself is not stored. Every node gets ``hot="0"``.

        Raises:
            AssertionError: If ``name`` or ``content`` is None.
        """
        assert name is not None, "name must be string."
        assert content is not None, "content must be string."
        for question in name.split(delimiter):
            if not question:
                # Skip empty questions so that a badly formatted knowledge-base
                # sheet cannot store empty question/answer pairs.
                continue
            properties = dict(
                name=question, content=content, topic=topic,
                behavior=behavior, parameter=parameter, url=url,
                tag=get_tag(question, self.gconfig), keywords=keywords,
                api=api, txt=txt, img=img, chart=chart, hot="0")
            self.graph.create(Node(label, **properties))

    def add_ts(self, label="TestStandard", question=None, content=None,
               context="", behavior="", parameter="", url=""):
        """Create one test-standard node per whitespace-separated question.

        ``question`` is split on whitespace; each piece becomes a node
        labelled ``label`` carrying the shared ``content``, ``context``,
        ``behavior``, ``parameter`` and ``url``.

        Raises:
            AssertionError: If ``question`` or ``content`` is None.
        """
        assert question is not None, "question must be string."
        assert content is not None, "content must be string."
        for single_question in question.split():
            if not single_question:
                # Never store an empty question (guards against badly
                # formatted knowledge-base sheets).
                continue
            properties = dict(
                question=single_question, content=content, context=context,
                behavior=behavior, parameter=parameter, url=url)
            self.graph.create(Node(label, **properties))

    def handle_ts(self, filename=None, custom_sheets=None):
        """Import test-standard rows from an Excel workbook.

        Every selected sheet is read from its third row on (the first two rows
        are headers). Columns A-F hold, in order: question, content, context,
        behavior, parameter and url; each row is stored through add_ts.

        Args:
            filename: Path handed to read_excel.
            custom_sheets: Optional collection of sheet names; only those that
                also exist in the workbook are imported. All sheets are
                imported when it is empty or None.

        Returns:
            None. The import stops at the first error, which is printed.
        """
        assert filename is not None, "filename can not be None."
        data = read_excel(filename)
        data_sheets = data.sheet_names()
        if custom_sheets:
            # Only import the requested sheets that really exist.
            sheet_names = list(set(data_sheets) & set(custom_sheets))
        else:
            sheet_names = data_sheets
        for sheet_name in sheet_names:
            table = data.sheet_by_name(sheet_name)
            if not data:
                print('Error! Data of %s is empty!' %sheet_name)
                return None
            try:
                row_count = table.nrows
                # Translate the column letters A-F into zero-based indexes.
                letter_to_index = {
                    letter: position
                    for position, letter in enumerate(string.ascii_uppercase)}
                columns = [letter_to_index.get(letter) for letter in 'ABCDEF']
                # The first two rows are headers.
                for row in range(2, row_count):
                    question, content, context, behavior, parameter, url = (
                        table.cell(row, column).value for column in columns)
                    self.add_ts(question=question, content=content,
                                context=context, behavior=behavior,
                                parameter=parameter, url=url)
            except Exception as error:
                print('Error: %s' %error)
                return None

    def handle_excel(self, filename=None, custom_sheets=None):
        """Import question/answer rows from an Excel workbook.

        Every selected sheet is read from its third row on (the first two rows
        are headers). Columns A-L hold, in order: name, content, topic,
        behavior, parameter, url, tag, keywords, api, txt, img and chart; each
        row is stored through add_qa with "|" as the question delimiter.
        Non-admin instances store the fixed topic "user_chat" instead of the
        sheet's topic column.

        After a sheet has been imported its topics are recorded on the Config
        node named after the sheet: the node is created and linked to the
        current user when it does not exist yet, otherwise the new topics are
        merged into its comma-separated ``topic`` property.

        Args:
            filename: Path handed to read_excel.
            custom_sheets: Optional collection of sheet names; only those that
                also exist in the workbook are imported. All sheets are
                imported when it is empty or None.

        Returns:
            None. The import stops at the first error, which is printed.

        Raises:
            AssertionError: If ``filename`` is None.
        """
        assert filename is not None, "filename can not be None"
        data = read_excel(filename)
        data_sheets = data.sheet_names()
        if custom_sheets:
            # Only import the requested sheets that really exist.
            sheet_names = list(set(data_sheets).intersection(set(custom_sheets)))
        else:
            sheet_names = data_sheets
        for sheet_name in sheet_names:
            table = data.sheet_by_name(sheet_name)
            topics = []
            if not data:
                print('Error! Data of %s is empty!' %sheet_name)
                return None
            try:
                # The first two rows are headers.
                for i in range(2, table.nrows):
                    # Columns A-L, i.e. zero-based indexes 0-11.
                    (name, content, sheet_topic, behavior, parameter, url, tag,
                     keywords, api, txt, img, chart) = (
                         table.cell(i, column).value for column in range(12))
                    # TODO: decide which fields ordinary users may customise.
                    topic = sheet_topic if self.is_admin else "user_chat"
                    self.add_qa(name=name, content=content, topic=topic,
                                behavior=behavior, parameter=parameter,
                                url=url, tag=tag, keywords=keywords, api=api,
                                txt=txt, img=img, chart=chart, delimiter="|")
                    # Remember the topic for the sheet's Config node.
                    topics.append(topic)
            except Exception as error:
                print('Error: %s' %error)
                return None
            # Modified 2017.4.28: create the Config subgraph when the sheet
            # name is new, otherwise only extend its topic property.
            # Graph.find_one is deprecated (2017-5-18); NodeSelector is used.
            config_node = self.selector.select("Config", name=sheet_name).first()
            if not config_node:
                # Values are passed as query parameters so that quotes in a
                # sheet name or topic cannot break or alter the statement.
                # The {name} placeholder form matches the Neo4j generation the
                # rest of this class targets (it relies on CREATE UNIQUE).
                self.graph.run(
                    "MATCH (user:User {userid: {userid}})\n"
                    "CREATE (config:Config {name: {name}, topic: {topic}})\n"
                    "CREATE (user)-[:has {bselected: 1, available: 1}]->(config)",
                    {"userid": self.gconfig["userid"],
                     "name": sheet_name,
                     "topic": ",".join(set(topics))})
            else:
                alltopics = config_node["topic"].split(",")
                alltopics.extend(topics)
                config_node["topic"] = ",".join(set(alltopics))
                self.graph.push(config_node)

    def handle_txt(self, filename=None):
        """Build question/answer nodes from a UTF-8 text file.

        The file is read two lines at a time: a question line followed by its
        answer line. Reading stops at the first question line that is empty
        after stripping trailing whitespace (which includes end of file).
        Each pair is printed and stored through add_qa with "|" as the
        question delimiter.

        Raises:
            AssertionError: If ``filename`` is None.
        """
        assert filename is not None, "filename can not be None!"
        with open(filename, encoding="UTF-8") as file:
            while True:
                question = file.readline().rstrip()
                if not question:
                    break
                answer = file.readline().rstrip()
                print("question: " + question)
                print("answer: " + answer)
                self.add_qa(name=question, content=answer, delimiter="|")

    def register_subgraph(self, *, label="Config", name=None, topic=None):
        """Register a knowledge-base subgraph, or add topics to an existing one.

        Args:
            label: Label of the subgraph node.
            name: Name of the subgraph node.
            topic: Comma-separated topics. They become the ``topic`` property
                of a new node, or are merged (without duplicates) into the
                ``topic`` property of the existing node.

        Raises:
            AssertionError: If ``name`` or ``topic`` is None.
        """
        assert name is not None, "Subgraph name can not be None!"
        assert topic is not None, "Subgraph topic can not be None!"
        existing = self.selector.select(label, name=name).first()
        if not existing:
            self.graph.create(Node(label, name=name, topic=topic))
            return
        merged_topics = existing["topic"].split(",") + topic.split(",")
        existing["topic"] = ",".join(set(merged_topics))
        self.graph.push(existing)

    def register_user(self, *, label="User", profile=None):
        """Interactively register a new user and configure its knowledge bases.

        Prompts on standard input for a userid, which must be non-empty and
        not already used by a User node, and for the profile fields. It then
        creates a node labelled ``label`` and calls manage_user once for every
        Config node in the graph.

        Args:
            label: Label of the created node.
            profile: Not used.
        """
        userid = input("\n欢迎注册!请输入userid: ")
        # Keep asking until the id is non-empty and free. The lookup goes
        # through the selector, so the typed text is never spliced into a
        # Cypher statement.
        while True:
            if not userid:
                userid = input("userid不能为空!请输入userid: ")
            elif self.selector.select("User", userid=userid).first() is not None:
                userid = input("用户已存在!请输入新的userid: ")
            else:
                break
        # Each profile field is prompted with "<field>: ", in this order.
        profile_fields = (
            "username", "robotname", "robotage", "robotgender", "mother",
            "father", "companyname", "companytype", "servicename", "director",
            "address", "province", "city")
        answers = {}
        for field in profile_fields:
            answers[field] = input(field + ": ")
        node = Node(label, userid=userid, **answers)
        self.graph.create(node)
        print("注册成功!")
        # Set the knowledge-base permissions of the new user.
        subgraph_names = [item["name"] for item in self.selector.select("Config")]
        print("可配置知识库列表:", subgraph_names)
        for name in subgraph_names:
            self.manage_user(userid=userid, name=name)

    def manage_user(self, *, userid=None, name=None):
        """Interactively link a user to a knowledge base.

        Asks on standard input whether the knowledge base is selected and
        whether it is available (1 or 0, an empty answer means 1), then
        creates the ``has`` relationship between the User and Config nodes if
        it is missing and stores both flags on it.

        Args:
            userid: userid of the User node.
            name: Name of the Config node.

        Raises:
            AssertionError: If ``userid`` or ``name`` is None.
        """
        assert userid is not None, "Userid can not be None!"
        assert name is not None, "Subgraph name can not be None!"
        user = self.selector.select("User", userid=userid).first()
        if not user:
            print("用户不存在,建议您先注册!")
            return
        subgraph = self.selector.select("Config", name=name).first()
        if not subgraph:
            print("知识库不存在,建议您先注册!")
            return

        print("\n待配置知识库:", name)
        flags = {}
        for flag, prompt in (("bselected", "是否选择 [1/0]: "),
                             ("available", "是否可用 [1/0]: ")):
            answer = input(prompt).strip() or "1"
            # Only 0 and 1 are meaningful; ask again instead of putting
            # arbitrary typed text into the statement.
            while answer not in ("0", "1"):
                answer = input(prompt).strip() or "1"
            flags[flag] = int(answer)
        # All values travel as query parameters, never as statement text.
        # The {name} placeholder form matches the Neo4j generation that still
        # offers CREATE UNIQUE.
        set_string = (
            "MATCH (user:User {userid: {userid}}), (subgraph:Config {name: {name}}) "
            "CREATE UNIQUE (user)-[r:has]->(subgraph) "
            "SET r.bselected={bselected}, r.available={available}")
        self.graph.run(set_string, {
            "userid": userid,
            "name": name,
            "bselected": flags["bselected"],
            "available": flags["available"],
        })
Esempio n. 35
0
    for row in f.readlines():
        row = row.strip()
        rowDict = row.split(':')
        bingming.append(rowDict[1])

# Load the three description files: one stripped line per disease each.
with open("简介.txt", encoding="utf-8") as f:
    dingyi = [row.strip() for row in f]
with open("病因病机.txt", encoding="utf-8") as f:
    bingyibingji = [row.strip() for row in f]
with open("临床表现.txt", encoding="utf-8") as f:
    bingzhengtezheng = [row.strip() for row in f]

matcher = NodeMatcher(graph)
# Node property -> list holding that property's value for each disease index.
property_sources = (
    ('皮肤病定义', dingyi),
    ('皮肤病病因病理', bingyibingji),
    ('皮肤病症状特点', bingzhengtezheng),
)
# Entries 0-43 are processed; entry i of every list belongs to bingming[i].
for i in range(44):
    for node in matcher.match("皮肤病", 皮肤病名称=bingming[i]):
        for property_name, values in property_sources:
            node[property_name] = values[i]
        graph.push(node)
Esempio n. 36
0
class DataBase:
    def __init__(self):
        """Register the Neo4j credentials and open the local graph."""
        host_port, user, password = "localhost:7474", "neo4j", "st1215"
        py2neo.authenticate(host_port, user, password)
        self.graph = Graph("http://localhost:7474/db/data/")

    def get_all_news_from(self, site):
        """Return the classified news published by a site.

        Args:
            site: Value compared with the ``name`` property of Site nodes.

        Returns:
            A list of ``(title, cleaned content, class description)`` tuples,
            one per News node linked to the site and to a Tipo node. The
            content is passed through removerAcentosECaracteresEspeciais.
        """
        # The site name is sent as a query parameter so that quotes in it
        # cannot break or alter the statement.
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News)-[:E]-(t:Tipo) '
            'WHERE s.name = {site} RETURN n,t',
            {"site": site}).data()
        return [
            (record['n']['title'],
             removerAcentosECaracteresEspeciais(record['n']['content']),
             record['t']['description'])
            for record in all_news
        ]

    def get_all_news_from_no_class(self, site):
        """Return the news published by a site, without class information.

        Args:
            site: Value compared with the ``name`` property of Site nodes.

        Returns:
            A list of ``(title, cleaned content, '')`` tuples, one per News
            node linked to the site. The content is passed through
            removerAcentosECaracteresEspeciais.
        """
        # The site name is sent as a query parameter so that quotes in it
        # cannot break or alter the statement.
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) '
            'WHERE s.name = {site} RETURN n',
            {"site": site}).data()
        return [
            (record['n']['title'],
             removerAcentosECaracteresEspeciais(record['n']['content']),
             '')
            for record in all_news
        ]

    def get_news_by_title(self, title):
        """Return a News object filled from the stored news with this title.

        Args:
            title: Value compared with the ``title`` property of News nodes.

        Returns:
            A News object whose ``title`` and ``url`` come from the matching
            node. When several nodes match, the last one returned wins; when
            none matches, the object is returned unfilled.
        """
        # Titles are free text, so the value is sent as a query parameter
        # instead of being spliced into the statement.
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) '
            'WHERE n.title = {title} RETURN n',
            {"title": title}).data()
        news = News()
        for record in all_news:
            # Title and URL are stored separately; the chained assignment used
            # before overwrote the title with the URL.
            news.title = record['n']['title']
            news.url = record['n']['url']

        return news

    def get_all_data_set(self, sites):
        dataSet = list()
        for s in sites:
            dataSet.extend(self.get_all_news_from(s))
        return dataSet

    # def get_queue(self, site_url):
    #     queue=set()
    #     for s in SiteQueue.select(self.graph).where(site=site_url):
    #         queue.add(s.page)
    #     return queue
    #
    # def save_queue(self, site, page):
    #     queue=SiteQueue()
    #     queue.site=site
    #     queue.page=page
    #     self.graph.push(queue)

    def get_site(self, name):
        """Return the first Site whose name matches, or None when none does."""
        matching_sites = Site.select(self.graph).where(name=name)
        return next(iter(matching_sites), None)

    def get_clazz(self, name):
        """Return the first Tipo whose description matches, or None."""
        matching_tipos = Tipo.select(self.graph).where(description=name)
        return next(iter(matching_tipos), None)

    def save_site(self, site_name, url):
        """Store a Site with the given name and URL in the graph."""
        new_site = Site()
        new_site.url = url
        new_site.name = site_name
        self.graph.push(new_site)

    def save_news(self, site, url, title, sub_title, content, tipo):
        """Store a news item linked to its site and its class.

        Args:
            site: Name of the Site the news belongs to (looked up with
                get_site).
            url: URL of the news item.
            title: Title of the news item.
            sub_title: Subtitle of the news item.
            content: Body text of the news item.
            tipo: Description of the Tipo (class) to link, looked up with
                get_clazz.
        """
        owner_site = self.get_site(site)
        news_class = self.get_clazz(tipo)
        news = News()
        news.site.add(owner_site)
        news.tipo.add(news_class)
        news.url = url
        news.title = title
        news.sub_title = sub_title
        news.content = content
        self.graph.merge(news)

    def create_rel(self, node1, node2):
        """Create a PUBLICOU relationship from ``node1`` to ``node2``.

        The previous body ignored both arguments and handed a Cypher pattern
        string to ``graph.create``, which expects graph entities.

        Args:
            node1: Start node (the publishing site).
                NOTE(review): assumed to be a py2neo Node -- confirm callers.
            node2: End node (the published news); same assumption.
        """
        self.graph.create(py2neo.Relationship(node1, "PUBLICOU", node2))

    def install(self):
        """Remove every node, together with its relationships, from the graph."""
        # One DETACH DELETE already removes everything; the statement used to
        # be sent twice.
        self.graph.run("MATCH (n) DETACH DELETE n")

    def delete(self):
        """Empty the graph, then merge the 'False' and 'True' Tipo nodes."""
        self.graph.delete_all()
        for description in ('False', 'True'):
            tipo = Tipo()
            tipo.description = description
            self.graph.merge(tipo)
class TestNeoDBHandler(unittest.TestCase):

    def setUp(self):
        """Fill the test graph with five TEST/Twirp nodes and five relationships."""
        self.graph = Graph(TEST_GRAPH_DB)

        # Nodes
        # -----
        # (username, name, handle, favourite_hashtag, party) per node index.
        identities = [
            ("******", "Michael Blue Eyes", "MBEyes", "#roth", "DC"),
            ("******", "Little Richard", "LRichy", "#rawls", "DC"),
            ("******", "The Boy Wonder", "tBW", "#richyfeynman", "Marvel"),
            ("******", "Kendog Lamar", "Kdog", "#kanye", "Marvel"),
            ("******", "Tiny Hands", "tinyhands", "#ihavetinyhands", "Beano"),
        ]

        self.node_list = [Node("TEST", test_id=i) for i in xrange(5)]
        for i, (node, identity) in enumerate(zip(self.node_list, identities)):
            username, name, handle, hashtag, party = identity
            node.labels.add("Twirp")
            # Counters derived from the node index, with blank identity fields.
            node.properties.update({
                "user_id": i*100000,
                "username": "",
                "name": "",
                "handle": "",
                "followers_count": i*100,
                "friends_count": i*50,
                "tweet_count": i*10,
                "retweet_count": i*5,
                "been_retweeted_count": i*3,
                "favourite_hashtag": "",
                "hashtag_count": i*2,
                "archipelago_id": i*1,
                "subscribed": True,
                "constituency": "CB"+str(i),
                "offices": ["office"+str(i), "sedge steward"],
            })
            # Identity of this particular node.
            node.properties.update({
                "username": username,
                "name": name,
                "handle": handle,
                "favourite_hashtag": hashtag,
                "party": party,
            })

        # Relationships
        # --------------
        # mbe -[MENTION]> lrich
        # mbe -[REPLIES]> ken
        # lrich -[REPLIES]> mbe
        # tbw -[RETWEETS]> lrich
        # tbw -[MENTIONS_BY_PROXY]> mbe
        # ken -!->
        # th  -!->

        # Counters every relationship starts with.
        defaults = {
            "mentions": 0,
            "mention_last": "",
            "mention_date": "",
            "replies": 0,
            "reply_last": "",
            "reply_date": "",
            "retweets": 0,
            "retweet_last": "",
            "retweet_date": "",
        }

        # (start index, type, end index, values that differ from the defaults)
        links = [
            (0, "DIRECT", 1,
             {"mentions": 5, "mention_last": "1000000", "mention_date": "today"}),
            (0, "DIRECT", 3,
             {"replies": 10, "reply_last": "2000000", "reply_date": "tommorow"}),
            (1, "DIRECT", 0,
             {"replies": 15, "reply_last": "3000000", "reply_date": "yesterday"}),
            (2, "DIRECT", 1,
             {"retweets": 20, "retweet_last": "4000000", "retweet_date": "thismorning"}),
            (2, "INDIRECT", 0,
             {"mentions": 1, "mention_last": "3000000", "mention_date": "yesterday"}),
        ]

        relationships = []
        for start, kind, end, overrides in links:
            relationship = Relationship(
                self.node_list[start], kind, self.node_list[end], **defaults)
            relationship.properties.update(overrides)
            relationships.append(relationship)

        # Nodes first, then the relationships in the order listed above.
        for entity in self.node_list + relationships:
            self.graph.create(entity)

        self.graph.push()

    def tearDown(self):
        """Delete every TEST node and check that none is left."""
        self.graph.cypher.execute("MATCH (n:TEST) DETACH DELETE n")

        leftovers = list(self.graph.find('TEST'))
        self.assertEqual(leftovers, [])




        ########################################################################
        #                          CYPHER QUERIES                              #
        ########################################################################

    def test_get_party_nodes(self):
        # get_party_nodes('Marvel', 0) must return both Marvel twirps together
        # with every relationship they start.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Kendog Lamar starts no relationship: every per-target list is empty.
        kendog = {
            "name": "Kendog Lamar",
            "handle": "Kdog",
            "party": "Marvel",
            "constituency": "CB3",
            "offices": ["office3", "sedge steward"],

            "tweets": 30,
            "friends": 150,
            "followers": 300,
            "archipelago_id": 3,

            "tweeted": [],
            "mentions": [],
            "mention_last": [],
            "mention_date": [],
            "replies": [],
            "reply_last": [],
            "reply_date": [],
            "retweets": [],
            "retweet_last": [],
            "retweet_date": [],
            "tweet_type": [],
        }
        # The Boy Wonder: INDIRECT mention of MBEyes, DIRECT retweets of LRichy.
        boy_wonder = {
            "name": "The Boy Wonder",
            "handle": "tBW",
            "party": "Marvel",
            "constituency": "CB2",
            "offices": ["office2", "sedge steward"],

            "tweets": 20,
            "friends": 100,
            "followers": 200,
            "archipelago_id": 2,

            "tweeted": ['MBEyes', 'LRichy'],
            "mentions": [1, 0],
            "mention_last": ['3000000', ""],
            "mention_date": ['yesterday', ""],
            "replies": [0, 0],
            "reply_last": ["", ""],
            "reply_date": ["", ""],
            "retweets": [0, 20],
            "retweet_last": ["", '4000000'],
            "retweet_date": ["", 'thismorning'],
            "tweet_type": ["INDIRECT", "DIRECT"],
        }
        expected = [kendog, boy_wonder]

        # Make request
        actual = list(handler.get_party_nodes('Marvel', 0))

        # Test against reference
        self.assertEqual(len(actual), 2)

        for got, wanted in zip(actual, expected):
            for key in wanted:
                self.assertEqual(got[key], wanted[key])
    
    def test_get_party_nodes_min_tweet(self):
        # With 5 as the second argument only The Boy Wonder's DIRECT link to
        # LRichy (20 retweets) is expected; the INDIRECT link to MBEyes
        # (1 mention) is not.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Kendog Lamar starts no relationship: every per-target list is empty.
        kendog = {
            "name": "Kendog Lamar",
            "handle": "Kdog",
            "party": "Marvel",
            "constituency": "CB3",
            "offices": ["office3", "sedge steward"],

            "tweets": 30,
            "friends": 150,
            "followers": 300,
            "archipelago_id": 3,

            "tweeted": [],
            "mentions": [],
            "mention_last": [],
            "mention_date": [],
            "replies": [],
            "reply_last": [],
            "reply_date": [],
            "retweets": [],
            "retweet_last": [],
            "retweet_date": [],
            "tweet_type": [],
        }
        boy_wonder = {
            "name": "The Boy Wonder",
            "handle": "tBW",
            "party": "Marvel",
            "constituency": "CB2",
            "offices": ["office2", "sedge steward"],

            "tweets": 20,
            "friends": 100,
            "followers": 200,
            "archipelago_id": 2,

            "tweeted": ['LRichy'],
            "mentions": [0],
            "mention_last": [""],
            "mention_date": [""],
            "replies": [0],
            "reply_last": [""],
            "reply_date": [""],
            "retweets": [20],
            "retweet_last": ['4000000'],
            "retweet_date": ['thismorning'],
            "tweet_type": ["DIRECT"],
        }
        expected = [kendog, boy_wonder]

        # Make request
        actual = list(handler.get_party_nodes('Marvel', 5))

        # Test against reference
        self.assertEqual(len(actual), 2)

        for got, wanted in zip(actual, expected):
            for key in wanted:
                self.assertEqual(got[key], wanted[key])


    def test_get_cross_party_nodes_default(self):
        # get_cross_party_nodes('Marvel', 'DC', 0) must return only The Boy
        # Wonder, with both of his relationships to DC twirps.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        boy_wonder = {
            "name": "The Boy Wonder",
            "handle": "tBW",
            "party": "Marvel",
            "constituency": "CB2",
            "offices": ["office2", "sedge steward"],

            "tweets": 20,
            "friends": 100,
            "followers": 200,
            "archipelago_id": 2,

            "tweeted": ['MBEyes', 'LRichy'],
            "mentions": [1, 0],
            "mention_last": ['3000000', ""],
            "mention_date": ['yesterday', ""],
            "replies": [0, 0],
            "reply_last": ["", ""],
            "reply_date": ["", ""],
            "retweets": [0, 20],
            "retweet_last": ["", '4000000'],
            "retweet_date": ["", 'thismorning'],
            "tweet_type": ["INDIRECT", "DIRECT"],
        }
        expected = [boy_wonder]

        actual = list(handler.get_cross_party_nodes('Marvel', 'DC', 0))

        # Test against reference
        self.assertEqual(len(actual), 1)

        for got, wanted in zip(actual, expected):
            for key in wanted:
                self.assertEqual(got[key], wanted[key])

    def test_get_cross_party_nodes_min_tweets(self):
        # With 5 as the third argument only The Boy Wonder's DIRECT link to
        # LRichy (20 retweets) is expected.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        boy_wonder = {
            "name": "The Boy Wonder",
            "handle": "tBW",
            "party": "Marvel",
            "constituency": "CB2",
            "offices": ["office2", "sedge steward"],

            "tweets": 20,
            "friends": 100,
            "followers": 200,
            "archipelago_id": 2,

            "tweeted": ['LRichy'],
            "mentions": [0],
            "mention_last": [""],
            "mention_date": [""],
            "replies": [0],
            "reply_last": [""],
            "reply_date": [""],
            "retweets": [20],
            "retweet_last": ['4000000'],
            "retweet_date": ['thismorning'],
            "tweet_type": ["DIRECT"],
        }
        expected = [boy_wonder]

        actual = list(handler.get_cross_party_nodes('Marvel', 'DC', 5))

        # Test against reference
        self.assertEqual(len(actual), 1)

        for got, wanted in zip(actual, expected):
            for key in wanted:
                self.assertEqual(got[key], wanted[key])



        ########################################################################
        #               ADDING TO DB   (TWIRPS CLASSES)->(PY2NEO OBJS)         #
        ########################################################################
        
    def test_add_Twirp_to_database(self):
        # A Twirp added in test mode must be stored as exactly one node that
        # carries the TEST, Twirp and Other labels and the Twirp's values.
        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Test Data
        new_twirp = Twirp(None, 'test')
        new_twirp.id = 314150000000
        # Must equal the username asserted below (the fixture used to hold the
        # placeholder '******').
        new_twirp.username = 'BilboBagginsMP'
        new_twirp.name = 'Bilbo Baggins'
        new_twirp.handle = 'bilbo'
        new_twirp.followers_count = 20
        new_twirp.friends_count = 30
        new_twirp.tweet_count = 40
        new_twirp.retweet_count = 50
        # NOTE(review): set as been_retweet_count here, but the node property
        # checked below is been_retweeted_count -- confirm against Twirp.
        new_twirp.been_retweet_count = 60
        new_twirp.favourite_hashtag = '#onering'
        new_twirp.hashtag_count = 70
        new_twirp.archipelago_id = 80
        new_twirp.twirps_type = -1
        new_twirp.subscribed = False
        new_twirp.geo = False

        # Add to database (with 'TEST' label)
        neo_db_handler.add_Twirp_to_database(new_twirp, is_test_mode=True)

        # Check results
        results = list(self.graph.cypher.execute(
            "MATCH (n {handle:'bilbo'}) RETURN n"))
        self.assertEqual(len(results), 1)
        node = results[0][0]

        # Interrogate Node
        self.assertEqual(node.get_labels(), [u'TEST', u'Twirp', u'Other'])

        expected_properties = [
            ("user_id", 314150000000),
            ("username", 'BilboBagginsMP'),
            ("name", 'Bilbo Baggins'),
            ("handle", 'bilbo'),
            ("followers_count", 20),
            ("friends_count", 30),
            ("tweet_count", 40),
            ("retweet_count", 50),
            ("been_retweeted_count", 60),
            ("favourite_hashtag", '#onering'),
            ("hashtag_count", 70),
            ("archipelago_id", 80),
            ("subscribed", False),
        ]
        for key, wanted in expected_properties:
            self.assertEqual(node[key], wanted)


    def test_add_Tweet_to_database__mention(self):
        # Scenario: LRichy posts "Hey @tinyhands" -- a plain mention that is
        # neither a reply nor a retweet. The graph must then hold exactly one
        # LRichy -> tinyhands relationship, typed DIRECT, on which only the
        # mention fields are populated.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Fixture: (attribute, value) pairs applied to the tweet in order.
        tweet_fields = (
            ('tweet_id', 1),
            ('user_id', 100000),
            ('handle', 'LRichy'),
            ('mentions', [(400000, 'tinyhands')]),
            ('content', 'Generic tweet @tinyhands'),   # not stored here
            ('is_retweet', False),
            ('retweeted_user', None),
            ('retweet_status_id', 0),
            ('is_reply', False),
            ('in_reply_to_user', None),
            ('in_reply_to_status_id', None),
            ('retweet_count', 3),                      # not stored here
            ('favourite_count', 4),                    # not stored here
            ('hashtags', ['clothes']),                 # not stored here
            ('date', 'a date string'),
            ('urls', ['https://url.com']),             # not stored here
            ('website_link', 'twitter.com/status/madeupstatus1'),
        )
        tweet = Tweet(None, 'test')
        for field, value in tweet_fields:
            setattr(tweet, field, value)

        # Write the tweet into the graph
        handler.add_Tweet_to_database(tweet)

        # Fetch every relationship running from LRichy to tinyhands
        cypher = """MATCH (a {handle:'LRichy'})-[r]->(b {handle:'tinyhands'})
                       RETURN r"""
        rows = list(self.graph.cypher.execute(cypher))
        self.assertEqual(len(rows), 1)
        edge = rows[0][0]

        # Relationship type, then each stored property in turn
        self.assertEqual(edge.type, u'DIRECT')

        expected_properties = (
            ("mentions", 1),
            ("mention_last", '1'),
            ("mention_date", 'a date string'),
            ("replies", 0),
            ("reply_last", ''),
            ("reply_date", ''),
            ("retweets", 0),
            ("retweet_last", ''),
            ("retweet_date", ''),
        )
        for prop, value in expected_properties:
            self.assertEqual(edge[prop], value)
       

    def test_add_Tweet_to_database__reply(self):
        # Scenario: LRichy replies to tBW with "Hey @tBW, @tinyhands".
        #   (LRichy)->(tBW)        is recorded as a reply only
        #   (LRichy)->(tinyhands)  is recorded as a mention
        # Both relationships are expected to be typed DIRECT.
        handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Fixture: (attribute, value) pairs applied to the tweet in order.
        tweet_fields = (
            ('tweet_id', 1),
            ('user_id', 100000),
            ('handle', 'LRichy'),
            ('mentions', [(400000, 'tinyhands'), (200000, 'tBW')]),
            ('content', 'Generic tweet @tinyhands @tBW'),  # not stored here
            ('is_retweet', False),
            ('retweeted_user', None),
            ('retweet_status_id', 0),
            ('is_reply', True),
            ('in_reply_to_user', (200000, 'tBW')),
            ('in_reply_to_status_id', 2),
            ('retweet_count', 3),                          # not stored here
            ('favourite_count', 4),                        # not stored here
            ('hashtags', ['clothes']),                     # not stored here
            ('date', 'a date string'),
            ('urls', ['https://url.com/']),                # not stored here
            ('website_link', 'twitter.com/status/madeupstatus1'),
        )
        tweet = Tweet(None, 'test')
        for field, value in tweet_fields:
            setattr(tweet, field, value)

        # Write the tweet into the graph
        handler.add_Tweet_to_database(tweet)

        # All outgoing relationships of LRichy except the one to MBEyes,
        # sorted by target name so the rows arrive in a fixed order.
        # presumably the MBEyes edge pre-exists in the test graph -- verify
        # against the fixture setup.
        cypher = """MATCH (a {handle:'LRichy'})-[r]->(b) 
                                   WHERE b.handle<>'MBEyes' 
                                   RETURN r, b.name ORDER BY b.name"""
        rows = list(self.graph.cypher.execute(cypher))

        self.assertEqual(len(rows), 2)

        # One entry per expected row: (edge type, target name, properties)
        expectations = (
            (u'DIRECT', 'The Boy Wonder', (
                ("mentions", 0),
                ("mention_last", ''),
                ("mention_date", ''),
                ("replies", 1),
                ("reply_last", '1'),
                ("reply_date", 'a date string'),
                ("retweets", 0),
                ("retweet_last", ''),
                ("retweet_date", ''),
            )),
            (u'DIRECT', 'Tiny Hands', (
                ("mentions", 1),
                ("mention_last", '1'),
                ("mention_date", 'a date string'),
                ("replies", 0),
                ("reply_last", ''),
                ("reply_date", ''),
                ("retweets", 0),
                ("retweet_last", ''),
                ("retweet_date", ''),
            )),
        )
        for row, (edge_type, target_name, properties) in zip(rows, expectations):
            self.assertEqual(row[0].type, edge_type)
            self.assertEqual(row[1], target_name)
            for prop, value in properties:
                self.assertEqual(row[0][prop], value)


    def test_add_Tweet_to_database__retweet(self):
        # Scenario: tinyhands retweets a tweet by MBEyes that mentions Kdog.
        #   (tinyhands)->(MBEyes)  is recorded as a retweet, typed DIRECT
        #   (tinyhands)->(Kdog)    is a mention by proxy, typed INDIRECT

        neo_db_handler = NeoDBHandler(n4_database=TEST_GRAPH_DB)

        # Fixture: (attribute, value) pairs applied to the tweet in order.
        tweet_fields = (
            ('tweet_id', 1),
            ('user_id', 400000),
            ('handle', 'tinyhands'),
            ('mentions', [(300000, 'Kdog')]),
            ('content', 'Generic tweet @Kdog'),        # not stored here
            ('is_retweet', True),
            ('retweeted_user', (0, 'MBEyes')),
            ('retweet_status_id', 2),
            ('is_reply', False),
            ('in_reply_to_user', None),
            ('in_reply_to_status_id', None),
            ('retweet_count', 3),                      # not stored here
            ('favourite_count', 4),                    # not stored here
            ('hashtags', []),                          # not stored here
            ('date', 'a date string'),
            ('urls', ['https://url.com/']),            # not stored here
            ('website_link', 'twitter.com/status/madeupstatus1'),
        )
        new_tweet = Tweet(None, 'test')
        for field, value in tweet_fields:
            setattr(new_tweet, field, value)

        # Write the tweet into the graph
        neo_db_handler.add_Tweet_to_database(new_tweet)

        # Every outgoing relationship of tinyhands, sorted by target name so
        # the rows arrive in a fixed order.
        cypher = """MATCH (a {handle:'tinyhands'})-[r]->(b) 
                                    RETURN r, b.name ORDER BY b.name"""
        results = list(self.graph.cypher.execute(cypher))

        self.assertEqual(len(results), 2)

        # One entry per expected row: (edge type, target name, properties)
        expectations = (
            (u'INDIRECT', 'Kendog Lamar', (
                ("mentions", 1),
                ("mention_last", '1'),
                ("mention_date", 'a date string'),
                ("replies", 0),
                ("reply_last", ''),
                ("reply_date", ''),
                ("retweets", 0),
                ("retweet_last", ''),
                ("retweet_date", ''),
            )),
            (u'DIRECT', 'Michael Blue Eyes', (
                ("mentions", 0),
                ("mention_last", ''),
                ("mention_date", ''),
                ("replies", 0),
                ("reply_last", ''),
                ("reply_date", ''),
                ("retweets", 1),
                ("retweet_last", '1'),
                ("retweet_date", 'a date string'),
            )),
        )
        for row, (edge_type, target_name, properties) in zip(results, expectations):
            self.assertEqual(row[0].type, edge_type)
            self.assertEqual(row[1], target_name)
            for prop, value in properties:
                self.assertEqual(row[0][prop], value)