Example #1
1
class TwitterGraph(object):
    '''A class for interfacing with the Neo4j Twitter network database'''

    # Initial setup and linking into the database
    def __init__(self, host_port, user, password):
        '''Makes connection to Neo4j database'''
        # set up authentication parameters
        authenticate(host_port, user, password)
        # connect to authenticated graph database
        url = 'http://{}/db/data/'.format(host_port)
        self.graph = Graph(url)
        try:
            self.graph.schema.create_uniqueness_constraint('User', 'id')
        except: #ConstraintViolationException
            print 'Unique id on Node User already exists'

    # Functions to add data to the database
    def add_following(self, user_id, following_ids, rec_count):
        '''Given a unique user id, adds the relationship for who they follow.
        Adds a User Node with the id if it doesn't exist.'''
        user = Node('User', id=user_id)
        self.graph.merge(user) # important to merge before doing anything
        rec = 1 + rec_count
        # preserving the order of the following. 1 = most recent
        for fid in following_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user, 'FOLLOWS', user2, rec=rec))
            rec += 1
        user['following_added'] = True
        self.graph.push(user)

    def add_followers(self, user_id, follower_ids, rec_count):
        '''Given a unique user id, adds the relationship for who follows them.
        Adds a User Node with the id if it doesn't exist.'''
        user = Node('User', id=user_id)
        self.graph.merge(user)
        rec = 1 + rec_count
        for fid in follower_ids:
            user2 = Node('User', id=fid)
            self.graph.merge(user2)
            self.graph.merge(Relationship(user2, 'FOLLOWS', user, rec=rec))
            rec += 1
        user['followers_added'] = True
        self.graph.push(user)

    def add_user_properties(self, user):
        '''Given a user object, adds its properties to the existing user Node'''
        try:
            user_id = user.id
            existing_user = Node('User', id=user_id)
            clean_prop_dict = self.__clean_user_dict(user.__dict__)
            self.graph.merge(existing_user)
            for k, v in clean_prop_dict.iteritems():
                existing_user[k] = v
            # add additional label to verified accounts
            if clean_prop_dict['verified']:
                print True
                existing_user.add_label('Verified')
        except:
            # bad user id
            user_id = user['user_id']
            error = user['error']
            existing_user = Node('User', id=user_id)
            self.graph.merge(existing_user)
            existing_user['screen_name'] = 'INVALID'
            existing_user['error'] = error
            print 'Found invalid user id'
        self.graph.push(existing_user)

    def __clean_user_dict(self, user_prop_dict):
        '''Given the raw user property dict, returns a copy with only the
        whitelisted keys, a cleaned profile image URL, and stringified dates'''

        keep = ['contributors_enabled', 'created_at', 'default_profile',
                'default_profile_image', 'description', 'favourites_count',
                'followers_count', 'friends_count', 'geo_enabled', 'id',
                'id_str', 'is_translator', 'lang', 'listed_count', 'location',
                'name', 'profile_image_url_https', 'protected', 'screen_name',
                'statuses_count', 'time_zone', 'utc_offset', 'verified',
                'withheld_in_countries', 'withheld_scope']

        # only keep the above keys for inserting
        clean = {k: v for k, v in user_prop_dict.iteritems() if k in keep}
        # strip the '_normal' suffix so the URL points at the full-size image
        image, ext = os.path.splitext(clean['profile_image_url_https'])
        if image.endswith('_normal'):
            image = image[:-len('_normal')]
        clean['profile_image_url_https'] = image + ext
        # convert date time to string
        clean['created_at_ord'] = clean['created_at'].toordinal()
        clean['created_at'] = clean['created_at'].strftime('%Y-%m-%d %H:%M:%S')
        return clean

    # Functions to query database
    def get_nodes_missing_props(self, limit=100):
        '''Returns up to limit ids of User nodes without user properties'''
        selector = NodeSelector(self.graph)
        selected = selector.select('User').where("_.screen_name IS NULL").limit(limit)
        return [s['id'] for s in selected]

    def get_nodes_missing_props_follb(self, limit=100):
        cypherq = """MATCH (n)-[r:FOLLOWS]->(m)
                     WHERE m.screen_name = 'BernieSanders'
                     AND NOT EXISTS(n.screen_name)
                     RETURN n.id
                     LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels(self, rel='FOLLOWING', limit=1):
        '''Returns ids missing the follower or following relationships.
        Valid inputs for rel are FOLLOWING or FOLLOWERS'''
        selector = NodeSelector(self.graph)
        if rel == 'FOLLOWING':
            selected = selector.select('User').where("_.following_added IS NULL").limit(limit)
        elif rel == 'FOLLOWERS':
            selected = selector.select('User').where("_.followers_added IS NULL").limit(limit)
        else:
            raise ValueError("rel must be 'FOLLOWING' or 'FOLLOWERS'")
        return [s['id'] for s in selected]

    def get_nodes_missing_rels_params(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)-[r:FOLLOWS]->(m:User)
                                     WHERE n.followers_count >= 1000
                                     AND NOT EXISTS(n.following_added)
                                     AND m.screen_name = 'BernieSanders'
                                     RETURN n.id
                                     LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 100;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()]

    def get_nodes_missing_rels_bfriends_step(self, rel='FOLLOWING'):
        cypherq = """MATCH (n:User)<-[r:FOLLOWS]-(m:User)
                                     WHERE m.screen_name = 'BernieSanders'
                                     AND NOT EXISTS(n.following_added)
                                     RETURN n.id
                                     LIMIT 500;"""
        return [i['n.id'] for i in self.graph.run(cypherq).data()[-100:]]
Example #2
0
def add_property(node, property_key, property_value):
    print("{}, {}, {}.".format(node, property_key, property_value))
    graph = Graph('http://*****:*****@localhost:7474/db/data/')
    graph_node = Node("Person", name=node)
    graph.merge(graph_node)
    graph_node["{}".format(property_key)] = "{}".format(property_value)
    graph.push(graph_node)
Example #3
0
class Neo4jGraph:
    def __init__(self,
                 uri: str,
                 auth: Tuple[str, str]
                 ):
        self._graph = Graph(uri=uri, auth=auth)

    def commit_relation(self,
                        src: Dict[str, str],
                        rel: Dict[str, str],
                        dst: Dict[str, str],
                        ) -> None:
        srckind = src['kind']
        srcnode = Node(
            srckind, **{k: v for k, v in src.items() if k != 'kind'})
        dstkind = dst['kind']
        dstnode = Node(
            dstkind, **{k: v for k, v in dst.items() if k != 'kind'})
        relkind = rel['kind']
        relationship = Relationship(srcnode, relkind, dstnode,
                                    **{k: v for k, v in rel.items() if k != 'kind'})
        self._graph.merge(srcnode, "Author", "name")
        self._graph.create(dstnode)
        self._graph.create(relationship)

    def run(self, query: str):
        return self._graph.run(query)
Example #4
0
def moveProficiencyTable():
    # get a list of all unique learners
    # neo4j graph connector
    graph = Graph()

    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerproficiency")
    for lid in lids:
        # get the knowledge state for this guy
        # <concept-id>,<score> in schema

        uid = lid['learner_id']
        # create a learner node
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")

        print("** learner:", uid)

        profDict = session.execute(
            "SELECT proficiency from learnerproficiency WHERE learner_id='" +
            uid + "'")[0]['proficiency']
        for cid, score in profDict.items():
            print("concept:", cid, "score", score)

            # create/find concept node
            node2 = Node("Concept", id=cid)
            graph.merge(node2, "Concept", "id")
            # add a relationship with property score
            graph.create(Relationship(node, "ASSESSED_IN", node2, score=score))
Example #5
0
    def save_node(self, label, properties_dict, unique=True):
        ''' create neo4j node, with a label, and properties '''
        if unique == True:
            length, lst = self.exists_node(label, properties_dict['name'])
            if length > 0:
                #exists update
                g = Graph(password=self.password)
                b = lst[0]
                g.merge(b)
                for k, v in properties_dict.items():
                    b[k] = v
                #b['age'] = properties_dict['age']
                #b['x'] = 8
                g.push(b)

            else:
                #does not exist, insert new
                g = Graph(password=self.password)
                tx = g.begin()
                a = Node(label, **properties_dict)
                tx.create(a)
                tx.commit()
        else:
            # allow new duplicate nodes, why???
            raise Exception("do not allow duplicate named nodes")
Example #6
0
def moveContentSummaryTable():
    graph = Graph()

    lids = session.execute(
        "SELECT DISTINCT learner_id from learnercontentsummary")
    for lid in lids:
        uid = lid['learner_id']
        print("** learner:", uid)
        # content_id text, interactions_per_min double,
        #num_of_sessions_played int,
        #time_spent double,
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")

        contentDict = session.execute(
            "SELECT * from learnercontentsummary WHERE learner_id='" + uid +
            "'")[0]
        cid = contentDict['content_id']
        tsp = contentDict['time_spent']
        ipm = contentDict['interactions_per_min']

        node2 = Node("Content", id=cid)
        graph.merge(node2, "Content", "id")
        # add a relationship with property score
        graph.create(
            Relationship(node,
                         "INTERACTED_WITH",
                         node2,
                         timeSpent=tsp,
                         ipm=ipm))
        print('content: ', cid, 'tsp: ', tsp, 'ipm', ipm)
Example #7
0
def moveRelevancyTableAll():

    graph = Graph()

    # get a list of all unique learners
    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerconceptrelevance")

    for lid in lids:
        # get the knowledge state for this guy
        # <concept-id>,<score> in schema

        uid = lid['learner_id']
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")

        print("** learner:", uid)

        relDict = session.execute(
            "SELECT relevance from learnerconceptrelevance WHERE learner_id='"
            + uid + "'")[0]['relevance']
        for cid, score in relDict.items():
            #print("concept:",cid,"score",score)
            # create a node, if it does not exist
            # else, merge with it
            node2 = Node("Concept", id=cid)
            graph.merge(node2, "Concept", "id")
            # add a relationship with property score
            graph.create(Relationship(node2, "RELEVANT_TO", node, score=score))
Example #8
0
def main(path: 'Set path to file'):
    graph = build_graph(path)
    graph_db = Graph("http://neo4j:7474/db/data")

    for url in graph:
        if url is None:
            continue
        tx = graph_db.begin()
        try:
            url_node = Node("Url", name=url)
            graph_db.merge(url_node)
            for link in graph[url]:
                if link is None:
                    continue
                try:
                    link_node = Node("Url", name=link)
                    graph_db.merge(link_node)

                    node_relation = Relationship(url_node, "LINKS_TO",
                                                 link_node)
                    tx.create(node_relation)
                except Exception as e:
                    print("Error in building relationship: " + str(e))
            tx.commit()
        except Exception as e:
            print("got error: " + str(e))
            tx.rollback()
            continue
    print("Added to neo4j")
Example #9
0
def add_triples_to_neo4j_db(triples):
    graph = Graph("bolt://localhost:7687", auth=("neo4j", "eragold"))
    for triple in triples:
        subj = Node("Person", name=triple[0])
        obj = Node("Entity", name=triple[1])
        re = Relationship.type(triple[2])(subj, obj)
        graph.merge(re, 'Person', 'name')
Example #10
0
def add_property2(node, *labels, **params):
    node_value = node
    graph = Graph('http://*****:*****@localhost:7474/db/data/')
    graph_node = Node(*labels, name=node)
    graph.merge(graph_node)
    for k, v in params.items():
        graph_node["{}".format(k)] = "{}".format(v)
    graph.push(graph_node)
Example #11
0
def put_data_frame_in_db(df):
    graph = Graph(password="******")
    for row in df.itertuples():
        user = Node('User', id=row.user.item())
        deal = Node('Deal', id=row.deal.item())
        graph.merge(user)
        graph.merge(deal)
        user.push()
        graph.create(Relationship(user, "rates", deal, rating=row.rating))
Example #12
0
def lambda_handler(event, context):
    graph = Graph(host=os.environ["NAME_NEO_DOMAIN"],
                  user=os.environ["USER"],
                  password=os.environ["PASSWORD"])

    user = Node("User", id=event['id'])
    graph.merge(user)
    for key, value in event['datas'].items():
        user[key] = value
    graph.push(user)
Example #13
0
class DiseasePipeline(object):

    def __init__(self):
        self.graph = Graph(NEO4J_URL, auth = (NEO4J_USERNAME, NEO4J_PASSWORD))
        self.graph.delete_all()
        # self.file = open('test.txt', "a+")
    def process_item(self, item, spider):
        # self.file.write(str(item) + '\n\n')
        # self.file.flush()

        item['name'] = item['name'].strip()

        node = self.graph.nodes.match('disease', name = item['name']).first()
        if node is None:    # if this disease does not exist yet, create it
            node = Node('disease', **item)
            self.graph.create(node)
            node = self.graph.nodes.match('disease', name = item['name']).first()
        else:               # if the disease already exists, update it
            node.update(item)
            self.graph.merge(node, 'disease', 'name')

        # create links to related diseases
        relatedDiseases = item['relatedDisease']
        for disease in relatedDiseases:
            disease = disease.strip()
            newNode = self.graph.nodes.match('disease', name = disease).first()

            if newNode is None:    # if this disease does not exist, create it so the link can be made
                newNode = Node('disease', name = disease)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('disease', name = disease).first()

            # check whether a 'relate' link already exists between the two diseases; if not, create it
            r = Relationship(node, "relate", newNode)
            if self.graph.match_one((node, newNode), r_type = 'relate') is None:
                self.graph.create(r)
        
        # create links between the disease and its symptoms
        symptoms = item['typicalSymptom'].split('、')
        for symptom in symptoms:
            symptom = symptom.strip() # strip extra whitespace
            newNode = self.graph.nodes.match('symptom', name = symptom).first()
            
            if newNode is None: # if this symptom does not exist yet, create it
                newNode = Node('symptom', name = symptom)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('symptom', name = symptom).first()
                       
            # check whether a 'have' link between the disease and the symptom already exists; if not, create it
            r = Relationship(node, 'have', newNode)
            if self.graph.match_one((node, newNode), r_type = 'have') is None:
                self.graph.create(r)
Example #14
0
def moveRelevancyTable(n=10):
    # get a list of all unique learners
    # filepath = "batch-models/src/test/resources/concept-similarity/ConceptSimilarity.json"
    # neo4j graph connector
    graph = Graph()
    # only compute bottom "n" and top "n" relevant concepts

    lids = session.execute(
        "SELECT DISTINCT learner_id from learnerconceptrelevance")
    for lid in lids:
        # get the knowledge state for this guy
        # <concept-id>,<rel score> in schema
        uid = lid['learner_id']
        # create a learner node
        node = Node("Learner", id=uid)
        graph.merge(node, "Learner", "id")

        print("** learner:", uid)

        relDict = session.execute(
            "SELECT relevance from learnerconceptrelevance WHERE learner_id='"
            + uid + "'")[0]['relevance']
        rawScores = relDict.values()
        qU = round(sorted(rawScores, reverse=True)[n - 1] * 1e4) / 1e4
        qL = round(sorted(rawScores)[n - 1] * 1e4) / 1e4

        for cid, rawscore in relDict.items():
            score = round(rawscore * 1e4) / 1e4
            if (score >= qU):

                print("concept:", cid, "score", score)
                # create/find concept node
                node2 = Node("Concept", id=cid)
                graph.merge(node2, "Concept", "id")
                # add a relationship with property score
                graph.create(
                    Relationship(node2, "RELEVENT_FOR", node, score=score))
            elif (score <= qL):
                print("concept:", cid, "score", score)
                # create/find concept node
                #node2 = graph.merge_one("Concept","id",cid)
                # add a relationship with property score
                #graph.create(Relationship(node2, "NOT_RELEVENT_FOR", node,score=score))
                pass
            else:
                pass
Example #15
0
def upload_entity(entry):
    """
    Upload entry into Graph database
    """

    graph = Graph('http://*****:*****@localhost:7474/db/data/')  # credentials/host redacted
    id_num = entry['result']['@id']
    name = entry['result']['name']
    description = entry['result']['description']
    entity = Node('Entity', id=id_num, description=description, name=name)
    graph.merge(entity, 'Entity', 'id')
    graph.merge(entity, 'Entity', 'description')
    graph.merge(entity, 'Entity', 'name')
    for n in entry['result']['@type']:
        item = Node('Item', type=n)
        graph.merge(item, 'Item', 'type')
        graph.merge(Relationship(entity, "IS", item))
Example #16
0
def mockConceptCoverage():

    # neo4j graph connector
    authenticate("localhost:7474", "neo4j", "1sTep123")
    graph = Graph()

    cypher = graph.cypher
    # get a list of all content
    conceptDict = cypher.execute("MATCH (x:Concept) RETURN x.id as concept")
    contentDict = cypher.execute("MATCH (x:Content) RETURN x.id as content")
    n = len(contentDict)

    for concept in conceptDict:
        id = concept.concept
        node = Node("Concept", id=id)
        graph.merge(node, "Concept", "id")

        i = random.randint(0, n - 1)
        id = contentDict[i].content
        node2 = Node("Content", id=id)
        graph.merge(node2, "Content", "id")
        graph.create(Relationship(node, "COVERED_IN", node2))
Example #17
0
def mockMisConcepts():

    # neo4j graph connector
    authenticate("localhost:7474", "neo4j", "1sTep123")
    graph = Graph()

    cypher = graph.cypher
    # get a list of all content
    learnerDict = cypher.execute("MATCH (x:Learner) RETURN x.id as learner")
    conceptDict = cypher.execute("MATCH (x:Concept) RETURN x.id as concept")
    n = len(conceptDict)

    for learner in learnerDict:
        id = learner.learner
        node = Node("Learner", id=id)
        graph.merge(node, "Learner", "id")

        i = random.randint(0, n - 1)
        id = conceptDict[i].concept
        node2 = Node("Concept", id=id)
        graph.merge(node2, "Concept", "id")
        graph.create(Relationship(node, "HAS_MISCONCEPTION_IN", node2))
Example #18
0
class Neo4jDBPipleline(object):
    def __init__(self):
        # self.db = Graph(host="localhost", user="******", password="******")
        db_info = settings.DB_INFO
        self.db = Graph(host=db_info["host"],
                        http_port=db_info["http_port"],
                        user=db_info["user"],
                        password=db_info["password"])

    def process_item(self, item, spider):
        """ Check the item type, handle it accordingly, then write it to the database """
        if isinstance(item, InformationItem):
            usr = Node("WeiboUser", **dict(item))
            self.db.merge(usr, "WeiboUser", "wb_usr_id")
        elif isinstance(item, TweetsItem):
            weibo = Node("WeiboTweets", **dict(item))
            self.db.merge(weibo, "WeiboTweets", "wb_tt_id")
            usr = Node("WeiboUser", wb_usr_id=weibo["wb_usr_id"])
            fan_follow_sb = Relationship(usr, "TWEETS", weibo)
            self.db.merge(fan_follow_sb)
        elif isinstance(item, FollowsItem):
            followsItems = dict(item)
            follows = followsItems.pop("follows")
            fan = Node("WeiboUser", wb_usr_id=followsItems["wb_usr_id"])
            for sb_id in follows:
                sb = Node("WeiboUser", wb_usr_id=sb_id)
                fan_follow_sb = Relationship(fan, "FOLLOWS", sb)
                self.db.merge(fan_follow_sb)
        elif isinstance(item, FansItem):
            fansItems = dict(item)
            fans = fansItems.pop("fans")
            sb = Node("WeiboUser", wb_usr_id=fansItems["wb_usr_id"])
            for fan_id in fans:
                fan = Node("WeiboUser", wb_usr_id=fan_id)
                fan_follow_sb = Relationship(fan, "FOLLOWS", sb)
                self.db.merge(fan_follow_sb)
        return item
Example #19
0
 def fill_similarities_graph(self):
     authenticate(settings.NeoHost, settings.NeoLog, settings.NeoPass)
     graph = Graph("{0}/db/data/".format(settings.NeoHost))
     #graph.delete_all()
     try:
         graph.schema.create_uniqueness_constraint('Video', 'id')
     except:
         pass
     data = pd.DataFrame(self.db_game.read_videodata_from_db())
     if not isinstance(data, str) and not data.empty:
         data = data[pd.notnull(data['title'])]
         data = data[pd.notnull(data['rating'])]
         k = len(data)
         mes = smilarities.SimilarityMeasures()
         vid = 0
         while vid < k:
             if data['hashtags'][vid] is not None:
                 #print(data['hashtags'][vid], data['id'][vid])
                 if len(data['hashtags'][vid]) > 3:
                     hashes = self.hashtag_list_to_str(
                         data['hashtags'][vid])
                     #print(hashes, vid)
                     data1 = pd.DataFrame(
                         self.db_game.read_text_index_videodata_from_db(
                             'hashtags', hashes))
                     data1 = data1[pd.notnull(data1['title'])]
                     data1 = data1[pd.notnull(data1['rating'])]
                     data1 = data1.reset_index()
                     start = Node("Video", id=str(data['id'][vid]))
                     graph.merge(start)
                     start.properties['rating'] = data['rating'][vid]
                     start.properties['title'] = data['title'][vid]
                     start.push()
                     vid1 = 0
                     while vid1 < len(data1):
                         stop = Node("Video", id=str(data1['id'][vid1]))
                         graph.merge(stop)
                         stop.properties['rating'] = data1['rating'][vid1]
                         stop.properties['title'] = data1['title'][vid1]
                         stop.push()
                         num = mes.jaccard_similarity(
                             data['hashtags'][vid], data1['hashtags'][vid1])
                         #print(len(data['hashtags'][vid]))
                         if (num > 0.5 and len(data1['hashtags'][vid1]) > 3
                             ) and data1['id'][vid1] != data['id'][vid]:
                             #print(num, vid, vid1)
                             following = Relationship(
                                 start, "Jaccard", stop)
                             graph.merge(following)
                             following.properties[
                                 'jaccard_similarity'] = num
                             following.push()
                         vid1 += 1
             vid += 1
         #print(pd.DataFrame(graph.run("MATCH (a:Video) RETURN a.id, a.title, a.rating LIMIT 10").data()))
     return
Example #20
0
class Neo4jCustomer(object):
    def __init__(self):
        self.redis_conn = redis.StrictRedis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PARAMS["password"],
        )
        self.graph = Graph(NEO4J_URI, password=NEO4J_PWD)
        self.pending_queue = NEO4J_PENDING_QUEUE
        self.doing_queue = NEO4J_DOING_QUEUE

    def listen_task(self):
        todo_task = self.redis_conn.lpop(self.doing_queue)
        if todo_task:
            self.save_relationships(todo_task)

        while True:
            task = self.redis_conn.brpoplpush(self.pending_queue,
                                              self.doing_queue, 0)
            self.save_relationships(task)

    def save_relationships(self, task):
        self.graph.merge(pickle.loads(task))
        self.redis_conn.lpop(self.doing_queue)
Example #21
0
def moveConceptMap():
    # neo4j graph connector
    graph = Graph()
    # delete entire graph

    url = "http://lp-sandbox.ekstep.org:8080/taxonomy-service/v2/analytics/domain/map"
    resp = requests.get(url).json()

    # move all concepts
    conceptList = resp["result"]["concepts"]
    for conceptDict in conceptList:
        identifier = None
        if (not conceptDict.has_key('identifier')):
            continue

        identifier = conceptDict['identifier']
        # create/find node
        node = Node("Concept", id=identifier)
        graph.merge(node, "Concept", "id")

        if (conceptDict.has_key('subject')):
            subject = conceptDict['subject']
            node.properties["subject"] = subject
            node.push()

        if (conceptDict.has_key('gradeLevel')):
            gradeLevel = conceptDict['gradeLevel']
            node.properties["gradeLevel"] = gradeLevel
            node.push()

        if (conceptDict.has_key('objectType')):
            objectType = conceptDict['objectType']
            node.properties["objectType"] = objectType
            node.push()

    # move all relations
    relationList = resp["result"]["relations"]
    for relationDict in relationList:

        if (not relationDict.has_key('startNodeId')):
            continue
        if (not relationDict.has_key('endNodeId')):
            continue
        if (not relationDict.has_key('relationType')):
            continue
        startNodeId = relationDict['startNodeId']
        endNodeId = relationDict['endNodeId']
        relationType = relationDict['relationType']
        print('A:', startNodeId, 'relationType', relationType, 'B:', endNodeId)
        node1 = Node("Concept", id=startNodeId)
        graph.merge(node1, "Concept", "id")
        node2 = Node("Concept", id=endNodeId)
        graph.merge(node2, "Concept", "id")
        graph.create(Relationship(node1, relationType, node2))
Example #22
0
def db():

    with open('input.csv', 'r') as csv_file:
        csv_reader = csv.DictReader(
            csv_file)  # add , delimiter=',' to specify delimiter

        # next(csv_reader)  # skips over both header rows
        graph = Graph("bolt://localhost:7687", auth=("neo4j", "ubdprototype"))

        try:
            graph.run("Match () Return 1 Limit 1")
        except Exception:
            print(
                'Invalid connection. Is Neo4j running? Check username and password.'
            )
            raise Exception

        graph.delete_all()

        for line in csv_reader:

            topic = Node("Topic", name=line['topic'])
            application = Node("Application",
                               name=line['name'],
                               website=line['website'],
                               publication=line['publication'])

            dataset = Node("Dataset", identifier=line['identifier']
                           )  # may include an identifier TYPE property

            graph.merge(topic, "Topic", "name")
            graph.merge(application, "Application", "name")
            graph.merge(dataset, "Dataset", "identifier")

            graph.create(Relationship(application, "relates to", topic))
            graph.create(
                Relationship(application,
                             "uses",
                             dataset,
                             conf_level=line['conf-level']))

    return graph
Example #23
0
class MadpyHabitsSurvey:
    def __init__(self):
        self.responses = google_survey.get('madpy-habits-survey.yaml')
        self.responses.set_index('question_id', inplace=True)

    def graph_survey(self):
        screen_names = self.responses.ix['q0', ['person_id', 'response']]
        self.pythonistas = {person_id: Node('Pythonista', screen_name=name)
                            for _, (person_id, name) in
                            screen_names.iterrows()}

        self.graph = Graph(password=environ['NEO4J_PASSWORD'])
        for node in self.pythonistas.values():
            self.graph.merge(node, label='Pythonista')

        self.graph_question('q1', 'Editor', 'TYPES_IN')
        self.graph_question('q2', 'Package', 'LIKES')
        self.graph_question('q3', 'VersionControl', 'USES')
        self.graph_question('q4', 'Language', 'KNOWS')

    def graph_question(self, question_id, node_label,
                       relationship_label):

        def Response(node_value):
            return Node(node_label, name=node_value)

        responses = self.responses.ix[question_id, ['person_id', 'response']]
        response_nodes = {}  # nodes for unique responses
        relationships = []   # relationships between people and responses
        for _, (person_id, node_value) in responses.iterrows():
            pythonista = self.pythonistas[person_id]
            node = response_nodes.setdefault(node_value, Response(node_value))
            response = Relationship(pythonista, relationship_label, node)
            relationships.append(response)

        for node in response_nodes.values():
            self.graph.merge(node, label=node_label)

        for relationship in relationships:
            self.graph.merge(relationship, label=node_label)
Example #24
0
 def _neo4j(self, article):
   uri = os.environ['NEO4J']
   u = os.environ['NEO4JUSER']
   p = os.environ['NEO4JPASSWD']
   # generate nodes
   news =  Node('NEWSITEMS', url=article['url'])
   orgs = [Node('ENTS', name=e, type='org') for e in article['ner']['org']]
   people = [Node('ENTS', name=e, type='people') for e in article['ner']['people']]
   # generate relationships
   relations_orgs = [Relationship(n, 'CONTAINED_IN', news) for n in orgs]
   relations_people = [Relationship(n, 'CONTAINED_IN', news) for n in people]
   # join
   relations = relations_orgs + relations_people
   # store
   neo4j = Graph(uri, username=u, password=p)
   neo4j.merge(news, 'NEWSITEMS', 'url')
   for n in orgs:
     neo4j.merge(n, 'ENTS', 'name')
   for n in people:
     neo4j.merge(n, 'ENTS', 'name')
   for r in relations:
     neo4j.create(r)
Example #25
df = pd.read_csv(fl + "/Disease_Details.csv")
dfl = df[['Disease', 'Diagnosis_treatment']]
dfl = dfl.drop_duplicates()
dfl.sort_values(['Disease'], ascending=True)
records_ = dfl.to_dict(orient='records')

results = db.Disease.insert_many(records_)

df2 = pd.DataFrame(list(results.inserted_ids))
df2.columns = ['Object_id']
df = df.join(df2)

for index, row in df.iterrows():
    #print(row['Disease'], row['Object_id'])
    a = Node("Disease", name=row['Disease'], id=str(row['Object_id']))
    graph.merge(a, "Disease", "name")

df_symptoms = pd.read_csv(fl + "/Disease_Symptoms.csv")
df_symptoms = df_symptoms[['Disease', 'Symptoms']]
df_symptoms = df_symptoms.drop_duplicates()
df_symptoms = df_symptoms.dropna()

df_symptoms = df_symptoms.groupby('Disease')['Symptoms'].apply(
    list).reset_index(name='Symptoms_arr')

for index, row in df_symptoms.iterrows():
    query = {'Disease': {'$eq': row['Disease']}}
    update = {'$set': {'Symptoms.Name': row['Symptoms_arr']}}
    results_symptoms = db.Disease.update_one(query, update, upsert=True)
    print(f"Symptoms data inserted into MongoDB {results_symptoms}")
    for i in range(len(row['Symptoms_arr'])):
Example #26
0
for index, row in wikiedit_df.iterrows():
    if pd.isnull(row["manual"]):
        page_name = row["wikisearch"]
    else:
        if row["manual"] == "None":
            continue
        else:
            page_name = row["manual"]
    entity_n = graph.nodes.match("Entity", entity_name = row["entity_name"]).first()
    wiki_n = Node("Wikipedia", page_name = page_name )
    wiki_n.__primarylabel__ = 'Wikipedia'
    wiki_n.__primarykey__ = 'page_name'
    wiki_n["url"]="https://fr.wikipedia.org/wiki/"+page_name
    OWNED_BY = Relationship.type("OWNED_BY")
    graph.merge(OWNED_BY(wiki_n, entity_n))
    print(entity_n["entity_name"], wiki_n["url"])

# %% Download wiki pages (with infobox) as files from wiki nodes

results = graph.nodes.match("Wikipedia")
for wiki_n in results:
    # check whether the file already exists; if not, fetch it
    if os.path.isfile(wikipath + wiki_n['page_name']+".wikipage"):
        print(wiki_n['page_name']+".wikipage already fetched.")
        continue
    try:
        page = wptools.page(wiki_n['page_name'], lang ='fr')
        page.get_parse()
    except:
        print("error 1 with name:",wiki_n['page_name'])
Example #27
0
class Command(BaseCommand):
    help = 'port group data from sql server to neo4j.'

    def __init__(self, *args, **kwargs):
        super(Command, self).__init__(*args, **kwargs)

        self._sql_server_conn = pymssql.connect(server='SX-DEV')

        self._init_graph()

    def handle(self, *args, **options):
        self._start_import()

    def _init_graph(self):
        self._graph = Graph(host=settings.NEO4J['HOST'],
                            http_port=settings.NEO4J['PORT'],
                            user=settings.NEO4J['USER'],
                            password=settings.NEO4J['PWD'])

    def _start_import(self):
        self.stdout.write('Start to migrate data from sql server to neo4j')

        group_db, person_db = self._get_databases()

        # create all the group nodes
        for db in group_db:
            for table in self._get_db_tables(db):
                self._create_group(db, table)

        # create all group users nodes and build relations
        for db in person_db:
            for table in self._get_db_tables(db):
                self._create_person(db, table)

        self._close_mssql_conn()

        self.stdout.write(
            self.style.SUCCESS(
                'Successfully imported all data to neo4j server'))

    def _close_mssql_conn(self):
        self._sql_server_conn.close()

    def _get_databases(self):
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT name FROM sys.databases;')
        dbs = cursor.fetchall()

        group_db = []
        person_db = []
        for db in dbs:
            db_name = db[0]
            if 'GroupData' in db_name:
                person_db.append(db_name)
            elif 'QunInfo' in db_name:
                group_db.append(db_name)

        return group_db, person_db

    def _get_db_tables(self, db_name):
        cursor = self._sql_server_conn.cursor()
        cursor.execute(
            "SELECT TABLE_NAME FROM %s.INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE';"
            % db_name)
        return [
            tb[0] for tb in cursor.fetchall()
            if 'QunList' in tb[0] or 'Group' in tb[0]
        ]

    def _create_group(self, db_name, table_name, start_id=0):
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))

        total = cursor.fetchall()[0][0]

        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))

        pbar = tqdm(desc='Creating Group Nodes from [%s.%s]' %
                    (db_name, table_name),
                    total=total)
        try:
            g = cursor.fetchone()
            while g:
                curr_id = g[0]
                group = Group()
                group.number = g[1]
                group.mastqq = g[2]
                group.date = g[3]
                group.title = g[4]
                group.groupclass = g[5]
                group.intro = g[6]
                self._graph.merge(group)
                pbar.update(1)
                g = cursor.fetchone()
        except:
            print('Caught an exception, resuming group creation from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            self._create_group(db_name, table_name, curr_id - 1)
        pbar.close()

    def _create_person(self, db_name, table_name, start_id=0):
        curr_id = start_id
        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT count(*) FROM %s.dbo.%s where id > %d' %
                       (db_name, table_name, start_id))

        total = cursor.fetchall()[0][0]

        cursor = self._sql_server_conn.cursor()
        cursor.execute('SELECT * FROM %s.dbo.%s where id > %d ORDER BY id' %
                       (db_name, table_name, start_id))

        pbar = tqdm(desc='Creating Person Nodes and Relations from [%s.%s]' %
                    (db_name, table_name),
                    total=total)
        try:
            p = cursor.fetchone()
            while p:
                curr_id = p[0]
                person = Person()
                person.qq = p[1]
                person.nick = p[2]
                person.age = p[3]
                person.gender = p[4]
                person.auth = p[5]
                group_number = p[6]
                # get group node
                group = Group.select(self._graph, group_number).first()
                if group:
                    # build relations
                    person.groups.add(group)
                    group.members.add(person)
                    # update group node
                    self._graph.push(group)
                self._graph.merge(person)
                pbar.update(1)
                p = cursor.fetchone()
        except:
            print('Caught an exception, resuming person creation from id: %d' %
                  (curr_id - 1))
            pbar.close()
            self._init_graph()
            self._create_person(db_name, table_name, curr_id - 1)
        pbar.close()
Example #28
0
from py2neo import Graph, Node, Relationship, authenticate

authenticate("localhost:7474", "neo4j", "cloudchaser")
graph = Graph("http://localhost:7474/db/data/")

graph.delete_all()

alice = graph.merge("Person", "name", "Alice")
bob = graph.merge("Person", "name", "Bob")
chelsea = graph.merge("Person", "name", "Chelsea")

prof = {
    'name': 'Dennis'
}

fav = {
    'name': 'Emma'
}

query = (
        'MERGE (profile:soundcloud {name: {profile}.name}) \
        ON CREATE SET profile={profile} '
        'MERGE (favorite:soundcloud {name: {favorite}.name}) \
        ON CREATE SET favorite={favorite} '
        )

graph.cypher.execute(query, {
                            'profile': prof,
                            'favorite': fav
                            }
                    )
Example #29
        D0_outdegree = node['outdegree'],
        D0_creation_date = node['creation_date'],
        D0_indexing_status = node['indexing_status'],
        D0_crawled = node['crawled'],
        D0_last_modification_date = node[ 'last_modification_date'],
        D0_pages_crawled = node['pages_crawled'],
        D0_name = node['name'],
        D0_label = node['label'],
        D0_id = node['id']
        )
    if len(graph.nodes.match(site_name = node['label'])) !=0:
        print(node)
        a["site_name"]=node['label'] + "X" + str(random.randint(1000,9999))
    a.__primarylabel__ = 'Website'
    a.__primarykey__ = 'site_name'
    graph.merge(a)

for link in data_D0['links']:
    try:
        tx = graph.begin()
        source_n = graph.nodes.match(D0_id = link['source']).first()
        target_n = graph.nodes.match(D0_id = link['target']).first()
        rel = Relationship(source_n, "LINKS_TO", target_n)
        rel["count_D0"]=link['count']
        tx.merge(rel)
        tx.commit()
    except:
        pass
    # look at "label"
    # if the label is already in the database
Example #30
0
        
    for tweet in results['statuses']:
    
        tweetText = tweet['text'].encode('utf-8')
        tweetAuthor = tweet['user']['screen_name'].encode('utf-8')
        language = tweet['user']['lang'].encode('utf-8')
        if language != "en":
            continue
        if 'http' in tweetText:
            continue
        if 'RT @' in tweetText:
            continue
    
        register(tweetAuthor, tweetAuthor+tweetAuthor)
        user = graph.find_one('User', 'username',tweetAuthor)
        
        post = Node('Post', id=tweet['id'], title="Tweet", author=tweetAuthor,text=tweetText, timestamp=timestamp(), date=date())
        rel = Relationship(user, 'PUBLISHED', post)
    
        try:
            graph.create(rel)
        except Exception, e:
            continue
    
        print "New Tweet ("+language+"): "+tweetAuthor+":"+tweetText
        for hashtag in tweet['entities']['hashtags']:
            tag = Node('Tag', name=hashtag['text'])
            graph.merge(tag)
            rel = Relationship(tag, 'TAGGED', post)
            graph.create(rel)
Example #31
0
class DataBase:
    def __init__(self):
        py2neo.authenticate("localhost:7474", "neo4j", "st1215")
        self.graph = Graph("http://localhost:7474/db/data/")

    def get_all_news_from(self, site):
        # news=set()
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News)-[:E]-(t:Tipo) WHERE s.name="'
            + site + '" RETURN n,t').data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']),
                 n['t']['description']))

        return dataSet

    def get_all_news_from_no_class(self, site):
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) WHERE s.name="' + site +
            '" RETURN n').data()
        dataSet = list()
        for n in all_news:
            dataSet.append(
                (n['n']['title'],
                 removerAcentosECaracteresEspeciais(n['n']['content']), ''))

        return dataSet

    def get_news_by_title(self, title):
        all_news = self.graph.run(
            'MATCH (s:Site)-[:PUBLICOU]-(n:News) WHERE n.title="' + title +
            '" RETURN n').data()
        news = News()
        for n in all_news:
            news.title = n['n']['title']
            news.url = n['n']['url']

        return news

    def get_all_data_set(self, sites):
        dataSet = list()
        for s in sites:
            dataSet.extend(self.get_all_news_from(s))
        return dataSet

    # def get_queue(self, site_url):
    #     queue=set()
    #     for s in SiteQueue.select(self.graph).where(site=site_url):
    #         queue.add(s.page)
    #     return queue
    #
    # def save_queue(self, site, page):
    #     queue=SiteQueue()
    #     queue.site=site
    #     queue.page=page
    #     self.graph.push(queue)

    def get_site(self, name):
        sites = Site.select(self.graph).where(name=name)
        for site in sites:
            return site

    def get_clazz(self, name):
        tipos = Tipo.select(self.graph).where(description=name)
        for tipo in tipos:
            return tipo

    def save_site(self, site_name, url):
        site = Site()
        site.name = site_name
        site.url = url
        self.graph.push(site)

    def save_news(self, site, url, title, sub_title, content, tipo):
        s = self.get_site(site)
        t = self.get_clazz(tipo)
        news = News()
        news.site.add(s)
        news.tipo.add(t)
        news.title = title
        news.sub_title = sub_title
        news.content = content
        news.url = url
        self.graph.merge(news)

    def create_rel(self, node1, node2):
        self.graph.create("(s:Site)-[:PUBLICOU]->(n:News)")

    def install(self):
        self.graph.run("MATCH (n) DETACH DELETE n")
        self.graph.run("MATCH (n) DETACH DELETE n")

    def delete(self):
        self.graph.delete_all()
        tipo = Tipo()
        tipo.description = 'False'
        self.graph.merge(tipo)
        tipo = Tipo()
        tipo.description = 'True'
        self.graph.merge(tipo)
Example #32
0
	return titlecase(''.join(random.choice(chars) for _ in range(size))) + " " + titlecase(''.join(random.choice(chars) for _ in range(size))) 
#for i in range(0,5):
#	Author_Generator()


	#a = Author_Generator()
	#a1=a
	#a = graph.merge_one("Author", "Name",a1 )

#r1 = random.randint(0,10)
for i in range(0,40):
	Author_name.append(Author_Generator())
	Author_id.append((i))
	a=Node("Author", ID=Author_id[i],Name=Author_name[i])
	#a.properties["Name"]=Author_name[i]
	graph.merge(a)


for i in range(0,5):
	r1 = random.randint(0,40)
	Author_id.append((i+40))
	Author_name.append(Author_name[r1])
	a=Node("Author", ID=Author_id[i+40],Name=Author_name[i+40])
	graph.merge(a)

for i in range(0,45):
	for j in range(0,45):
		r1 = random.randint(0,50)
		r2 = random.randint(5,25)
		if i == j:
			Matrix[i][j]=r2
Example #33
0
for a in allTrades:
    if a.name in southeastList: a.division = "Southeast"
    elif a.name in atlanticList: a.division = "Atlantic"
    elif a.name in centralList: a.division = "Central"
    elif a.name in southwestList: a.division = "Southwest"
    elif a.name in northwestList: a.division = "Northwest"
    elif a.name in pacificList: a.division = "Pacific"

for j in range(0, len(matches)):
    #allTrades[j * 2].gave.update(allTrades[j * 2 + 1], properties={"month": tradeMonth[j], "year": 2016, "draft": allDraft[j][1]})
    #allTrades[j * 2 + 1].received.update(allTrades[j * 2], properties={"month": tradeMonth[j], "year": 2016, "draft": allDraft[j][0]})
    sgraph.merge(
        Relationship(allTrades[j * 2].__ogm__.node,
                     "TRADED",
                     allTrades[j * 2 + 1].__ogm__.node,
                     month=tradeMonth[j],
                     year=2016,
                     draft=allDraft[j][1]))
    sgraph.merge(
        Relationship(allTrades[j * 2 + 1].__ogm__.node,
                     "TRADED",
                     allTrades[j * 2].__ogm__.node,
                     month=tradeMonth[j],
                     year=2016,
                     draft=allDraft[j][0]))

# for t in teams:
#     sgraph.push(t)
options = {"Team": "name"}
	def process (self, parameters={}, data={} ):

		if 'verbose' in parameters:
			self.config['verbose'] = parameters['verbose']

		# for these facets, do not add an additional entity to connect to; instead write the values as properties on the document entity
		properties = ['content_type_ss', 'content_type_group_ss', 'language_ss', 'language_s']
		
		host = 'localhost'
		if 'neo4j_host' in parameters:
			host = parameters['neo4j_host']

		user = '******'
		if 'neo4j_user' in parameters:
			user = parameters['neo4j_user']

		password = '******'
		if 'neo4j_password' in parameters:
			password = parameters['neo4j_password']
		
		graph = Graph(host=host, user=user, password=password)
		
		document_node = Node('Document', name = parameters['id'])

		if 'title' in data:
			document_node['title'] = data['title']

		# add properties from facets
		for entity_class in parameters['facets']:
			
			if entity_class in data:
				
				entity_class_label = parameters['facets'][entity_class]['label']

				if entity_class in properties:

					document_node[entity_class_label] = data[entity_class]

		graph.merge(document_node)
	
	
		# add / connect linked entities from facets
			
		for entity_class in parameters['facets']:
			
			if entity_class in data:

				entity_class_label = entity_class
				if parameters['facets'][entity_class]['label']:
					entity_class_label = parameters['facets'][entity_class]['label']

				if not entity_class in properties:
	
					relationship_label = entity_class_label
	
					if entity_class in ['person_ss','organization_ss', 'location_ss']:
						relationship_label = "Named Entity Recognition"
	
					# convert to array, if single entity / not multivalued field
					if isinstance(data[entity_class], list):
						entities = data[entity_class]
					else:
						entities = [ data[entity_class] ]
	
					for entity in entities:					
	
						if self.config['verbose']:
							print ("Export to Neo4j: Merging entity {} of class {}".format(entity, entity_class_label))
	
						# if not yet there, add the entity to graph
						entity_node = Node(entity_class_label, name = entity)
						graph.merge(entity_node)
						
						# if not yet there, add relationship to graph
						relationship = Relationship(document_node, relationship_label, entity_node)
						graph.merge(relationship)

		
		return parameters, data