Example #1
def loadneonodes():
    graph = Graph(host='localhost', port=7687, password="******")

    #delete all the existing nodes
    graph.delete_all()

    #Read the csv file to load the City nodes
    query = """
    LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/City_load_data.CSV' AS row
    CREATE (c:City {name:row.City,id:row.custom_cityid , Latitude:toFloat(row.Latitude), Longitude:toFloat(row.Longitude)})
    """

    graph.run(query)

    # Read the csv file to load the company nodes
    query = """
    LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/Company_load_data.csv' AS row
    CREATE (cp:Company {name:row.Company,id:row.custom_companyid,custom_cityid:row.custom_cityid })
    """

    graph.run(query)

    # Read the csv file to load the relationship between company and cities
    query = """
    LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/Relation_bw_comp_City.csv' AS row
    MATCH (cp:Company { id: row.custom_companyid}),(c:City { id: row.custom_cityid})
    CREATE (cp)-[:BELONGS { role: row.Belongs_to }]->(c)
    """

    graph.run(query)
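A minimal driver sketch for the loader above, assuming loadneonodes() is defined in the same module and the same connection settings apply; the verification queries are illustrative only.

from py2neo import Graph

if __name__ == "__main__":
    loadneonodes()
    # sanity-check the freshly loaded data (same connection settings as above)
    graph = Graph(host='localhost', port=7687, password="******")
    print(graph.run("MATCH (c:City) RETURN count(c) AS cities").data())
    print(graph.run("MATCH (:Company)-[b:BELONGS]->(:City) RETURN count(b) AS links").data())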
Example #2
    def kg_save(self):

        data = self.db.Conference.find({})
        dict = [i for i in data]
        # print(dict[0])
        graph = Graph('', user='', password='')
        graph.delete_all()
        tx = graph.begin()
        for i in dict:
            conference = Node('Conference', name=i["name"])
            conference['ddl'] = i['ddl']
            location = Node('Location',
                            name=self._location_to_country(i["location"]))
            about = Node('Discipline',
                         name=self._format_transfer(i['discipline']))
            tx.merge(conference,
                     primary_label='Conference',
                     primary_key='name')
            tx.merge(location, primary_label='Location', primary_key='name')
            tx.merge(about, primary_label='Discipline', primary_key='name')
            rel_about = Relationship(about, 'has_meeting', conference)
            rel_loc = Relationship(conference, 'at', location)
            rel_loc['duration'] = i['duration']
            tx.merge(rel_about)
            tx.merge(rel_loc)
            print('{} is saved'.format(rel_about))
            print('{} is saved'.format(rel_loc))

        tx.commit()
Example #3
def upload(data):
    from py2neo import Node, Relationship
    from py2neo import Graph
    graph = Graph("http://localhost:7474",
                  username="******",
                  password="******")
    graph.delete_all()
    nodes = []
    for item in data:
        concept, pronunciation, pos2definition = extract_item_properties(item)
        node_tmp = Node("Prosthodontics", name=concept)
        node_tmp.properties["pronunciation"] = pronunciation
        cnt = 1
        for pos2def in pos2definition:
            node_tmp.properties["pos " + str(cnt)] = pos2def["pos"]
            #             node_tmp.properties["definition "+str(cnt)]=pos2def["definition"]
            for attribute, value in pos2def["attributes"].iteritems():
                node_tmp["def " + str(cnt) + " : " + attribute] = value
        graph.create(node_tmp)
        nodes.append(node_tmp)
    print "nodes create over , relation start to create"

    for node1 in nodes:
        properties = node1.properties.keys()
        for property in properties:
            if property[8:] == "cross_reference":
                for node2 in nodes:
                    if node2.properties["name"] == node1[property]:
                        graph.create(
                            Relationship(node1, "cross_reference", node2))
    print "graph create over"
Example #4
	def createGraph(self):
		""" 
		form: 
		(self) --> print

		description: 
		function that creates the neo4j graph

		example:
		>>> graphWN.createGraph()
		creating graph...
		graph created

		"""
		print "creating graph..."
		graph = Graph()
		graph.delete_all()
		for synset in self.synset2synonym:
		  word_node = Node("word", literal=self.synset2word[synset])
		  #print synset, self.synset2synonym[synset]
		  #if graph.find(self.synset2word[synset])!=None:
		    #print "Exist"
		    #word_node=graph.find_one("word", 'literal', self.synset2word[synset])
		    #print word_node
		  synset_node = Node("synset", name=synset)
		  word_has_synset = Relationship(word_node, "has_synset", synset_node)
		  if self.synset2synonym[synset][0]!='_EMPTY_':
		    for synonym in self.synset2synonym[synset]:
		      word_syn = Node("word", literal=synonym)
		      synset_has_synonym = Relationship(synset_node, "has_synonym", word_syn)
		      graph.create(synset_has_synonym)
		  graph.create(word_has_synset)
		print "graph created"
Example #5
def main():
    g = Graph(auth=('neo4j', 'emma15emma'))
    g.delete_all()
    # loadTeams(g,"./teams.dat")
    # loadGames(g,"./games.dat")
    dupliGames(g, "./teams.dat", "./games.dat")
    print('Data loaded!')
Example #6
class MyGraph(object):
    def __init__(self):
        self.graph = Graph('bolt://localhost:7687',
                           user='******',
                           password='******')
        self.matcher = NodeMatcher(self.graph)
        self.node_name = 'JIKE_user'
        self.FOLLOWER_REL = 'FOLLOWER'
        self.FOLLOWING_REL = 'FOLLOWING'

    def add_a_rel(self, node_a, node_b, rel_type='FOLLOWER'):
        ab = Relationship(node_a, rel_type, node_b)
        self.graph.create(ab)

    def add_a_person(self, user):
        if self.search_a_person(username=user.username):
            return
        node = Node(self.node_name, **user.to_dict())
        self.graph.create(node)

    def search_a_person(self, **properties):
        return self.matcher.match(self.node_name, **properties).first()

    def flush(self):
        self.graph.delete_all()
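A hypothetical way to exercise MyGraph; the JikeUser helper below is an illustrative stand-in (not part of the original code) that only provides the .username attribute and .to_dict() method that add_a_person expects.

class JikeUser:
    """Illustrative stand-in for the user objects passed to add_a_person."""
    def __init__(self, username):
        self.username = username

    def to_dict(self):
        return {'username': self.username}


g = MyGraph()
g.flush()                                   # wipe the graph before loading
g.add_a_person(JikeUser('alice'))
g.add_a_person(JikeUser('bob'))
alice = g.search_a_person(username='alice')
bob = g.search_a_person(username='bob')
g.add_a_rel(alice, bob, rel_type=g.FOLLOWING_REL)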
Example #7
class Test:
    def __init__(self):
        self.L1 = ['C语言','数组']
        self.g = Graph("http://localhost:7474", username="******", password="******")
        self.g.delete_all()
    
    '''Create nodes'''
    def create_node(self, label, nodes):
        for node_name in nodes:
            node = Node(label, name=node_name)
            self.g.create(node)
            print(len(nodes))  
        return


    def createRels(self, start_node, end_node, edges, rel_type, rel_name):
        count = 0
        p = edges[0]
        q = edges[1]
        query = "match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)" % (start_node, end_node, p, q, rel_type, rel_name)
        try:
            self.g.run(query)
            count += 1
            print(rel_type, count, all)
        except Exception as e:
            print(e)
Example #8
class Neo4jUtil(object):

    def __init__(self, url, username, password):

        # the authentication host must not include the http prefix
        httpPrefix = "http://"
        # set the login credentials
        authenticate(url.replace(httpPrefix, ""), username, password)
        # connect to the neo4j database
        self.graph = Graph(url + '/db/data/')

    def empty(self):
        self.graph.run("match (n) detach delete n")
    # create a node
    def create_column_node(self, node_name):
        match_result = self.graph.run("MATCH (a:Column) WHERE a.name = '%s' RETURN a" % node_name).data()
        if len(match_result) == 0:
            self.graph.run("CREATE (n:Column { name: '%s' })" % node_name)
    # create a relationship
    def create_column_relation(self, origin_name, dest_name):
        match_result = self.graph.run(
            "MATCH (a:Column)-[r:beDepColumn]->(b:Column) WHERE a.name = '%s' AND b.name = '%s' RETURN r" %
            (origin_name, dest_name)).data()
        if len(match_result) == 0:
            self.graph.run("""MATCH (a:Column),(b:Column)
                            WHERE a.name = '%s' AND b.name = '%s'
                            CREATE (a)-[r:beDepColumn]->(b)""" % (origin_name, dest_name))
    # clear the database
    def delete_all(self):
        self.graph.delete_all()
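A brief usage sketch for Neo4jUtil, assuming the class above is in scope; the URL, credentials and column names are placeholders.

util = Neo4jUtil("http://localhost:7474", "neo4j", "secret")  # placeholder credentials
util.empty()                                      # clear the existing graph
util.create_column_node("orders.total")           # placeholder column names
util.create_column_node("report.revenue")
# link the two columns with a beDepColumn relationship (origin -> dest)
util.create_column_relation("orders.total", "report.revenue")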
Example #9
class Data(object):
    def __init__(self):
        self.g = Graph(host="127.0.0.1",
                       http_port=7474,
                       user="******",
                       password="******")
        self.matcher = NodeMatcher(self.g)

    def clear(self):
        self.g.delete_all()

    def first_init(self):
        self.g.delete_all()
        matcher = NodeMatcher(self.g)
        with open('aaa.txt', encoding='utf-8') as f:
            for i in f.readlines():
                i = i.strip()
                r = m.match(i)
                if r:
                    print(r.groups())
                    #('A1 -> A2', '是') or ('A1', '潮流收敛调整')
                    r2 = m2.match(r.group(1))
                    if r2:
                        #print(r2.groups())
                        #('A1', 'A3')
                        n1 = matcher.match(r2.group(1)).first()
                        n2 = matcher.match(r2.group(2)).first()
                        tempN = Relationship(n1, r.group(2), n2)
                    else:
                        tempN = Node(r.group(1), name=r.group(2))
                    self.g.create(tempN)
Example #10
    def createGraph(self):
        """ 
		form: 
		(self) --> print

		description: 
		function that creates the neo4j graph

		example:
		>>> graphWN.createGraph()
		creating graph...
		graph created

		"""
        print "creating graph..."
        graph = Graph()
        graph.delete_all()
        for synset in self.synset2synonym:
            word_node = Node("word", literal=self.synset2word[synset])
            #print synset, self.synset2synonym[synset]
            #if graph.find(self.synset2word[synset])!=None:
            #print "Exist"
            #word_node=graph.find_one("word", 'literal', self.synset2word[synset])
            #print word_node
            synset_node = Node("synset", name=synset)
            word_has_synset = Relationship(word_node, "has_synset",
                                           synset_node)
            if self.synset2synonym[synset][0] != '_EMPTY_':
                for synonym in self.synset2synonym[synset]:
                    word_syn = Node("word", literal=synonym)
                    synset_has_synonym = Relationship(synset_node,
                                                      "has_synonym", word_syn)
                    graph.create(synset_has_synonym)
            graph.create(word_has_synset)
        print "graph created"
Example #11
class KnowledgeGraph(object):
    def __init__(self):
        conn_neo4j = ConnectionNeo4j()
        self._graph = Graph(host=conn_neo4j.ip,
                            auth=(conn_neo4j.username, conn_neo4j.password))

    def __repr__(self):
        return "[INFO] The neo4j version is {}.".format(
            py2neo.__version__)

    def load_file(self, cypher):
        self._graph.run(cypher)

    def add_node(self, labels, **kwargs):
        node = Node(labels, **kwargs)
        self._graph.create(node)

    def delete_node(self):
        self._graph.delete_all()

    # def find(self, label):
    #     return self._graph.find_one(label=label)

    def find(self):
        data = self._graph.data('MATCH (p:F**K) return p')
        df = pd.DataFrame(data)
        print(df)

    def match(self):
        pass
Example #12
    def neo_viz_multiple_domains_level(self, domains, df=None):
        '''
        Extend neo4j_viz_one_domain to multiple domains so that we can compare and see overlapping backlinks between them;
        only the domains themselves are visualized.
        '''
        neo_args = self.api_args['neo4j']
        graph = Graph(uri=neo_args['uri'],
                      auth=(neo_args['user_name'], neo_args['password']))
        graph.delete_all()
        if df is None:
            df = self.results['majestic']
        df = df[df["linkingToDomain"].isin(domains)]

        for d in domains:
            graph.run("create (d:Domain{url:'" + d + "'})")
        #Add backlinks domains (bd)
        for backlink_domain in df['linkingFromDomain'].unique():
            graph.run("create (bd:Backlinks_Domain" + "{url:'" +
                      backlink_domain + "'})")

        #Add link referral relationships
        for index, row in df.iterrows():
            graph.run("match (bd:Backlinks_Domain" + "{url:'" +
                      row['linkingFromDomain'] + "'}), (d:Domain{url:'" +
                      row['linkingToDomain'] +
                      "'}) create (bd)-[:Refers]->(d)")
        print(
            "please go to your Neo4j browser and run `match (n) return n` and display the graph"
        )
Example #13
def main():
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")

    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")

    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")

    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")

    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")

    if not has_pandas:
        sys.exit("[!] pandas must be installed for this script.")

    if not has_hmmlearn:
        sys.exit("[!] hmmlearn must be installed for this script.")

    if not has_sklearn:
        sys.exit("[!] scikit-learn must be installed for this script.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")

    print("[*] Script start. %s" %
          datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))

    if args.run:
        try:
            app.run(threaded=True, host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")

    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print(
            "[*] Delete all nodes and relationships from this Neo4j database.")

    if args.evtx:
        for evtx_file in args.evtx:
            if not os.path.isfile(evtx_file):
                sys.exit("[!] Can't open file {0}.".format(evtx_file))
        parse_evtx(args.evtx)

    if args.xmls:
        for xml_file in args.xmls:
            if not os.path.isfile(xml_file):
                sys.exit("[!] Can't open file {0}.".format(xml_file))
        parse_evtx(args.xmls)

    print("[*] Script end. %s" %
          datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
Example #14
def createGraph():
    global relationships

    graph = Graph()
    graph.delete_all()
    visualize()
    for relationship in relationships:
        graph.create(relationship) 
Example #15
def create_address_relation_network(files_folder_path: str):
    json_reader = BitcoinJsonReader()
    graph = Graph(password="******", port=11004)

    graph.delete_all()
    for file_name in os.listdir(files_folder_path):
        read_addresses_relation_network_time_span_into_graph(
            graph, json_reader, os.path.join(files_folder_path, file_name))
Example #16
def connect():
    host = input("Please enter the neo4j database server IP (defaults to localhost): ")
    host = 'localhost' if len(host.strip(' ')) == 0 else host
    password = input("Please enter the neo4j user's database password: ")
    graph = Graph("bolt://{}:7687".format(host), user='******',
                  password=password)  # this may need to change when running from the server since there it needs the instance IP
    graph.delete_all()
    return graph
Example #17
def conn_neo4j():
    global g
    g = Graph(
        "http://localhost:7474",
        username="******",
        password="******"
    )
    g.delete_all()
Example #18
class GraphImporter(object):
  def __init__(self, graphurl, commitEvery=100):
    self.graph = Graph(graphurl)
    self.commitEvery = commitEvery
    self._act = None
    self._actC = commitEvery

  def delete_all(self):
    self.graph.delete_all()
    self.graph.cypher.run('CREATE INDEX ON :_Network_Node(id)')
    self.graph.cypher.run('CREATE INDEX ON :_Set_Node(id)')

  def _tx(self):
    if self._act is not None:
      return self._act
    self._act = self.graph.cypher.begin()
    self._actC = self.commitEvery
    return self._act

  def _done(self):
    self._actC -= 1
    if self._actC == 0:  # commit
      self._act.process()
      self._act.commit()
      sys.stdout.write('.')
      self._act = None

  def _close(self):
    if self._act is not None:  # commit last tx
      self._actC = 1
      self._done()

  def add_node(self, labels, node_id, properties):
    tx = self._tx()
    add_node(tx, labels, node_id, properties)
    self._done()

  def done_nodes(self):
    self._done()

  def append(self, query):
    tx = self._tx()
    tx.append(query)
    self._done()


  def add_edge(self, label, source_node_id, target_node_id, properties, source_type=u'_Network_Node', update_only=False):
    tx = self._tx()
    add_edge(tx, label, source_node_id, target_node_id, properties, source_type, update_only)
    self._done()

  def __call__(self, query):
    tx = self._tx()
    tx.append(query)
    self._done()

  def finish(self):
    self._close()
Example #19
class HetioGraph:
    def __init__(self, user=None, password=None):
        self.graph = Graph(user=user, password=password)
        pd.set_option('display.max_rows', 200000)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.commit()

    def commit(self):
        return self.graph.begin().commit
    
    def execute_cypher(self, query):
        return self.graph.run(query)

    def create_graph_nodes(self):
        print('CREATING NODES')    

        self.execute_cypher(node_import_query)    

    def create_graph_edges(self):
        print('CREATING EDGES')
        self.execute_cypher(edge_import_query)

    def create_node_labels(self):
        for query in label_queries:
            self.execute_cypher(query)
            self.commit()         

    def create_relationship_labels(self):
        for query in relationship_queries:
            self.execute_cypher(query)
            self.commit()

    def initialize_graph(self):
        print('INITIALIZING GRAPH')
        self.clear_database()
        self.create_graph_nodes()
        self.create_graph_edges()
        self.create_relationship_labels()
        self.create_node_labels()

    def discover_new_treatments(self):
        print('FINDING NEW TREATMENTS')
        data = self.execute_cypher(discover_new_treatments_query).data()
        df = pd.DataFrame(data)
        df.reset_index(drop=True,inplace=True)
        
        print(df)

    def clear_database(self):
        print('CLEARING GRAPH')
        self.graph.delete_all()
        
Example #20
class GraphMaker(object):
    '''

        neo4j: (https://10-0-1-111-33931.neo4jsandbox.com/browser/)
            Entire triple: 
                CREATE (Keanu:Person {name:'Keanu Reeves', born:1964})-[:ACTED_IN {roles:['Neo']}]->(TheMatrix:Movie {title:'The Matrix', released:1999, tagline:'Welcome to the Real World'})
                MATCH(N) RETURN N

            Create node: 
                CREATE (n:Page {title:'Finance', url:'https://en.wikipedia.org/wiki/Finance'})

            Get node (as "n")
                match(n:Page {title: "Finance"})

            node = self.graph.evaluate("match (n:Section) where n.title='See also' return n")


    '''
    def __init__(self):
        authenticate("localhost:7474", "neo4j", "ece406")
        self.graph = Graph("http://localhost:7474/db/data/")
        self.graph.delete_all()

    def appendNode(self, node):
        self.graph.create(node)

    def appendNodes(self, *nodes):
        for node in nodes:
            self.graph.create(node)

    def makeRelationship(self, subjectnode, propertystring, objectnode):
        self.graph.create(Relationship(subjectnode, propertystring,
                                       objectnode))

    def drawGraph(self):
        options = {"Page": "title", "Section": "title"}
        draw(self.graph, options)

    def getData(self, querystring=None):
        if querystring is None:
            querystring = "match (n) return n"
        return self.graph.data(querystring)

    def printData(self, querystring=None):
        data = self.getData(querystring)
        for d in data:
            print(d)

    def getNodeByTitle(self, nodeTitle):
        node = self.graph.evaluate("match (n:Section) where n.title='" +
                                   nodeTitle + "' return n")
        if node:
            return node
        else:
            print("No node by that title")
            return
Example #21
def main():
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")

    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")

    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")

    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")

    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")

    if not has_flask:
        sys.exit("[!] Flask must be installed for this script.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")

    print("[*] Script start. %s" %
          datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))

    if args.run:
        try:
            app.run(host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")

    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print(
            "[*] Delete all nodes and relationships from this Neo4j database.")

    if args.evtx:
        evtx_file = args.evtx
        if not os.path.exists(evtx_file):
            sys.exit("[!] Can't open file {0}.".format(evtx_file))

        fb = open(evtx_file, "rb")
        fb_data = fb.read()[0:8]
        if fb_data != EVTX_HEADER:
            sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
        fb.close()
        parse_evtx(evtx_file, GRAPH)

    print("[*] Script end. %s" %
          datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
Example #22
def setup_neo4j():
    g = Graph()
    if delete_all:
        logging.info('Deleting all Neo4j content...')
        g.delete_all()
    logging.debug('Verifying Neo4j schema...')
    g.run('CREATE CONSTRAINT ON (n:Event) ASSERT n.id IS UNIQUE')
    g.run('CREATE INDEX ON :Event(timestamp)')
    g.run('CREATE INDEX ON :Event(ingest_time)')
    return g
Example #23
class Friends(object):
	def __init__(self, uri, username, password):
		self.neo = Graph(uri)
		self.uri = uri
		self.username = username
		self.password = password
	
	def create_person(self, name):
		node = Node("Person", name=name)
		self.neo.create(node)
		return node

	def make_mutual_friends(self, node1, node2):
		relationship = Relationship(node1, "FRIENDS_WITH", node2)
		relationship2 = Relationship(node2, "FRIENDS_WITH", node1)
		self.neo.create(relationship)
		self.neo.create(relationship2)

	def suggestions_for(self, node):
		returnType = "node"

		payload = {
			"order": "breadth_first",
			"uniqueness": "node_global",
			"relationships": {
				"type": "FRIENDS_WITH",
				"direction": "in"
			},
			"return_filter" : {
				"body" : "position.length() == 2;",
				"language" : "javascript"
			},
			"max_depth": 2
		}

		payload = json.dumps(payload)

		headers = {
			"Accept": "application/json; charset=UTF-8",
			"Authorization": "Basic bmVvNGo6cGFzc3dvcmQ=",
			"Content-Type": "application/json"
		}
		
		uri = self.uri + "node/" + str(node._id) + "/traverse/" + returnType
		res = requests.post(uri, data=payload, headers=headers).json()

		recommendations_list = []
		for el in res:
			recommendations_list.append(el["data"]["name"])
		recommendations = ', '.join(recommendations_list)

		return recommendations

	def reset(self):
		self.neo.delete_all()
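A hypothetical driver for the Friends class above; the REST URI and credentials are placeholders, and a running Neo4j instance exposing the legacy /db/data/ endpoint is assumed.

friends = Friends("http://localhost:7474/db/data/", "neo4j", "password")  # placeholders
friends.reset()                              # start from an empty graph
alice = friends.create_person("Alice")
bob = friends.create_person("Bob")
carol = friends.create_person("Carol")
friends.make_mutual_friends(alice, bob)
friends.make_mutual_friends(bob, carol)
# Carol is a friend-of-a-friend of Alice, so she should appear as a suggestion
print(friends.suggestions_for(alice))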
Example #24
class neoGraph():
    def __init__(self):
        # connect to the database
        self.graph = Graph("http://localhost:7474",
                           username="******",
                           password='******')
        self.data_message_list = []

    def read_data(self):
        with open('data.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            self.data_message_list = list(reader)

    def create_node(self):
        # create the main node
        main_node = Node(main_node_label, name=main_node_label)
        self.graph.create(main_node)
        i = 1
        for each_message in self.data_message_list:
            # create the nodes
            movie_node = Node(movie_name_label, name=each_message[0])
            self.graph.create(movie_node)
            actor_node = Node(actor_name_label, name=each_message[-1])
            self.graph.create(actor_node)
            direct_node = Node(director_name_label, name=each_message[1])
            self.graph.create(direct_node)
            # create the relationships
            print('no.')
            print(i)
            movie_to_main = Relationship(movie_node, 'no.' + str(i), main_node)
            i += 1

            self.graph.create(movie_to_main)
            actor_to_movie = Relationship(actor_node, '一句话电影', movie_node)
            self.graph.create(actor_to_movie)
            direct_to_movie = Relationship(direct_node, '导演', movie_node)
            self.graph.create(direct_to_movie)

    def create_rel(self):
        matcher = NodeMatcher(self.graph)
        main_node = matcher.match(main_node_label, name=main_node_label).first()
        # link the main node to each movie name
        i = 1
        for each_movie in movie_name_list:
            movie_node = matcher.match(movie_name_label, name=each_movie).first()
            movie_to_main = Relationship(movie_node, 'no.' + str(i), main_node)
            i += 1
            self.graph.create(movie_to_main)
        # link directors to movie names

        # link lead actors to movie names

    def clean_node(self):
        # clear the database
        self.graph.delete_all()
Example #25
class Friends(object):
    def __init__(self, uri, username, password):
        self.neo = Graph(uri)
        self.uri = uri
        self.username = username
        self.password = password

    def create_person(self, name):
        node = Node("Person", name=name)
        self.neo.create(node)
        return node

    def make_mutual_friends(self, node1, node2):
        relationship = Relationship(node1, "FRIENDS_WITH", node2)
        relationship2 = Relationship(node2, "FRIENDS_WITH", node1)
        self.neo.create(relationship)
        self.neo.create(relationship2)

    def suggestions_for(self, node):
        returnType = "node"

        payload = {
            "order": "breadth_first",
            "uniqueness": "node_global",
            "relationships": {
                "type": "FRIENDS_WITH",
                "direction": "in"
            },
            "return_filter": {
                "body": "position.length() == 2;",
                "language": "javascript"
            },
            "max_depth": 2
        }

        payload = json.dumps(payload)

        headers = {
            "Accept": "application/json; charset=UTF-8",
            "Authorization": "Basic bmVvNGo6cGFzc3dvcmQ=",
            "Content-Type": "application/json"
        }

        uri = self.uri + "node/" + str(node._id) + "/traverse/" + returnType
        res = requests.post(uri, data=payload, headers=headers).json()

        recommendations_list = []
        for el in res:
            recommendations_list.append(el["data"]["name"])
        recommendations = ', '.join(recommendations_list)

        return recommendations

    def reset(self):
        self.neo.delete_all()
Example #26
class DiseasePipeline(object):

    def __init__(self):
        self.graph = Graph(NEO4J_URL, auth = (NEO4J_USERNAME, NEO4J_PASSWORD))
        self.graph.delete_all()
        # self.file = open('test.txt', "a+")
    def process_item(self, item, spider):
        # self.file.write(str(item) + '\n\n')
        # self.file.flush()

        item['name'] = item['name'].strip()

        node = self.graph.nodes.match('disease', name = item['name']).first()
        if node is None:    # if this disease does not exist yet, create it
            node = Node('disease', **item)
            self.graph.create(node)
            node = self.graph.nodes.match('disease', name = item['name']).first()
        else:               # if the disease already exists, update it
            node.update(item)
            self.graph.merge(node, 'disease', 'name')

        # link related diseases
        relatedDiseases = item['relatedDisease']
        for disease in relatedDiseases:
            disease = disease.strip()
            newNode = self.graph.nodes.match('disease', name = disease).first()

            if newNode is None:    # if the related disease does not exist, create it so the link can be made
                newNode = Node('disease', name = disease)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('disease', name = disease).first()

            # check whether a 'relate' link already exists between the two diseases; if not, create it
            r = Relationship(node, "relate", newNode)
            if self.graph.match_one((node, newNode), r_type = 'relate') is None:
                self.graph.create(r)
        
        # link diseases to their symptoms
        symptoms = item['typicalSymptom'].split('、')
        for symptom in symptoms:
            symptom = symptom.strip() # strip extra whitespace
            newNode = self.graph.nodes.match('symptom', name = symptom).first()
            
            if newNode is None: # if the symptom does not exist yet, create it
                newNode = Node('symptom', name = symptom)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('symptom', name = symptom).first()
                       
            # check whether a 'have' link already exists between disease and symptom; if not, create it
            r = Relationship(node, 'have', newNode)
            if self.graph.match_one((node, newNode), r_type = 'have') is None:
                self.graph.create(r)
Example #27
def main():
    parser = CustomParser()
    parser.add_argument("--input", "-i",
                        help="sets input folder")

    args = parser.parse_args()
    # print args.input


    extract_codex_data(args.input)
    # codex_extractor.main(args)

    graph_http = "http://" + NEO4J_USER + ":" + \
        NEO4J_PASSWORD + "@:" + NEO4J_PORT + "/db/data/"
    GRAPH = Graph(graph_http)

    GRAPH.delete_all()
    with GRAPH.begin() as tx:
        data = get_tools_info(args.input)
        for apt in data:

            add_apt(tx, apt)
            for file in data[apt]:
                add_mw(tx, file['MD5'],
                       file['SHA1'],
                       file['SHA256'],
                       file['Description'],
                       file['File_Name'],
                       file['Compilation_Time']
                       )
                add_apt_mw_rel(tx, apt, file['MD5'])
                for pdb in file['PDB']:
                    add_pdb_mw_rel(tx, pdb, file['MD5'])
                try:
                    add_mw_signature(tx, file['Signatures'], file['MD5'])
                    for st in file['Interesting']:
                        add_int_mw_rel(tx, st, file['MD5'])
                except KeyError as err:
                    print file['MD5'] + " error"
                    print err
                    pass
                try:
                    for macro in file['Office']:
                        add_macro_mw_rel(tx, macro, file['MD5'])
                    add_office_last_saved(tx, file['Last_Saved'], file['MD5'])
                except Exception:
                    pass
        tx.run(cypher_clean_empty_comp)
        tx.run(cypher_clean_empty_filetype)
        tx.run(cypher_clean_empty_filename)
        tx.run(cypher_clean_empty_pdb)
        tx.run(cypher_clean_empty_last_saved)
Example #28
class Neo4j():
    def __init__(self):
        self.tp = textProcess.TextProcess()
        self.graph = Graph("http://localhost:7474",
                           username="******",
                           password='******')
        self.graph.delete_all()

    def connect_db(self):
        return pymysql.connect(host='192.168.1.101',
                               port=3306,
                               user='******',
                               password='',
                               database='xinhua',
                               charset='utf8')

    def query_news(self, sql_str):
        logging.info(sql_str)
        con = self.connect_db()
        cur = con.cursor()
        cur.execute(sql_str)
        rows = cur.fetchall()
        cur.close()
        con.close()
        return rows

    def test(self):
        rows = self.query_news("select * from news limit 100")
        for row in rows:
            title = row[3]
            content = row[4]
            catalog = row[6]
            keywords = list(self.tp.tfidf(title + content))
            date = time.mktime(row[2].timetuple())
            date_str = row[2].strftime("%Y-%m-%d %H:%M:%S")
            tx = self.graph.begin()
            news = Node('news',
                        title=title,
                        content=content,
                        date=date_str,
                        catalog=catalog,
                        url=row[5],
                        mktime=date)
            # b = Node('PersonTest', name='张三1', key='s1', age=33)
            tx.merge(news, 'news', 'title')

            for k in keywords:
                keyword = Node('keyword', key=k)
                tx.merge(keyword, 'keyword', 'key')
                relation = Relationship(news, 'include', keyword)
                tx.create(relation)
            tx.commit()
Example #29
def init_db(delete = False):
    """ initializes the db connection and returns the graph object.
    TODO: Enable variables
    """
    uri = "bolt://localhost:7687"
    print("Connecting to database at %s.." % uri)
    graph = Graph(uri, auth=("neo4j", "admin"))
    print("Connected.")
    if delete:
        print("Deleting graph at %s.." % uri)
        graph.delete_all()
        print("Deleted")
    return graph
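A typical call pattern for init_db, assuming the function above is in scope; the Person node created afterwards is only illustrative.

from py2neo import Node

graph = init_db(delete=True)                 # connect and wipe any existing data
graph.create(Node("Person", name="Ada"))     # illustrative node
print(graph.run("MATCH (p:Person) RETURN count(p) AS people").data())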
Example #30
def main1():
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    graph.delete_all()

    banana = Node("Fruit", name="banana", colour="yellow", tasty=True)
    graph.create(banana)

    t = graph.merge_one("Fruit", 'name', 'apple')
    t['colour'] = 'green'
    t['tasty'] = True
    t.push()
Example #31
def get_graph(new_graph=True):
    # load / declare the database
    # inputs: new_graph - if true then existing graph is cleared
    # outputs: graph - a py2neo graph object

    graph = Graph("bolt://localhost:7687",
                  user="******",
                  password="******")
    graph.begin()

    if new_graph == True:
        graph.delete_all()

    return graph
Example #32
class Build_Configuration():
    def __init__(self):
        self.graph = Graph()
        self.graph.delete_all()
        self.namespace = ["Start"]
        self.parent_node = []

    def check_duplicates(self, label, name):
        #print "label",label,name
        if self.graph.find_one(label, property_key="name",
                               property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        return self.parent_node[-1]

    def pop_namespace(self):
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name,
                       properties):
        namespace = self.get_namespace(name)

        self.check_duplicates(label, name=namespace)

        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        if len(self.parent_node) != 0:
            relation_entity = Relationship(self.get_parent_node(), relationship, node)

            self.graph.create(relation_entity)

        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
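A hedged sketch of how the Build_Configuration namespace stack might be driven; the labels, names and properties are invented for illustration, and a local Neo4j instance is assumed.

bc = Build_Configuration()
# push_namespace=True makes "datacenter" the parent of the nodes created next
bc.construct_node(True, "CONTAINS", "Site", "datacenter", {"region": "us-east"})
bc.construct_node(False, "CONTAINS", "Rack", "rack-01", {"units": 42})
bc.pop_namespace()                           # back to the "Start" level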
Example #33
def conn_neo4j():
    logger.info("Connect DB...")
    global g
    try:
        g = Graph("http://localhost:7474",
                  username="******",
                  password="******")
        g.delete_all()
        logger.info("Delete all nodes & relations...")
        logger.info("Connect successfully...")
    except:
        print("Connect DB Error...")
        # traceback.print_exc()
        sys.exit(0)
Example #34
def set_config():
    global xunyiwenyao_level,xunyiwenyao_node,linchuang_medicine_level,linchuang_medicine_node,entity_num,relation_num,buwei,keshi,graph,matcher
    graph = Graph("http://localhost:7474", username="******", password='******')
    matcher = NodeMatcher(graph)
    graph.delete_all()
    xunyiwenyao_level = [] # every hierarchy level from xunyiwenyao, used for lookups; diseases and symptoms are not distinguished
    xunyiwenyao_node = [] # node data for departments and body parts
    linchuang_medicine_level = []
    linchuang_medicine_node = []
    entity_num = {'科室':0,'部位':0,'临床分级':0}
    relation_num= {'包括':0}
    buwei = ['全身', '男性股沟', '颈部', '眼', '生殖部位', '下肢', '口', '上肢', '腰部', '耳', '四肢', '腹部', '头部', '皮肤', '女性盆骨', '排泄部位', '胸部', '皮肤', '鼻']
    keshi = ['眼科', '五官科', '皮肤科', '骨外科', '康复科', '中医骨伤科', '中医科', '耳鼻喉科', '理疗科', '体检科', '皮肤性病科', '泌尿内科', '遗传病科', '肝胆外科', '中西医结合科', '内科', '心胸外科', '肿瘤内科', '营养科', '药品科', '外科', '肛肠科', '神经内科', '烧伤科', '口腔科', '血液科', '小儿内科', '心理科', '神经外科', '泌尿外科', '肾内科', '消化内科', '肿瘤外科', '风湿免疫科', '呼吸内科', '普外科', '内分泌科', '妇产科', '妇科', '男科', '儿科综合', '精神科', '急诊科', '感染科', '其他科室', '传染科', '中医理疗科', '心内科', '小儿外科', '整形美容科', '儿科', '性病科', '产科', '肿瘤科',
             '生殖健康', '保健养生', '辅助检查', '重症监护', '其他综合', '中医综合', '不孕不育', '肝病', '减肥']
Example #35
def main():
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")

    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")

    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")

    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")

    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD +"@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")

    print("[*] Script start. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))

    if args.run:
        try:
            app.run(threaded=True, host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")

    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print("[*] Delete all nodes and relationships from this Neo4j database.")

    if args.evtx:
        for evtx_file in args.evtx:
            if not os.path.isfile(evtx_file):
                sys.exit("[!] Can't open file {0}.".format(evtx_file))
        parse_evtx(args.evtx, GRAPH)

    if args.xmls:
        for xml_file in args.xmls:
            if not os.path.isfile(xml_file):
                sys.exit("[!] Can't open file {0}.".format(xml_file))
        parse_evtx(args.xmls, GRAPH)

    print("[*] Script end. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
Example #36
class Build_Configuration:
    def __init__(self):
        self.graph = Graph()
        self.graph.delete_all()
        self.namespace = ["Start"]
        self.parent_node = []

    def check_duplicates(self, label, name):
        # print "label",label,name
        if self.graph.find_one(label, property_key="name", property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        return self.parent_node[-1]

    def pop_namespace(self):
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name, properties):
        namespace = self.get_namespace(name)

        self.check_duplicates(label, name=namespace)

        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        if len(self.parent_node) != 0:
            relation_entity = Relationship(self.get_parent_node(), relationship, node)

            self.graph.create(relation_entity)

        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
Example #37
class Neo4j():

    def __init__(self, host='localhost:7474', username='******', password='******'):
        if not has_py2neo:
            raise Exception('py2neo is required, please install: pip install py2neo')
        authenticate(host, username, password)
        self.graph = Graph("http://{}/db/data/".format(host))

    def load_events_directory(self, directory):
        self.events = []
        for path in glob.glob(os.path.join(directory, '*.json')):
            e = MISPEvent()
            e.load(path)
            self.import_event(e)

    def del_all(self):
        self.graph.delete_all()

    def import_event(self, event):
        tx = self.graph.begin()
        event_node = Node('Event', uuid=event.uuid, name=event.info)
        # event_node['distribution'] = event.distribution
        # event_node['threat_level_id'] = event.threat_level_id
        # event_node['analysis'] = event.analysis
        # event_node['published'] = event.published
        # event_node['date'] = event.date.isoformat()
        tx.create(event_node)
        for a in event.attributes:
            attr_node = Node('Attribute', a.type, uuid=a.uuid)
            attr_node['category'] = a.category
            attr_node['name'] = a.value
            # attr_node['to_ids'] = a.to_ids
            # attr_node['comment'] = a.comment
            # attr_node['distribution'] = a.distribution
            tx.create(attr_node)
            member_rel = Relationship(event_node, "is member", attr_node)
            tx.create(member_rel)
            val = Node('Value', name=a.value)
            ev = Relationship(event_node, "has", val)
            av = Relationship(attr_node, "is", val)
            s = val | ev | av
            tx.merge(s)
            #tx.graph.push(s)
        tx.commit()
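One plausible way to drive the MISP importer above; the credentials and directory path are placeholders, and the directory is assumed to contain MISP event JSON files.

neo = Neo4j(host='localhost:7474', username='neo4j', password='secret')  # placeholders
neo.del_all()                                # start with an empty graph
neo.load_events_directory('./misp-events')   # imports every *.json event in the folder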
Example #38
def main2():
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)

    graph.delete_all()

    banana = Node("Fruit", name="banana", colour="yellow", tasty=True)
    graph.create(banana)

    t = graph.merge_one("Fruit", 'name', 'apple')
    t['colour'] = 'green'
    t['tasty'] = True
    t.push()

    alice = Node("Person", name="Alice")
    bob = Node("Person", name="Bob")
    alice_knows_bob = Relationship(alice, "KNOWS", bob, since=1999)
    graph.create(alice)
    graph.create(bob)
    graph.create(alice_knows_bob)
Example #39
	elif nodes[0] == 0:
		q.put("MATCH (n:roots {type:'NotSponsored'}) MATCH (f:files {filename:'"+nodes[1]+"'}) MERGE(n)-[:has]->(f)")
	else:
		q.put("MATCH (n:roots {type:'Testing'}) MATCH (f:files {filename:'"+nodes[1]+"'}) MERGE(n)-[:has]->(f)")
	for n in nodes[2:]:
		q.put("MERGE (w:website {website:'"+n+"'})") #create website node if it doesn't already exist
		q.put("MATCH (f:files {filename:'"+nodes[1]+"'}) MATCH (w:website {website:'"+n+"'}) MERGE(f)-[:links]->(w)")

train = pd.read_csv("./data/train.csv", header=0, delimiter=",", quoting=3)
sample = pd.read_csv("./data/sampleSubmission.csv", header=0, delimiter=",", quoting=3)

print("Starting processing...")

authenticate("localhost:7474", "neo4j", "neo4j") #username and password
graph = Graph() #by default, py2neo opens localhost
graph.delete_all() #deletes all nodes and edges (clears old data)
tx = graph.cypher.begin()
tx.append("CREATE(n:roots {type: 'Sponsored'})")
tx.append("CREATE(n:roots {type: 'NotSponsored'})")
tx.append("CREATE(n:roots {type: 'Testing'})")
tx.commit()
q = Queue.Queue()

for i, zipFile in enumerate(process_zips):			
	archive = zipfile.ZipFile(zipFile, 'r')
	file_paths = zipfile.ZipFile.namelist(archive)
	bar = ProgressBar(len(file_paths), max_width=40)
	pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()-1 or 1)
	for k, file_path in enumerate(file_paths):
		data = archive.read(file_path)
		openfile = file_path[2:] #filename
Example #40
    print("Ended Tags Load" + str(datetime.datetime.now()))
    return
    
    


##################BEGIN PROCESS HERE#########################
# ------------------------------------------------------------
#  this 'for loop' will set 'line' to an input line from system 
#    standard input file
# ------------------------------------------------------------
initTags()
graph = Graph()


graph.delete_all()


#CREATE ALL GENRE NODES HERE
ActionGenreNode = Node("Genre", name="Action")
AdventureGenreNode = Node("Genre", name="Adventure")
AnimationGenreNode = Node("Genre", name="Animation")
ChildrensGenreNode = Node("Genre", name="Childrens")
ComedyGenreNode = Node("Genre", name="Comedy")
CrimeGenreNode = Node("Genre", name="Crime")
DocumentaryGenreNode = Node("Genre", name="Documentary")
DramaGenreNode = Node("Genre", name="Drama")
FantasyGenreNode = Node("Genre", name="Fantasy")
FilmNoirGenreNode = Node("Genre", name="FilmNoir")
HorrorGenreNode = Node("Genre", name="Horror")
MusicalGenreNode = Node("Genre", name="Musical")
Example #41
class TestUser(unittest.TestCase):

    USER_DATA = {'contributors_enabled': False,
                 'created_at': 'Fri Jun 12 11:13:21 +0000 2009',
                 'default_profile': False,
                 'default_profile_image': False,
                 'description': 'We are the UK’s leading energy supplier and committed to '
                                'looking after your world. For Emergency numbers visit '
                                'http://t.co/GVkMDCUzW3',
                 'entities': {'description': {'urls': [{'display_url': 'britishgas.co.uk/emergency',
                                                        'expanded_url': 'http://www.britishgas.co.uk/emergency',
                                                        'indices': [111, 133],
                                                        'url': 'http://t.co/GVkMDCUzW3'}]},
                              'url': {'urls': [{'display_url': 'britishgas.co.uk/the-source',
                                                'expanded_url': 'http://www.britishgas.co.uk/the-source',
                                                'indices': [0, 22],
                                                'url': 'http://t.co/rlasQ9hHeu'}]}},
                 'favourites_count': 431,
                 'follow_request_sent': False,
                 'followers_count': 36081,
                 'following': False,
                 'friends_count': 4774,
                 'geo_enabled': True,
                 'id': 46630225,
                 'id_str': '46630225',
                 'is_translation_enabled': False,
                 'is_translator': False,
                 'lang': 'en',
                 'listed_count': 400,
                 'location': 'Staines, Middlesex',
                 'name': 'British Gas ',
                 'notifications': False,
                 'profile_background_color': '00AEDE',
                 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/831694128/7187a2d2a890b67c21ae04c18861f5b9.jpeg',
                 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/831694128/7187a2d2a890b67c21ae04c18861f5b9.jpeg',
                 'profile_background_tile': False,
                 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/46630225/1400584801',
                 'profile_image_url': 'http://pbs.twimg.com/profile_images/552048129055289344/6oPZvR3T_normal.jpeg',
                 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/552048129055289344/6oPZvR3T_normal.jpeg',
                 'profile_link_color': '1890C4',
                 'profile_location': None,
                 'profile_sidebar_border_color': 'FFFFFF',
                 'profile_sidebar_fill_color': 'D9EDF9',
                 'profile_text_color': '333333',
                 'profile_use_background_image': True,
                 'protected': False,
                 'screen_name': 'BritishGas',
                 'status': {'contributors': None,
                            'coordinates': None,
                            'created_at': 'Mon Mar 02 18:45:18 +0000 2015',
                            'entities': {'hashtags': [],
                                         'media': [{'display_url': 'pic.twitter.com/ec4iusBe4Q',
                                                    'expanded_url': 'http://twitter.com/BritishGas/status/572467734367191041/photo/1',
                                                    'id': 572425479120007168,
                                                    'id_str': '572425479120007168',
                                                    'indices': [108, 130],
                                                    'media_url': 'http://pbs.twimg.com/media/B_Gp8L9UsAAe8ap.png',
                                                    'media_url_https': 'https://pbs.twimg.com/media/B_Gp8L9UsAAe8ap.png',
                                                    'sizes': {'large': {'h': 500,
                                                                        'resize': 'fit',
                                                                        'w': 1000},
                                                              'medium': {'h': 300,
                                                                         'resize': 'fit',
                                                                         'w': 600},
                                                              'small': {'h': 170,
                                                                        'resize': 'fit',
                                                                        'w': 340},
                                                              'thumb': {'h': 150,
                                                                        'resize': 'crop',
                                                                        'w': 150}},
                                                    'type': 'photo',
                                                    'url': 'http://t.co/ec4iusBe4Q'}],
                                         'symbols': [],
                                         'urls': [],
                                         'user_mentions': []},
                            'favorite_count': 4,
                            'favorited': False,
                            'geo': None,
                            'id': 572467734367191041,
                            'id_str': '572467734367191041',
                            'in_reply_to_screen_name': None,
                            'in_reply_to_status_id': None,
                            'in_reply_to_status_id_str': None,
                            'in_reply_to_user_id': None,
                            'in_reply_to_user_id_str': None,
                            'lang': 'en',
                            'place': None,
                            'possibly_sensitive': False,
                            'retweet_count': 3,
                            'retweeted': False,
                            'source': '<a href="https://ads.twitter.com" '
                                      'rel="nofollow">Twitter Ads</a>',
                            'text': 'Afraid of the dust bunny lurking behind your fridge? '
                                    'Check out our guide to cleaning up those fridge coils: '
                                    'http://t.co/ec4iusBe4Q',
                            'truncated': False},
                 'statuses_count': 13664,
                 'time_zone': 'London',
                 'url': 'http://t.co/rlasQ9hHeu',
                 'utc_offset': 0,
                 'verified': True}

    def setUp(self):
        self.g = Graph(get_graph_url("dev"))

    def tearDown(self):
        self.g.delete_all()

    def testAddNewUser(self):
        u = User.new(self.g, properties=self.USER_DATA)
        u.get_followers()
Example #42

#graphdb = Graph('http://localhost:7474/default.graphdb')



#Sam
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''



graphdb = Graph()
graphdb.delete_all()


INSERT_USER_URL_QUERY = '''
    MERGE (user:User {username: {username}})
    MERGE (url:URL {url: {url}})
    CREATE UNIQUE (user)-[:SHARED]->(url)
    FOREACH (kw in {keywords} | MERGE (k:Keyword {text: kw}) CREATE UNIQUE (k)<-[:IS_ABOUT]-(url))
    FOREACH (author in {authors} | MERGE (a:Author {name: author}) CREATE UNIQUE(a)<-[:WRITTEN_BY]-(url))
'''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)
#api = tweepy.API(auth)
Example #43
#!/usr/bin/python
from __future__ import unicode_literals
from py2neo import authenticate, Graph
import argparse
import sys


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="ctAutocompletion database clearing script")
    parser.add_argument('--src', dest='src', default=None, help='')
    parser.add_argument('--elastic', dest='elastic', default=None, help='Elasticsearch authentication (optional)')
    parser.add_argument('--neo4j', dest='neo4j', default=None, help='Neo4j authentication (required)')
    args = parser.parse_args()

    try:
        # Check if Neo4j auth is provided
        if args.neo4j:
            (username, password) = tuple(args.neo4j.split(":"))
        else:
            username = "******"
            password = "******"

        authenticate("localhost:7474", username, password)
        db = Graph()
        db.delete_all()

    except Exception as err:
        print("Please provide Neo4j authentication\n\t--neo4j 'username:secret-password'")
        sys.exit(1)
Example #44
class SyntaxGraph():
    
    """
    The aim of this class is to find associated words to database syntax.
    A user will input a sentence, and these associations will be used to
    find the correct SQL statement to execute in the database.
    
    The relations between words are modelled as a graph. The nodes of the 
    graph are the words, and the edges (relationships) between nodes
    represent when a word means another word (e.g. is a synonym).
    
    The graph is "seeded" using a set of database syntax words, finding 
    synonyms/related words to these initial words using a call to a
    thesaurus API.
    
    The graph is then "grown" from the resulting synonyms using subsequent
    API calls, in a recursive fashion.
    
    When a user enters a sentence, this graph will be used to find 
    database syntax words which are within a certain "degree of 
    separation" from each word in the sentence, in an attempt to 
    start building a SQL query from this sentence.
    """
    
    def __init__(self, seed_words=None, seed_mappings=None):
        
        self.sql_terms = SQLTerms().sql_terms
        
        self.graph = Graph(DB_URI)
        self.tx = self.graph.cypher.begin()
        
        self.seed_mappings = seed_mappings or {'where': ['filter', 'for', 'during'],
                                               'from': ['source', 'in'],
                                               'into': ['toward', 'within', 'inside'],
                                               'group':['by'],
                                               'and': ['with']}
        
        self.seed_words = seed_words or [x for x in self.sql_terms if x not in self.seed_mappings]
    
        self.seed_words.extend([x for x in self.seed_mappings.iterkeys()])
        
        self.exclude_words = ['display']
        
    def seed(self, reset=False):
        
        print 'Seeding graph'
        
        if reset:
            self.graph.delete_all()
        
        for word in self.seed_words:
            if not self.already_called(word):
                self.add_synonyms(word)
            if word in self.seed_mappings:
                print 'Mapping %s to %s' % ( ','.join(self.seed_mappings[word]), word )
                base = self.graph.merge_one('Word', 'name', word)
                synonyms = [self.graph.merge_one('Word', 'name', x) for x in self.seed_mappings[word]]
                [self.graph.create_unique(Relationship(base, 'MEANS', synonym)) for synonym in synonyms]
                [self.graph.create_unique(Relationship(synonym, 'MEANS', base)) for synonym in synonyms]
            
                
    def grow(self, levels=1):
        
        print 'Levels left: %d' % levels
        
        query = ''' MATCH (w:Word)
                    WHERE NOT HAS (w.called)
                    RETURN w.name
                '''
        
        results = self.graph.cypher.execute(query)     
        
        for word in results:
            self.add_synonyms(word['w.name'])
            
        if levels > 1:
            self.grow(levels-1)
                
            
    def already_called(self, word):
        
        if len (self.graph.cypher.execute('''MATCH (w:Word)
                                             WHERE w.name = '%s'
                                               AND HAS (w.called)
                                             RETURN w.name 
                                          ''' % word) ) > 0:
            return True
        
    def update_set_called(self, word):
        
        word_node = self.graph.merge_one('Word', 'name', word)
        word_node.properties['called'] = 1
        word_node.push()
        
    def add_synonyms(self, word):
                                     
        url = 'http://words.bighugelabs.com/api/2/%s/%s/json' % (API_KEY, word)
        print url
        
        response = requests.get(url)
        
        try:
            data = response.json()
        except JSONDecodeError:
            self.update_set_called(word)
            return
        
        if 'verb' in data:
            for key in data['verb']:
                # Synonyms: words are all interrelated (connected graph)
                if key == 'syn':
                    
                    synonyms = [word]
                    synonyms.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in synonyms]
                    [self.graph.create_unique(Relationship(i, 'MEANS', j)) for j in nodes for i in nodes if i!=j]
                    
                # Similar / user defined words: words are related both ways between root and related words (both direction)
                elif key in ('sim', 'usr'):
                    
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[i], 'MEANS', nodes[j])) for j in range(len(nodes)) for i in range(len(nodes)) if (i+j>0 and i*j==0)]
                    
                # Related words: words are related only from root to related word (one direction)
                elif key == 'rel':
                    
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[0], 'MEANS', nodes[i])) for i in range(1, len(nodes))]
            
        self.update_set_called(word)
        
    def replace_word(self, word, max_degree_separation=2):
        
        if word in self.seed_words or word in self.exclude_words: return word
        
        replacement_candidates = []
        
        for seed_word in self.seed_words:
        
            query = '''MATCH p=shortestPath((w:Word{name:"%s"})-[*]-(n:Word{name:"%s"}))
                       RETURN length(p), n.name
                    ''' % (word, seed_word)
                    
            results = self.graph.cypher.execute(query)
            
            try:
                replacement_candidates.append(min([(row['length(p)'], row['n.name']) for row in results]))
            except ValueError:
                pass

        if len(replacement_candidates) > 0:
            replacement = min(replacement_candidates)
            if replacement[0] <= max_degree_separation:
                return replacement[1]
        
    def replace_text(self, text):
        
        pattern = re.compile('[\W_]+')
        cleaned = []
        replacements = []
        
        for word in text.split():
            cleaned_word = pattern.sub('', word)
            
            if cleaned_word not in [x[0] for x in cleaned]:
                cleaned.append([cleaned_word, self.replace_word(cleaned_word)])
            
            replacements.append(self.replace_word(cleaned_word) or cleaned_word)
        
        return ' '.join(replacements)
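Going by the class docstring, a typical session seeds the graph, grows it a level or two, and then rewrites a user sentence. A rough usage sketch follows; it assumes DB_URI and API_KEY are configured and the thesaurus API is reachable, and the sample sentence and the output comment are illustrative only.

sg = SyntaxGraph()
sg.seed(reset=True)   # wipe the graph and load the SQL seed words plus their synonyms
sg.grow(levels=2)     # expand the synonym graph two more levels via the thesaurus API
print sg.replace_text('show records for 2015')  # words within 2 hops of a seed word get replaced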
Ejemplo n.º 45
class Neo4jWrapper(object):

    def __init__(self, user_name, password, connection_string="", current_id=1):
        self.connection_string = connection_string \
                    if connection_string != "" \
                    else "http://"+ user_name+":"+password+"@localhost:7474/db/data/"
        self.graph_db = Graph(self.connection_string)
        self.current_id = current_id

    def delete_all_nodes(self):
        self.graph_db.delete_all()

    def insert_single_node(self, node_map):
        """
        Insert pre specified node type
        :param node_map:  format {'url':, 'brand': , 'tld': , 'website':}
        :return:
        """
        self.graph_db.cypher.execute("CREATE (w:Websites {id:{I}, brand:{BRAND}, website:{W}, url:{U}, tld:{TLD}})",
                  {"BRAND": node_map['brand'], "I": self.current_id, "W": node_map['website'],
                   "U": node_map['url'], "TLD": node_map['tld']})
        self.current_id += 1

    def insert_as_transaction(self, node_map_list):
        """
        Insert nodes with transaction
        :param node_map_list: list of data in format {'url':, 'brand': , 'tld': , 'website':}
        :return: None
        """
        tx = self.graph_db.cypher.begin()
        for node_map in node_map_list:
            self.current_id += 1
            tx.append("CREATE (w:Websites {id:{I}, brand:{B}, website:{W}, url:{U}, tld:{T}})", {
                "B": node_map['brand'], "I": self.current_id, "W": node_map['website'],
                "U": node_map['url'], "T": node_map['tld']})

        tx.commit()

    def batch_create(self, node_map_list):
        """ Execute multiple insert as batch jobs
        :param node_map_list:
        """
        batch = neo4j.WriteBatch(self.graph_db)
        for node_map in node_map_list:
            self.current_id += 1
            batch.append(self.create_cypher_job(None, node_map))
        return batch.submit()

    def create_cypher_job(self, statement=None, params_dict=dict()):
        """
        Create cypher job for bach insert
        :param statement:
        :param params_dict:
        :return: neo4j.CypherJob
        """
        default_statement = "CREATE (w:Websites {id:{I}, brand:{B}, website:{W}, url:{U}, tld:{T}})"
        default_params = {"B": params_dict['brand'], "I": self.current_id, "W": params_dict['website'],
                          "U": params_dict['url'], "T": params_dict['tld']}

        if statement is None or len(params_dict) == 0:
            return neo4j.CypherJob(default_statement, default_params)
        else:
            return neo4j.CypherJob(statement, params_dict)

    def insert_single_with_loop(self, node_map_list, insertion_type="single", insertion_size=0):
        """
        Convenient method to loop on data list
        :param node_map_list: list for dict in format {'url':, 'brand': , 'tld': , 'website':}
        :param insertion_type: single | transaction | batch
        :param insertion_size: integer - the number of nodes inserted in a single transaction
        :return: None
        """
        cnt = 0
        temp_node_holder = []
        for node_map in node_map_list:
            if insertion_type == 'single':
                self.insert_single_node(node_map)
            elif insertion_type in ('transaction', 'batch'):
                cnt += 1
                temp_node_holder.append(node_map)
                if cnt % insertion_size == 0:
                    self.insert_as_transaction(temp_node_holder) \
                        if insertion_type == 'transaction' \
                        else self.batch_create(temp_node_holder)
                    # overwrite the progress line in place instead of printing a new one
                    sys.stdout.write("\r++++INSERTED %d++++" % cnt)
                    sys.stdout.flush()
                    temp_node_holder = []

        # insert any remaining nodes
        if insertion_type in ('transaction', 'batch') and temp_node_holder:
            self.insert_as_transaction(temp_node_holder) \
                        if insertion_type == 'transaction' \
                        else self.batch_create(temp_node_holder)
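A short sketch of driving the wrapper in the three insertion modes its docstrings describe; the credentials and node dictionaries are placeholders in the {'url', 'brand', 'tld', 'website'} format the insert methods expect.

wrapper = Neo4jWrapper("neo4j", "password")  # placeholder credentials
wrapper.delete_all_nodes()
rows = [
    {'url': 'http://example.com', 'brand': 'Example', 'tld': 'com', 'website': 'example.com'},
    {'url': 'http://sample.org', 'brand': 'Sample', 'tld': 'org', 'website': 'sample.org'},
]
wrapper.insert_single_with_loop(rows, insertion_type='single')
wrapper.insert_single_with_loop(rows, insertion_type='transaction', insertion_size=2)
wrapper.insert_single_with_loop(rows, insertion_type='batch', insertion_size=2)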
Ejemplo n.º 46
import unicodedata
from py2neo import Graph, Node, Relationship

g = Graph()
g.delete_all()

starts = 'Stockholm', 'Edinburgh'

for start in starts:
	print "\nStarting " + start
	start_node = g.merge_one('Airport', property_key='name', property_value=start)
	for l in open(start + '.csv'):
		items = l.split(',')
		airline = unicodedata.normalize('NFKD', unicode(items[0], encoding='utf-8')).encode('ascii', 'ignore')
		print "Airline is " + airline
		for airport in items[1:]:
			airport = unicodedata.normalize('NFKD', unicode(airport, encoding='utf-8')).encode('ascii', 'ignore').strip().split('-')[0]
			print "Endpoint is " + airport
			end_node = g.merge_one("Airport", property_key='name', property_value=airport)
			g.create(Relationship(start_node, "FLIES_TO", end_node, airline=airline))
Ejemplo n.º 47
def setup_schema():
  def constraint(label, key):
    try:
      graph.schema.create_uniqueness_constraint(label, key)
    except:
      pass
  constraint("Repository", "full_name")
  constraint("User", "login")
  constraint("PullRequest", "id")
  constraint('ProcessingStatus', 'last_processed_date')

graph = Graph(os.environ.get('NEO4J_CONNECTION_STRING'))

if args.drop:
  print '!!! DROPPING DATABASE !!!'
  graph.delete_all()

setup_schema()

if args.file:
  load_from_file(args.file)
if args.download_from_date:
  load_from_date(args.download_from_date)
if args.cont:
  status = graph.find_one('ProcessingStatus')
  if status:
    date = status.properties['date']
    date = datetime.datetime.strptime(date, '%Y-%m-%d-%H')
    date += datetime.timedelta(hours=1)
  else:
    date = datetime.datetime(2011, 2, 12)
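The fragment relies on an args object parsed before the code shown here. A hypothetical sketch of the argparse setup it implies (flag names are inferred from the attributes used above, and `import argparse` is assumed):

# Sketch only: the real parser is not part of this fragment.
parser = argparse.ArgumentParser()
parser.add_argument('--drop', action='store_true')
parser.add_argument('--file')
parser.add_argument('--download-from-date', dest='download_from_date')
parser.add_argument('--continue', dest='cont', action='store_true')
args = parser.parse_args()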
Ejemplo n.º 48
__author__ = 'Marnee Dearman'
from py2neo import Graph, Node, Relationship
from settings import graphene

# What is the URL to my NEO4J
tuple_graph = Graph(graphene.DATABASE_URL)
print tuple_graph

# start over with delete so I can run the whole script at one time
tuple_graph.delete_all()

# Let's try modeling the group
# using Py2Neo.  This is the Python Meetup after all

# Example CYPHER
# CREATE (m:MEMBER {name:"Marnee"} )
# RETURN m
# Create a member aliased as "m" with name "Marnee"
# Return that member's node (m)

# Example PY2NEO
# one way is to setup a dictionary with the properties for the new Node, in this case MEMBER
# member_properties = {}
# member_properties["name"] = "Julian"
# show the code for py2neo
# member_node = Node.cast("MEMBER", member_properties)
# member_node = Node.cast("MEMBER", name="Julian", python_years=5)
# tuple_graph.create(member_node)

# Julian is lonely, let's give him a friend
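The fragment ends just before the friend is created. A sketch of what that next step could look like with the py2neo 2.x API shown in the comments above; the second member's name and the KNOWS relationship type are made up for illustration.

julian_node = Node.cast("MEMBER", name="Julian", python_years=5)
friend_node = Node.cast("MEMBER", name="Marnee")
knows = Relationship(julian_node, "KNOWS", friend_node)
tuple_graph.create(julian_node, friend_node, knows)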
Ejemplo n.º 49
class test_pipeline(unittest.TestCase):
    LEN_DATETIME = 26
    LEN_TEST_FILE = 632

    def setUp(self):
        try:
            __location__ = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            self.src = open(
                os.path.join(__location__, "data/bit-test-data.txt"))
            self.badFreq = open(
                os.path.join(__location__, "data/bad-frequency.txt"))
            self.badStartTime = open(
                os.path.join(__location__, "data/bad-starttime.txt"))
            self.graph = Graph("http://localhost:8484/db/data")
            self.graph.delete_all()
            self.service = WaferService(self.graph)
        except:
            print "Error during unittest setup"

    def tearDown(self):
        self.graph.delete_all()

    #
    # File tests
    #
    def test_open(self):
        self.assertEquals(len(self.src.read().split("\n")), 20)

    #
    # Parser tests
    #
    def test_parser(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(len(bitdo.toJson()), test_pipeline.LEN_TEST_FILE)
        self.assertEquals(len(bitdo.channels.keys()), 5)
        self.assertEquals(bitdo.header["SamplingFrequency"], "1000")
        self.assertEquals(len(bitdo.channels["EMG"]), 16)
        # Assure that datetime is to microsecond precision
        self.assertEquals(
            len(bitdo.header["StartDateTime"]), test_pipeline.LEN_DATETIME)


    def test_parser_errors(self):
        self.assertRaises(AttributeError, parser.BITdo, (self.badFreq))
        self.assertRaises(AttributeError, parser.BITdo, (self.badStartTime))

    #
    # Aggregator tests
    #
    def test_aggregator_nums(self):
        a = [0, 0, 1, 1, 1]
        s = aggregator.streaksIn(a)
        self.assertEquals(s[0].getStreaks(), [2])
        self.assertEquals(s[0].getStreakExp(2), [4])
        self.assertEquals(s[1].getStreaks(), [3])
        self.assertEquals(s[1].getStreakExp(2), [9])


    def test_aggregator_bools(self):
        b = [True, False, False, True, False]
        s = aggregator.streaksIn(b)
        self.assertEquals(s[True].getStreaks(), [1, 1])
        self.assertEquals(s[False].getStreaks(), [2, 1])
        self.assertEquals(s[False].getStreakExp(2), [4, 1])


    def test_aggregator_strings(self):
        c = ["cat", "826", "826", "826", "~~", "~~", "cat", "cat", "~~"]
        s = aggregator.streaksIn(c)
        self.assertEquals(s["cat"].getStreaks(), [1, 2])
        self.assertEquals(s["cat"].getStreakExp(2), [1, 4])
        self.assertEquals(s["826"].getStreaks(), [3])
        self.assertEquals(s["826"].getStreakExp(3), [27])
        self.assertEquals(s["~~"].getStreaks(), [2, 1])
        self.assertEquals(s["~~"].getStreakExp(-1), [0.5, 1])


    def test_aggregator_average(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(aggregator.average(bitdo.channels['EMG']), 525.4375)
        self.assertEquals(aggregator.average([1, 2, 3]), 2)
        self.assertEquals(aggregator.average([x for x in range(1000)]), 499.5)

    #
    # Graph Service
    #
    def test_add_new_user(self):
        user = self.service.add_user("Duke")
        userid = user.properties["userid"]
        activity = self.service.add_activity(
            userid, "Free Throws", "no description")
        activityname = activity.properties["name"]
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.assertEquals(count(self.graph.find("User")), 1)
        self.assertEquals(count(self.graph.find("Activity")), 1)
        self.assertEquals(count(self.graph.find("Moment")), 2)
        self.assertEquals(count(self.graph.find("Annotation")), 2)

    #
    # Graph API
    #
    def test_post_user(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)


    def test_post_user_fails(self):
        r = requests.post('http://localhost:8000/users', {})
        self.assertEquals(r.status_code, 400)


    def test_post_activity(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)


    def test_post_activity_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)

        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        r = requests.post('http://localhost:8000/activities', {
            'userid': 'Thaddeus'})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/users', {
            'name': 'Free-throw shooting'})
        self.assertEquals(r.status_code, 400)


    def test_post_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

        r = newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:true", "swish:true"])
        self.assertEquals(r.status_code, 201)


    def test_post_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        annotations = ["make:true", "swish:true"]
        r = requests.post('http://localhost:8000/moments', {
            # missing userid
            'name': 'Free-throw shooting',
            'timestamp': now(),
            'annotations[]': annotations})
        self.assertEquals(r.status_code, 400)

        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now()
            # missing annotations
        })
        self.assertEquals(r.status_code, 400)

        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now(),
            # it's `annotations[]`... sigh
            'annotations': annotations})
        self.assertEquals(r.status_code, 400)


    def test_get_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:true", "swish:false"])
        r = getMoments('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        self.assertEquals(len(r.json()), 3)


    def test_get_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Basketball')
        self.assertEquals(r.status_code, 200)

        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(), ["make:true", "swish:false"])
        # wrong activity name
        r = getMoments('Thaddeus', 'B_sketb_ll')
        self.assertEquals(r.status_code, 400)
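The tests call helpers (newUser, newActivity, newMoment, getMoments) that are not part of this fragment; presumably they are thin wrappers around the HTTP API on localhost:8000. A hypothetical sketch, with parameter names inferred from the failure-case tests above:

def newUser(userid):
    # Hypothetical helper; the real implementation is not shown in this fragment.
    return requests.post('http://localhost:8000/users', {'userid': userid})

def newActivity(userid, name):
    return requests.post('http://localhost:8000/activities',
                         {'userid': userid, 'name': name})

def newMoment(userid, name, timestamp, annotations):
    return requests.post('http://localhost:8000/moments',
                         {'userid': userid, 'name': name,
                          'timestamp': timestamp, 'annotations[]': annotations})

def getMoments(userid, name):
    return requests.get('http://localhost:8000/moments',
                        params={'userid': userid, 'name': name})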
Ejemplo n.º 50
   # html_text = Node("HTML text", page_source=html_of_new_page)
   # html_rel = Relationship(new_node,"HTML text",html_text)
   # gp.create(html_rel)
    gp.commit()
def get_the_available_crawlers():
    crawlers = ["CRAWLER-2", "CRAWLER-3", "CRAWLER-4"]
    return crawlers




graph_database_location = "http://"+database+":7474/db/data/"
graph = Graph(graph_database_location, user='******', password='******') # connect to the local graph database
if delete_graph_history == "yes":
    graph.delete_all() # Delete all the previous made nodes and relationship
    print("DATABASE DELETED !")
gp = graph.begin()

coordinates = [] # create the list for coordinates
coordinates = generate_coordinates(width, height, coordinates)   # generates coordinates based on the diff and the resolution

coordinates = generate_random_coordinates(coordinates)  # already generated coordinates are shuffled randomly

chrome_options = Options()
chrome_options.add_extension(".\process_monitor.crx") # Adding the extension to chrome
# chrome_options.add_extension("C:\\Users\crawler\Desktop\Crawler\process_monitor.crx")
chromium_path = ".\chrome-win32\chrome.exe" # Use the portable chromium browser
# If the Chromium browser is not required, removing the chromium_path above makes it fall back to the default browser.
# The default will be the developer (Dev channel) Google Chrome.
# Only Dev channel Google Chrome supports the extension used here, since it relies on a particular API.
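The fragment ends before the browser is launched. A sketch (not part of the original) of how the options and Chromium binary above might be wired into Selenium, assuming `from selenium import webdriver` is imported earlier in the file:

chrome_options.binary_location = chromium_path  # use the portable Chromium binary
driver = webdriver.Chrome(chrome_options=chrome_options)
driver.get("https://example.com")  # illustrative start page for the crawler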
Ejemplo n.º 51
	print str(datetime.now()) + "-" + action + "=>" + str(object)

######Set up######
##Argument Setup
parser = argparse.ArgumentParser(description="This script will support the importation of Swaggable's MySQL Database into Neo4j following the respective business rules.")
parser.add_argument("-r", "--rebuild", help="scraps and rebuilds the Neo4J graph", action="store_true")
parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true")
args = parser.parse_args()

##Neo4J Object
neo4jConnectionString = "http://*****:*****@localhost:7474/db/data"
neo4jGraph = Graph(neo4jConnectionString)

##Rebuild?
if args.rebuild:
	neo4jGraph.delete_all()

##MySQL Object
mysqlConnectionProperties = {
	'user':'******',
	'passwd':'q7pe9vk45g637DB',
	'host':'ec2-184-169-142-95.us-west-1.compute.amazonaws.com',
	'db':'swagbag_v2',
	'autocommit':True,
	'use_unicode':True,
	'use_pure':False}
mysqlConnection = mysql.connector.connect(**mysqlConnectionProperties)
cursor = mysqlConnection.cursor()

cursor.execute("SET GLOBAL max_allowed_packet=67108864")
Ejemplo n.º 52
class PopItToNeo(object):
    def __init__(self):
        config = yaml.load(open("config.yaml"))
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"

        # you know so that you can override this. why? I am not sure
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:
            self.graph.delete_all()

        # Because I am still not familiar to query with cypher
        # So lets cache here. Hopefully the memory usage don't kill me
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    def process_membership(self):
        # So lets start from membership
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")

            entries = data["result"]
            for entry in entries:

                # a membership has 3 important fields: person_id, organization_id, post_id
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue

                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue
                role = entry.get("role","member")
                if not role:
                    role = "member"
                logging.warning("Role: %s" % role)

                params = []

                # This happens only once anyway
                kwparams = {}
                kwparams["popit_id"] = entry["id"]
                start_date = get_timestamp(entry.get("start_date"))
                if start_date:
                    kwparams["start_date"] = start_date
                end_date = get_timestamp(entry.get("end_date"))
                if end_date:
                    kwparams["end_date"] = end_date

                post_exist = False
                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    if self.graph.match_one(person, role, post):
                        post_exist = True
                        logging.warning("Already exist, skipping")

                    if not post_exist:

                        relationship = Relationship(person, role, post, **kwparams)
                        self.graph.create(relationship)

                organization_exist = False

                if entry.get("organization_id"):
                    organization = self.fetch_organization(entry["organization_id"])
                    if not organization:
                        continue
                    if self.graph.match_one(person, role, organization):
                        logging.warning("Already exist, skipping")
                        organization_exist = True

                    if not organization_exist:

                        relationship = Relationship(person, role, organization, **kwparams)
                        self.graph.create(relationship)

            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]

        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node

        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Don't assume that this id won't be created the next time
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")

        entity = data["result"]
        if type(entity["name"]) == list:
            name = entity["name"][0]

        else:
            name = entity["name"]
        logging.warning("Name: %s" % name)
        kwparam = {}

        birth_date = get_timestamp(entity.get("birth_date"))
        if birth_date:
            kwparam["birth_date"] = birth_date
        death_date = get_timestamp(entity.get("death_date"))
        if death_date:
            kwparam["death_date"] = death_date
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]
        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]

        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node

        organization_url = "%s/%s/%s" % (self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")

        entity = data["result"]
        if type(entity["name"]) == list:
            name = entity["name"][0]
        else:
            name = entity["name"]

        kwparams = {}
        logging.warning("Name: %s" % name)
        kwparams["name"] = name
        kwparams["popit_id"] = entity["id"]
        founding_date = get_timestamp(entity.get("founding_date"))
        if founding_date:
            kwparams["founding_date"] = founding_date
        dissolution_date = get_timestamp(entity.get("dissolution_date"))
        if dissolution_date:
            kwparams["dissolution_date"] = dissolution_date

        if "classification" in entity:

            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]

        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]

        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node

        post_url = "%s/% s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")

        entity = data["result"]
        # Fetch the organization node, because a post is linked to an organization
        # What is the implication of a post without an organization?
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            logging.warning(e.message)
            organization = None
        logging.warning("Label: %s" % entity["label"])
        kwparams = {}
        kwparams["name"] = entity["label"]
        kwparams["popit_id"] = entity["id"]
        start_date = get_timestamp(entity.get("start_date"))
        if start_date:
            kwparams["start_date"] = start_date

        end_date = get_timestamp(entity.get("end_date"))
        if end_date:
            kwparams["end_date"] = end_date

        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node
        if organization:
            temp_param = {}
            if start_date:
                temp_param["start_date"] = start_date
            if end_date:
                temp_param["end_date"] = end_date
            relation = Relationship(node, "of", organization, **temp_param)
            self.graph.create(relation)

        return node

    def process_parent_company(self):
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)


        while True:
            data = self.fetch_entity(organizations_url)

            entries = data["result"]
            for entry in entries:
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                else:
                    logging.warning(entry.get("parent_id"))

                # TODO: Dafuq this is not DRY.
                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                parent_relationship = Relationship(parent_node, "parent_of", child_node)
                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(parent_relationship)
                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                child_relationship = Relationship(child_node, "child_of", parent_node)
                self.graph.create(child_relationship)

            if "next_url" in data:
                organizations_url = data["next_url"]
                logging.warning(organizations_url)
            else:
                break

    def process_posts(self):
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            entries = data["result"]
            for entry in entries:
                node = self.fetch_post(entry["id"])
                self.graph.create(node)
                # Since creating the organization relationship is already part of fetching a post,
                # our job is done here
            if "next_url" in data:
                post_url = data["next_url"]
                logging.warning(post_url)
            else:
                break

    def fetch_entity(self, url):
        r = requests.get(url)
        time.sleep(0.1)
        if r.status_code != 200:
            # Just to keep the output consistent; the exception does not kill the script anyway
            return {}
        return r.json()
Ejemplo n.º 53
class CreateDB():
    def __init__(self):
        authenticate("localhost:7474", "neo4j", "1234")
        # authenticate("52.27.227.159:7474", "neo4j", "1234")
        self.graph = Graph(GRAPH_CONNECTION_STRNIG)

        self.link_provider = LinksProvider()
        self.tvmaze = TVMaze()
        self.shows = []

        self.node_count = 0
        self.relationship_count = 0

    def create_shows(self):
        self.graph.delete_all()
        self.shows = self.tvmaze.get_all_shows()
        print " ++++  Creating {0} shows".format(len(self.shows))
        show_count = 0
        for show in self.shows:
            starttime = datetime.datetime.now()
            print "{0}:---------------------------------------------".format(show_count)
            show_count += 1
            print " + Creating show: {0}".format(show["name"])
            show_node = self.create_show(show)

            for genre in show.get('genres', []):
                genre_node = self.create_genre(genre)
                self.set_show_genre_relationship(show_node, genre_node)

            if show['webChannel'] is not None:
                webchannel_node = self.create_web_channel(show['webChannel'])
                self.create_web_channel_show_relationship(show_node, webchannel_node)

            if show['network'] is not None:
                network_node = self.create_network(show['network'])
                self.create_network_show_relationship(network_node, show_node)

            episode_nodes = self.create_episodes(show)
            print " + creating {0} episodes".format(len(episode_nodes))
            for episode_node in episode_nodes:
                self.create_episode_show_relationship(episode_node, show_node)
            endtime = datetime.datetime.now()
            deltatime = endtime-starttime
            print " - Operation took " + str(deltatime)
            print " - total Nodes: {0}, Relationships: {1}".format(self.node_count, self.relationship_count)
            print " - finished creating show"

        print " --- finished creating shows"
        return

    def create_show(self, show):
        show_node = graph.merge_one("Show", 'id', show['id'])

        show_node['url'] = show['url']
        show_node['name'] = show['name']
        show_node['type'] = show['type']
        show_node['status'] = show['status']
        show_node['runtime'] = show['runtime']
        show_node['premiered'] = show['premiered']
        show_node['weight'] = show['weight']
        show_node['summary'] = show['summary']
        show_node['img_medium'] = show['image'].get('medium', None)
        show_node['img_original'] = show['image'].get('original', None)

        if show['rating'] is not None:
            show_node['rating'] = show['rating']['average']

        show_node.push()
        self.node_count += 1
        return show_node

    """
    Genre
    """
    def create_genre(self, genre):
        genre_node = graph.merge_one("Genre", 'genre', genre)
        self.node_count += 1
        return genre_node

    def set_show_genre_relationship(self, show_node, genre_node):
        show_of_genre = Relationship(show_node, "of genre", genre_node)
        graph.create_unique(show_of_genre)
        self.relationship_count += 1
        return genre_node

    """
    Network
    """
    def create_network(self, network):
        network_node = graph.merge_one("Network", 'id', network['id'])
        network_node['name'] = network['name']
        network_node.push()
        self.node_count += 1

        if network['country'] is not None:
            country_node = self.create_country(network['country'])
            self.create_country_network_relationship(country_node, network_node)

        return network_node

    def create_network_show_relationship(self, network_node, show_node):
        show_of_network = Relationship(show_node, "from", network_node)
        graph.create_unique(show_of_network)
        self.relationship_count += 1
        return show_of_network
    """
    WebChannel
    """
    def create_web_channel(self, webChannel):
        webchannel_node = graph.merge_one("WebChannel", 'id', webChannel['id'])
        webchannel_node['name'] = webChannel['name']
        webchannel_node.push()
        self.node_count += 1

        if webChannel['country'] is not None:
            country_node = self.create_country(webChannel['country'])
            self.create_country_web_channel_relationship(country_node, webchannel_node)

        return webchannel_node

    def create_web_channel_show_relationship(self, show_node, webchannel_node ):
        show_of_webchannel = Relationship(show_node, "from", webchannel_node)
        graph.create_unique(show_of_webchannel)
        self.relationship_count += 1
        return show_of_webchannel

    """
    Country
    """
    def create_country(self, country):
        country_node = graph.merge_one("Country", 'code', country['code'])
        country_node['name'] = country['name']
        country_node['timezone'] = country['timezone']
        country_node.push()
        self.node_count += 1
        return country_node

    def create_country_web_channel_relationship(self, country_node, webchannel_node):
        webchannel_from_country = Relationship(webchannel_node, "from", country_node)
        graph.create_unique(webchannel_from_country)
        self.relationship_count += 1
        return webchannel_from_country

    def create_country_network_relationship(self, country_node, network_node):
        network_from_country = Relationship(network_node, "from", country_node)
        graph.create_unique(network_from_country)
        self.relationship_count += 1
        return network_from_country

    """
    Episodes
    """
    def create_episodes(self, show):
        episodes = self.tvmaze.get_show_episodes(show["id"])

        episode_nodes = []
        for episode in episodes:
            episode_node = self.create_episode(episode)
            episode_nodes.append(episode_node)

            episode_links = self.link_provider.get_links_for_episode(show['name'], episode['season'], episode['number'])

            for link in episode_links:
                link_node = self.create_link(link)
                self.create_link_episode_relationship(link_node, episode_node)

        return episode_nodes

    def create_episode(self, episode):
        episode_node = graph.merge_one("Episode", 'id', episode['id'])
        episode_node['name'] = episode['name']
        episode_node['season'] = episode['season']
        episode_node['number'] = episode['number']
        episode_node['airdate'] = episode['airdate']
        episode_node['airtime'] = episode['airtime']
        episode_node['airstamp'] = episode['airstamp']
        episode_node['runtime'] = episode['runtime']
        episode_node['summary'] = episode['summary']

        if episode['image'] is not None:
            episode_node['img_medium'] = episode['image'].get('medium', None)
            episode_node['img_original'] = episode['image'].get('original', None)

        episode_node.push()
        self.node_count += 1
        return episode_node

    def create_episode_show_relationship(self, episode_node, show_node):
        show_has_episode = Relationship(show_node, "has", episode_node)
        graph.create_unique(show_has_episode)
        self.relationship_count += 1
        return show_has_episode

    """
    Link
    """
    def create_link(self, link):
        """
        :param link:
        {
            url: str,
            host: str
        }
        :return:
        """
        link_node = graph.merge_one("Link", 'url', link['url'])
        link_node['host'] = link["host"]
        link_node.push()
        self.node_count += 1
        return link_node

    def create_link_episode_relationship(self, link_node, episode_node):
        link_has_episode = Relationship(episode_node, "has", link_node)
        graph.create(link_has_episode)
        self.relationship_count += 1
        return link_has_episode
Ejemplo n.º 54
import networkx as nx
from py2neo import authenticate, Node, Relationship, Graph
from py2neo.packages.httpstream.http import SocketError
from requests.exceptions import ConnectionError, HTTPError
from utils import get_results, handle_http_errors
from functools import partial
import soundcloud

client = soundcloud.Client(client_id='454aeaee30d3533d6d8f448556b50f23')

id2username_cache = {}

# need to navigate and set the password to "pass" for first time
authenticate("localhost:7474", "neo4j", "cloudchaser")
userGraph = Graph()

userGraph.delete_all()

def getUserAttr(resource, attr):
#   if hasattr(resource, 'user'): return resource.user[attr]
    if hasattr(resource, attr): return getattr(resource, attr)
    return None

getUsername = partial(getUserAttr, attr='username')
getUserid = partial(getUserAttr, attr='id')

@handle_http_errors
def id2username(profile, kind='users'):
    global id2username_cache
    username = id2username_cache.get(profile, None)
    if username is not None: return username
Ejemplo n.º 55
		{
		'CDs': None,
		'Discos de vinil': None,
		'Fitas cassete': None
		},
	'Importados':
		{
		'CDs': None,
		'Discos de vinil': None,
		'Fitas cassete': None
		}
	},
'Brinquedos':
	{
	'Jogos de tabuleiro': None,
	'Action Figures': None,
	'Bonecas': None,
	'Miniaturas':
		{
		'Veículos': None,
		'Construções': None
		}
	}
}

# Populate the database with nodes and relationships based on the 'categorias' dictionary
authenticate("localhost:7474", "neo4j", "secret")
stuffgraph = Graph()
stuffgraph.delete_all()
create_and_relate(categorias)
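create_and_relate itself is not included in this fragment. A minimal sketch of what such a function might do, walking the nested dictionary and linking each category to its parent; the label and relationship names are illustrative, and Node and Relationship are assumed to be imported from py2neo.

def create_and_relate(categories, parent_node=None):
    # Sketch only: one node per category, children linked to their parent.
    for name, children in categories.items():
        node = Node("Category", name=name)
        stuffgraph.create(node)
        if parent_node is not None:
            stuffgraph.create(Relationship(node, "SUBCATEGORY_OF", parent_node))
        if isinstance(children, dict):
            create_and_relate(children, node)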