def loadneonodes():
    """Rebuild the City/Company graph in Neo4j from local CSV files.

    Wipes the database, then bulk-loads City nodes, Company nodes and the
    BELONGS relationships between them via three LOAD CSV statements.
    """
    graph = Graph(host='localhost', port=7687, password="******")
    # Delete all the existing nodes before reloading.
    graph.delete_all()
    # The three import steps only differ in their Cypher text, so run them
    # in order from a single list instead of repeating the run() boilerplate.
    queries = [
        # Read the csv file to load the City nodes.
        """
        LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/City_load_data.CSV' AS row
        CREATE (c:City {name:row.City,id:row.custom_cityid , Latitude:toFloat(row.Latitude), Longitude:toFloat(row.Longitude)})
        """,
        # Read the csv file to load the Company nodes.
        """
        LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/Company_load_data.csv' AS row
        CREATE (cp:Company {name:row.Company,id:row.custom_companyid,custom_cityid:row.custom_cityid })
        """,
        # Read the csv file to load the relationship between companies and cities.
        """
        LOAD CSV WITH HEADERS FROM 'file:///C:/Users/Vinayak/neo4j-community-3.5.12/import/Relation_bw_comp_City.csv' AS row
        MATCH (cp:Company { id: row.custom_companyid}),(c:City { id: row.custom_cityid})
        CREATE (cp)-[:BELONGS { role: row.Belongs_to }]->(c)
        """,
    ]
    for query in queries:
        graph.run(query)
def kg_save(self):
    """Rebuild the conference knowledge graph from MongoDB.

    Reads every document in the `Conference` collection, wipes the Neo4j
    database, then merges Conference/Location/Discipline nodes and their
    has_meeting / at relationships in a single transaction.
    """
    data = self.db.Conference.find({})
    # Renamed from `dict`, which shadowed the builtin.
    records = [i for i in data]
    graph = Graph('', user='', password='')
    graph.delete_all()
    tx = graph.begin()
    for i in records:
        conference = Node('Conference', name=i["name"])
        conference['ddl'] = i['ddl']
        location = Node('Location', name=self._location_to_country(i["location"]))
        about = Node('Discipline', name=self._format_transfer(i['discipline']))
        # Merge on name so repeated locations/disciplines collapse to one node.
        tx.merge(conference, primary_label='Conference', primary_key='name')
        tx.merge(location, primary_label='Location', primary_key='name')
        tx.merge(about, primary_label='Discipline', primary_key='name')
        rel_about = Relationship(about, 'has_meeting', conference)
        rel_loc = Relationship(conference, 'at', location)
        rel_loc['duration'] = i['duration']
        tx.merge(rel_about)
        tx.merge(rel_loc)
        print('{} is saved'.format(rel_about))
        print('{} is saved'.format(rel_loc))
    tx.commit()
def upload(data):
    # Load dictionary entries into a local Neo4j instance as "Prosthodontics"
    # nodes, then link cross-references between them.
    # NOTE: Python 2 / legacy py2neo v1 API (node.properties, print statement).
    from py2neo import Node, Relationship
    from py2neo import Graph
    graph = Graph("http://localhost:7474", username="******", password="******")
    graph.delete_all()
    nodes = []
    for item in data:
        concept, pronunciation, pos2definition = extract_item_properties(item)
        node_tmp = Node("Prosthodontics", name=concept)
        node_tmp.properties["pronunciation"] = pronunciation
        # NOTE(review): cnt is never incremented, so every pos/definition key
        # uses index 1 — confirm whether `cnt += 1` was intended per pos2def.
        cnt = 1
        for pos2def in pos2definition:
            node_tmp.properties["pos " + str(cnt)] = pos2def["pos"]
            # node_tmp.properties["definition "+str(cnt)]=pos2def["definition"]
            for attribute, value in pos2def["attributes"].iteritems():
                # Keys look like "def 1 : <attribute>".
                node_tmp["def " + str(cnt) + " : " + attribute] = value
        graph.create(node_tmp)
        nodes.append(node_tmp)
    print "nodes create over , relation start to create"
    for node1 in nodes:
        properties = node1.properties.keys()
        for property in properties:
            # property[8:] strips the "def N : " prefix (8 chars for single-digit N).
            if property[8:] == "cross_reference":
                for node2 in nodes:
                    if node2.properties["name"] == node1[property]:
                        graph.create(
                            Relationship(node1, "cross_reference", node2))
    print "graph create over"
def createGraph(self):
    """
    form: (self) --> print
    description: function that creates the neo4j graph
    exemple:
        >>> graphWN..createGraph()
        creating graph...
        graph created
    """
    # NOTE: Python 2 code (print statements); connects to the default local Graph.
    print "creating graph..."
    graph = Graph()
    graph.delete_all()
    # One (word)-[:has_synset]->(synset) edge per synset, plus a
    # (synset)-[:has_synonym]->(word) edge for every synonym.
    for synset in self.synset2synonym:
        word_node = Node("word", literal=self.synset2word[synset])
        #print synset, self.synset2synonym[synset]
        #if graph.find(self.synset2word[synset])!=None:
        #print "Exist"
        #word_node=graph.find_one("word", 'literal', self.synset2word[synset])
        #print word_node
        synset_node = Node("synset", name=synset)
        word_has_synset = Relationship(word_node, "has_synset", synset_node)
        # '_EMPTY_' sentinel marks synsets with no synonyms — skip those.
        if self.synset2synonym[synset][0]!='_EMPTY_':
            for synonym in self.synset2synonym[synset]:
                word_syn = Node("word", literal=synonym)
                synset_has_synonym = Relationship(synset_node, "has_synonym", word_syn)
                graph.create(synset_has_synonym)
        graph.create(word_has_synset)
    print "graph created"
def main():
    """Wipe the Neo4j database and reload the duplicated-games data set."""
    graph = Graph(auth=('neo4j', 'emma15emma'))
    graph.delete_all()
    # loadTeams(g,"./teams.dat")
    # loadGames(g,"./games.dat")
    dupliGames(graph, "./teams.dat", "./games.dat")
    print('Data loaded!')
class MyGraph(object):
    """Thin wrapper around a local Neo4j bolt connection for JIKE user nodes."""

    def __init__(self):
        self.graph = Graph('bolt://localhost:7687', user='******', password='******')
        # NOTE: attribute name 'mather' (sic) kept for compatibility with callers.
        self.mather = NodeMatcher(self.graph)
        self.node_name = 'JIKE_user'
        self.FOLLOWER_REL = 'FOLLOWER'
        self.FOLLOWING_REL = 'FOLLOWING'

    def add_a_rel(self, node_a, node_b, rel_type='FOLLOWER'):
        # Persist a directed node_a -[rel_type]-> node_b relationship.
        self.graph.create(Relationship(node_a, rel_type, node_b))

    def add_a_person(self, user):
        # Skip users that already exist (matched by username).
        if self.search_a_person(username=user.username):
            return
        self.graph.create(Node(self.node_name, **user.to_dict()))

    def search_a_person(self, **properties):
        # Return the first JIKE_user node matching the given properties, or None.
        return self.mather.match(self.node_name, **properties).first()

    def flush(self):
        # Wipe the entire database.
        self.graph.delete_all()
class Test:
    """Demo loader: seeds concept nodes and links them via a Cypher CREATE."""

    def __init__(self):
        self.L1 = ['C语言','数组']  # seed node names ("C language", "array")
        self.g = Graph("http://localhost:7474", username="******", password="******")
        self.g.delete_all()

    '''建立节点'''
    def create_node(self, label, nodes):
        # Create one node per name under the given label.
        for node_name in nodes:
            node = Node(label, name=node_name)
            self.g.create(node)
        print(len(nodes))
        return

    def createRels(self, start_node, end_node, edges, rel_type, rel_name):
        # Link edges[0] -> edges[1] with a typed, named relationship.
        # NOTE(review): values are interpolated straight into Cypher —
        # injection-prone; parameters would be safer in new code.
        count = 0
        p = edges[0]
        q = edges[1]
        query = "match(p:%s),(q:%s) where p.name='%s'and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)" % (start_node, end_node, p, q, rel_type, rel_name)
        try:
            self.g.run(query)
            count += 1
            # NOTE(review): this prints the builtin `all` — likely unintended.
            print(rel_type, count, all)
        except Exception as e:
            print(e)
class Neo4jUtil(object):
    """Helper around an authenticated Neo4j REST connection for Column nodes."""

    def __init__(self, url, username, password):
        # authenticate() wants host:port without the scheme prefix.
        httpPrefix = "http://"
        # Register login credentials for this host.
        authenticate(url.replace(httpPrefix, ""), username, password)
        # Connect to the Neo4j REST endpoint.
        self.graph = Graph(url + '/db/data/')

    def empty(self):
        # Remove every node together with its relationships.
        self.graph.run("match (n) detach delete n")

    # Create a Column node, only if one with this name does not exist yet.
    def create_column_node(self, node_name):
        match_result = self.graph.run("MATCH (a:Column) WHERE a.name = '%s' RETURN a" % node_name).data()
        if len(match_result) == 0:
            self.graph.run("CREATE (n:Column { name: '%s' })" % node_name)

    # Create a beDepColumn relationship between two columns if absent.
    def create_column_relation(self, origin_name, dest_name):
        match_result = self.graph.run(
            "MATCH (a:Column)-[r:beDepColumn]->(b:Column) WHERE a.name = '%s' AND b.name = '%s' RETURN r"
            % (origin_name, dest_name)).data()
        if len(match_result) == 0:
            self.graph.run("""MATCH (a:Column),(b:Column) WHERE a.name = '%s' AND b.name = '%s' CREATE (a)-[r:beDepColumn]->(b)""" % (origin_name, dest_name))

    # Wipe the whole database.
    def delete_all(self):
        self.graph.delete_all()
class Data(object):
    def __init__(self):
        self.g = Graph(host="127.0.0.1", http_port=7474, user="******", password="******")
        self.matcher = NodeMatcher(self.g)

    def clear(self):
        # Wipe the entire database.
        self.g.delete_all()

    def first_init(self):
        """Wipe the graph and rebuild it from 'aaa.txt'.

        Each line either declares a relation ('A1 -> A2', '是') or a node
        ('A1', '潮流收敛调整'); the module-level regexes `m` and `m2` decide
        which (m2 splits the endpoints of a relation line).
        """
        self.g.delete_all()
        matcher = NodeMatcher(self.g)
        with open('aaa.txt', encoding='utf-8') as f:
            for i in f.readlines():
                i = i.strip()
                r = m.match(i)
                if r:
                    print(r.groups())  #('A1 -> A2', '是') or ('A1', '潮流收敛调整')
                    r2 = m2.match(r.group(1))
                    if r2:
                        #print(r2.groups())  #('A1', 'A3')
                        # NOTE(review): .first() returns None when an endpoint
                        # node is missing, which would break Relationship() —
                        # assumes nodes are declared before relations in the file.
                        n1 = matcher.match(r2.group(1)).first()
                        n2 = matcher.match(r2.group(2)).first()
                        tempN = Relationship(n1, r.group(2), n2)
                    else:
                        tempN = Node(r.group(1), name=r.group(2))
                    self.g.create(tempN)
def createGraph(self):
    """
    form: (self) --> print
    description: function that creates the neo4j graph
    exemple:
        >>> graphWN..createGraph()
        creating graph...
        graph created
    """
    # NOTE: Python 2 code (print statements); connects to the default local Graph.
    print "creating graph..."
    graph = Graph()
    graph.delete_all()
    # One (word)-[:has_synset]->(synset) edge per synset, plus a
    # (synset)-[:has_synonym]->(word) edge for every synonym.
    for synset in self.synset2synonym:
        word_node = Node("word", literal=self.synset2word[synset])
        #print synset, self.synset2synonym[synset]
        #if graph.find(self.synset2word[synset])!=None:
        #print "Exist"
        #word_node=graph.find_one("word", 'literal', self.synset2word[synset])
        #print word_node
        synset_node = Node("synset", name=synset)
        word_has_synset = Relationship(word_node, "has_synset", synset_node)
        # '_EMPTY_' sentinel marks synsets with no synonyms — skip those.
        if self.synset2synonym[synset][0] != '_EMPTY_':
            for synonym in self.synset2synonym[synset]:
                word_syn = Node("word", literal=synonym)
                synset_has_synonym = Relationship(synset_node, "has_synonym", word_syn)
                graph.create(synset_has_synonym)
        graph.create(word_has_synset)
    print "graph created"
class KnowledgeGraph(object):
    """Small facade over a py2neo Graph configured from ConnectionNeo4j."""

    def __init__(self):
        conn_neo4j = ConnectionNeo4j()
        self._graph = Graph(host=conn_neo4j.ip,
                            auth=(conn_neo4j.username, conn_neo4j.password))

    def __repr__(self):
        # BUG FIX: __repr__ must return a string. The original assigned the
        # text to self._object and implicitly returned None, which raises
        # "TypeError: __repr__ returned non-string" whenever repr() is called.
        return "[INFO] The neo4j version is {}.".format(py2neo.__version__)

    def load_file(self, cypher):
        """Execute an arbitrary Cypher statement (e.g. a LOAD CSV script)."""
        self._graph.run(cypher)

    def add_node(self, labels, **kwargs):
        """Create a single node with the given label and properties."""
        node = Node(labels, **kwargs)
        self._graph.create(node)

    def delete_node(self):
        # Despite the name, this wipes the entire database.
        self._graph.delete_all()

    # def find(self, label):
    #     return self._graph.find_one(label=label)

    def find(self):
        # Legacy py2neo v3 `Graph.data` API; prints all F**K-labelled nodes.
        data = self._graph.data('MATCH (p:F**K) return p')
        df = pd.DataFrame(data)
        print(df)

    def match(self):
        pass
def neo_viz_multiple_domains_level(self, domains, df=None):
    '''
    Take neo4j_viz_one_domain to multiple domains so that we can compare and
    see overlapped backlinks between multiple domains;
    Only visualize the domains
    '''
    neo_args = self.api_args['neo4j']
    graph = Graph(uri=neo_args['uri'], auth=(neo_args['user_name'], neo_args['password']))
    graph.delete_all()
    # Default to the majestic backlink results, restricted to the target domains.
    if df is None:
        df = self.results['majestic']
    df = df[df["linkingToDomain"].isin(domains)]
    # One Domain node per target domain.
    for d in domains:
        graph.run("create (d:Domain{url:'" + d + "'})")
    #Add backlinks domains (bd)
    for backlink_domain in df['linkingFromDomain'].unique():
        graph.run("create (bd:Backlinks_Domain" + "{url:'" + backlink_domain + "'})")
    #Add link referral relationships
    # NOTE(review): URLs are concatenated into Cypher — a quote in a URL breaks
    # the statement; parameterized queries would be safer.
    for index, row in df.iterrows():
        graph.run("match (bd:Backlinks_Domain" + "{url:'" + row['linkingFromDomain'] + "'}), (d:Domain{url:'" + row['linkingToDomain'] + "'}) create (bd)-[:Refers]->(d)")
    print(
        "please go to your Neo4j browser and run `match (n) return n` and display the graph"
    )
def main():
    # Entry point: verify optional dependencies, connect to Neo4j, then run
    # the web UI and/or import EVTX/XML event logs per the parsed CLI args.
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")
    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")
    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")
    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")
    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")
    if not has_pandas:
        sys.exit("[!] pandas must be installed for this script.")
    if not has_hmmlearn:
        sys.exit("[!] hmmlearn must be installed for this script.")
    if not has_sklearn:
        sys.exit("[!] scikit-learn must be installed for this script.")
    try:
        # REST endpoint with credentials embedded in the URL.
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")
    print("[*] Script start. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
    if args.run:
        try:
            app.run(threaded=True, host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")
    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print(
            "[*] Delete all nodes and relationships from this Neo4j database.")
    # Validate every input file before parsing the whole batch.
    if args.evtx:
        for evtx_file in args.evtx:
            if not os.path.isfile(evtx_file):
                sys.exit("[!] Can't open file {0}.".format(evtx_file))
        parse_evtx(args.evtx)
    if args.xmls:
        for xml_file in args.xmls:
            if not os.path.isfile(xml_file):
                sys.exit("[!] Can't open file {0}.".format(xml_file))
        parse_evtx(args.xmls)
    print("[*] Script end. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
def createGraph():
    """Reset the default Neo4j database and persist every queued relationship."""
    global relationships
    neo_graph = Graph()
    neo_graph.delete_all()
    visualize()
    for rel in relationships:
        neo_graph.create(rel)
def create_address_relation_network(files_folder_path: str):
    """Rebuild the Bitcoin address-relation graph from every JSON file in a folder."""
    reader = BitcoinJsonReader()
    graph = Graph(password="******", port=11004)
    graph.delete_all()
    for entry in os.listdir(files_folder_path):
        full_path = os.path.join(files_folder_path, entry)
        read_addresses_relation_network_time_span_into_graph(graph, reader, full_path)
def connect():
    """Prompt for Neo4j connection details and return a freshly wiped Graph.

    BUG FIX: the password prompt and the Graph() construction had been fused
    into a single invalid statement (a credential-redaction artifact); split
    back into an input() call followed by the bolt connection.
    """
    host = input("Please enter the neo4j database server IP (defaults to localhost): ")
    host = 'localhost' if len(host.strip(' ')) == 0 else host
    password = input("Please enter the neo4j user's database password: ")
    # this may need to change when running from the server since there it needs the instance IP
    graph = Graph("bolt://{}:7687".format(host), user='******', password=password)
    graph.delete_all()
    return graph
def conn_neo4j():
    """Open the module-level Neo4j connection `g` and clear the database."""
    global g
    g = Graph("http://localhost:7474", username="******", password="******")
    g.delete_all()
class GraphImporter(object):
    """Batched importer for the legacy py2neo v1 Cypher API.

    Statements accumulate in a transaction that is committed every
    `commitEvery` operations; a '.' is printed per commit as progress.
    """

    def __init__(self, graphurl, commitEvery=100):
        self.graph = Graph(graphurl)
        self.commitEvery = commitEvery
        self._act = None           # currently open transaction, if any
        self._actC = commitEvery   # operations remaining before next commit

    def delete_all(self):
        # Wipe the database and recreate the id indexes the importers rely on.
        self.graph.delete_all()
        self.graph.cypher.run('CREATE INDEX ON :_Network_Node(id)')
        self.graph.cypher.run('CREATE INDEX ON :_Set_Node(id)')

    def _tx(self):
        # Return the open transaction, starting a fresh one when needed.
        if self._act is not None:
            return self._act
        self._act = self.graph.cypher.begin()
        self._actC = self.commitEvery
        return self._act

    def _done(self):
        # Count down one operation; commit and reset when the batch is full.
        self._actC -= 1
        if self._actC == 0:
            # commit
            self._act.process()
            self._act.commit()
            sys.stdout.write('.')
            self._act = None

    def _close(self):
        if self._act is not None:
            # commit last tx
            self._actC = 1
            self._done()

    def add_node(self, labels, node_id, properties):
        # Delegates to the module-level add_node() helper within the batch tx.
        tx = self._tx()
        add_node(tx, labels, node_id, properties)
        self._done()

    def done_nodes(self):
        self._done()

    def append(self, query):
        tx = self._tx()
        tx.append(query)
        self._done()

    def add_edge(self, label, source_node_id, target_node_id, properties,
                 source_type=u'_Network_Node', update_only=False):
        # Delegates to the module-level add_edge() helper within the batch tx.
        tx = self._tx()
        add_edge(tx, label, source_node_id, target_node_id, properties, source_type, update_only)
        self._done()

    def __call__(self, query):
        # Shorthand: importer(query) appends a raw Cypher statement.
        tx = self._tx()
        tx.append(query)
        self._done()

    def finish(self):
        # Flush any partially-filled batch.
        self._close()
class HetioGraph:
    """Builds and queries the Hetionet graph in a Neo4j database."""

    def __init__(self, user=None, password=None):
        self.graph = Graph(user=user, password=password)
        pd.set_option('display.max_rows', 200000)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.commit()

    def commit(self):
        # BUG FIX: the original returned the *unbound* commit method
        # (`self.graph.begin().commit` without parentheses), so the
        # transaction was never actually committed. Invoke it.
        return self.graph.begin().commit()

    def execute_cypher(self, query):
        return self.graph.run(query)

    def create_graph_nodes(self):
        print('CREATING NODES')
        self.execute_cypher(node_import_query)

    def create_graph_edges(self):
        print('CREATING EDGES')
        self.execute_cypher(edge_import_query)

    def create_node_labels(self):
        for query in label_queries:
            self.execute_cypher(query)
        self.commit()

    def create_relationship_labels(self):
        for query in relationship_queries:
            self.execute_cypher(query)
        self.commit()

    def initialize_graph(self):
        """Wipe the database, then import nodes, edges and labels."""
        print('INITIALIZING GRAPH')
        self.clear_database()
        self.create_graph_nodes()
        self.create_graph_edges()
        self.create_relationship_labels()
        self.create_node_labels()

    def discover_new_treatments(self):
        """Run the repurposing query and print the result table."""
        print('FINDING NEW TREATMENTS')
        data = self.execute_cypher(discover_new_treatments_query).data()
        df = pd.DataFrame(data)
        df.reset_index(drop=True, inplace=True)
        print(df)

    def clear_database(self):
        print('CLEARING GRAPH')
        self.graph.delete_all()
class GraphMaker(object):
    '''
    neo4j: (https://10-0-1-111-33931.neo4jsandbox.com/browser/)
    Entire triple:
        CREATE (Keanu:Person {name:'Keanu Reeves', born:1964})-[:ACTED_IN {roles:['Neo']}]->(TheMatrix:Movie {title:'The Matrix', released:1999, tagline:'Welcome to the Real World'})
        MATCH(N) RETURN N
    Create node:
        CREATE (n:Page {title:'Finance', url:'https://en.wikipedia.org/wiki/Finance'})
    Get node (as "n")
        match(n:Page {title: "Finance"})
        node = self.graph.evaluate("match (n:Section) where n.title='See also' return n")
    '''

    def __init__(self):
        # Authenticate against the local REST endpoint and start from an empty DB.
        authenticate("localhost:7474", "neo4j", "ece406")
        self.graph = Graph("http://localhost:7474/db/data/")
        self.graph.delete_all()

    def appendNode(self, node):
        # Persist one pre-built Node.
        self.graph.create(node)

    def appendNodes(self, *nodes):
        # Persist any number of pre-built Nodes.
        for node in nodes:
            self.graph.create(node)

    def makeRelationship(self, subjectnode, propertystring, objectnode):
        # Create subject -[propertystring]-> object.
        self.graph.create(Relationship(subjectnode, propertystring, objectnode))

    def drawGraph(self):
        # Node captions by label for the draw() helper.
        options = {"Page": "title", "Section": "title"}
        draw(self.graph, options)

    def getData(self, querystring=None):
        # Default to returning every node.
        if querystring is None:
            querystring = "match (n) return n"
        return self.graph.data(querystring)

    def printData(self, querystring=None):
        data = self.getData(querystring)
        for d in data:
            print(d)

    def getNodeByTitle(self, nodeTitle):
        # Return the first Section node with this title, or None (with a message).
        # NOTE(review): the title is spliced into the Cypher string — a quote
        # in a title breaks the query; parameters would be safer.
        node = self.graph.evaluate("match (n:Section) where n.title='" + nodeTitle + "' return n")
        if node:
            return node
        else:
            print("No node by that title")
            return
def main():
    """Check dependencies, connect to Neo4j, then run the web app and/or import one EVTX file."""
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")
    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")
    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")
    # BUG FIX: this check tested has_lxml a second time (copy-paste) while
    # its message talks about numpy; test the numpy flag it was meant to test
    # (assumes the module defines has_numpy like its sibling scripts — verify).
    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")
    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")
    if not has_flask:
        sys.exit("[!] Flask must be installed for this script.")
    try:
        # REST endpoint with credentials embedded in the URL.
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")
    print("[*] Script start. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
    if args.run:
        try:
            app.run(host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")
    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print(
            "[*] Delete all nodes and relationships from this Neo4j database.")
    if args.evtx:
        evtx_file = args.evtx
        # NOTE(review): os.path.exists() returns a bool and never raises
        # IOError, so this try/except is a no-op — verify original intent.
        try:
            os.path.exists(evtx_file)
        except IOError:
            sys.exit("[!] Can't open file {0}.".format(evtx_file))
        # Verify the EVTX magic bytes before parsing (read only 8 bytes
        # instead of slurping the whole file as the original did).
        fb = open(evtx_file, "rb")
        fb_data = fb.read(8)
        if fb_data != EVTX_HEADER:
            sys.exit("[!] This file is not EVTX format {0}.".format(evtx_file))
        fb.close()
        parse_evtx(evtx_file, GRAPH)
    print("[*] Script end. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
def setup_neo4j():
    """Connect to the default Neo4j instance, optionally wiping it, and ensure schema."""
    graph = Graph()
    if delete_all:
        logging.info('Deleting all Neo4j content...')
        graph.delete_all()
    logging.debug('Verifying Neo4j schema...')
    schema_statements = (
        'CREATE CONSTRAINT ON (n:Event) ASSERT n.id IS UNIQUE',
        'CREATE INDEX ON :Event(timestamp)',
        'CREATE INDEX ON :Event(ingest_time)',
    )
    for statement in schema_statements:
        graph.run(statement)
    return graph
class Friends(object):
    """Friend-graph helper over py2neo plus the legacy REST traversal endpoint."""

    def __init__(self, uri, username, password):
        self.neo = Graph(uri)
        # NOTE(review): username/password are stored but never passed to
        # Graph(); the traversal below uses a hard-coded Basic auth header
        # instead — confirm which credentials are intended.
        self.uri = uri
        self.username = username
        self.password = password

    def create_person(self, name):
        # Create and return a Person node.
        node = Node("Person", name=name)
        self.neo.create(node)
        return node

    def make_mutual_friends(self, node1, node2):
        # Friendship is modelled as two directed FRIENDS_WITH edges.
        relationship = Relationship(node1, "FRIENDS_WITH", node2)
        relationship2 = Relationship(node2, "FRIENDS_WITH", node1)
        self.neo.create(relationship)
        self.neo.create(relationship2)

    def suggestions_for(self, node):
        # Friend-of-friend suggestions via Neo4j's legacy REST traversal API:
        # breadth-first, globally-unique nodes, keeping only nodes exactly
        # 2 hops away. Returns a comma-separated string of names.
        returnType = "node"
        payload = {
            "order": "breadth_first",
            "uniqueness": "node_global",
            "relationships": {
                "type": "FRIENDS_WITH",
                "direction": "in"
            },
            "return_filter" : {
                "body" : "position.length() == 2;",
                "language" : "javascript"
            },
            "max_depth": 2
        }
        payload = json.dumps(payload)
        headers = {
            "Accept": "application/json; charset=UTF-8",
            "Authorization": "Basic bmVvNGo6cGFzc3dvcmQ=",  # base64("neo4j:password")
            "Content-Type": "application/json"
        }
        uri = self.uri + "node/" + str(node._id) + "/traverse/" + returnType
        res = requests.post(uri, data=payload, headers=headers).json()
        recommendations_list = []
        for el in res:
            recommendations_list.append(el["data"]["name"])
        recommendations = ', '.join(recommendations_list)
        return recommendations

    def reset(self):
        # Wipe the entire database.
        self.neo.delete_all()
class neoGraph():
    """Loads movie data from data.csv into Neo4j and wires up relationships."""

    def __init__(self):
        # Connect to the local database.
        self.graph = Graph("http://localhost:7474", username="******", password='******')
        self.data_message_list = []

    def read_data(self):
        # Each CSV row: [movie, director, ..., actor].
        with open('data.csv', 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            self.data_message_list = list(reader)

    def create_node(self):
        # Create the root node.
        main_node = Node(main_node_label, name=main_node_label)
        self.graph.create(main_node)
        i = 1
        for each_message in self.data_message_list:
            # Create the movie / actor / director nodes.
            movie_node = Node(movie_name_label, name=each_message[0])
            self.graph.create(movie_node)
            actor_node = Node(actor_name_label, name=each_message[-1])
            self.graph.create(actor_node)
            direct_node = Node(director_name_label, name=each_message[1])
            self.graph.create(direct_node)
            # Create the relationships.
            print('no.')
            print(i)
            movie_to_main = Relationship(movie_node, 'no.' + str(i), main_node)
            i += 1
            self.graph.create(movie_to_main)
            actor_to_movie = Relationship(actor_node, '一句话电影', movie_node)
            self.graph.create(actor_to_movie)
            direct_to_movie = Relationship(direct_node, '导演', movie_node)
            self.graph.create(direct_to_movie)

    def create_rel(self):
        matcher = NodeMatcher(self.graph)
        # BUG FIX: NodeMatcher.match() returns a lazy NodeMatch query object,
        # not a node; Relationship() needs actual nodes, so take .first().
        main_node = matcher.match(main_node_label, name=main_node_label).first()
        # Link the root node to each movie.
        i = 1
        for each_movie in movie_name_list:
            movie_node = matcher.match(movie_name_label, name=each_movie).first()
            movie_to_main = Relationship(movie_node, 'no.' + str(i), main_node)
            i += 1
            self.graph.create(movie_to_main)
        # Link directors to movies.
        # Link lead actors to movies.

    def clean_node(self):
        # Wipe the database.
        self.graph.delete_all()
class Friends(object):
    """Friend-graph helper over py2neo plus the legacy REST traversal endpoint."""

    def __init__(self, uri, username, password):
        self.neo = Graph(uri)
        # NOTE(review): username/password are stored but never passed to
        # Graph(); the traversal below uses a hard-coded Basic auth header
        # instead — confirm which credentials are intended.
        self.uri = uri
        self.username = username
        self.password = password

    def create_person(self, name):
        # Create and return a Person node.
        node = Node("Person", name=name)
        self.neo.create(node)
        return node

    def make_mutual_friends(self, node1, node2):
        # Friendship is modelled as two directed FRIENDS_WITH edges.
        relationship = Relationship(node1, "FRIENDS_WITH", node2)
        relationship2 = Relationship(node2, "FRIENDS_WITH", node1)
        self.neo.create(relationship)
        self.neo.create(relationship2)

    def suggestions_for(self, node):
        # Friend-of-friend suggestions via Neo4j's legacy REST traversal API:
        # breadth-first, globally-unique nodes, keeping only nodes exactly
        # 2 hops away. Returns a comma-separated string of names.
        returnType = "node"
        payload = {
            "order": "breadth_first",
            "uniqueness": "node_global",
            "relationships": {
                "type": "FRIENDS_WITH",
                "direction": "in"
            },
            "return_filter": {
                "body": "position.length() == 2;",
                "language": "javascript"
            },
            "max_depth": 2
        }
        payload = json.dumps(payload)
        headers = {
            "Accept": "application/json; charset=UTF-8",
            "Authorization": "Basic bmVvNGo6cGFzc3dvcmQ=",  # base64("neo4j:password")
            "Content-Type": "application/json"
        }
        uri = self.uri + "node/" + str(node._id) + "/traverse/" + returnType
        res = requests.post(uri, data=payload, headers=headers).json()
        recommendations_list = []
        for el in res:
            recommendations_list.append(el["data"]["name"])
        recommendations = ', '.join(recommendations_list)
        return recommendations

    def reset(self):
        # Wipe the entire database.
        self.neo.delete_all()
class DiseasePipeline(object):
    """Scrapy pipeline persisting disease items into Neo4j.

    Each item becomes a `disease` node, linked to related diseases via
    `relate` edges and to its typical symptoms via `have` edges.
    """

    def __init__(self):
        self.graph = Graph(NEO4J_URL, auth = (NEO4J_USERNAME, NEO4J_PASSWORD))
        self.graph.delete_all()
        # self.file = open('test.txt', "a+")

    def process_item(self, item, spider):
        # self.file.write(str(item) + '\n\n')
        # self.file.flush()
        item['name'] = item['name'].strip()
        node = self.graph.nodes.match('disease', name = item['name']).first()
        if node is None:
            # Disease not seen before: create it.
            node = Node('disease', **item)
            self.graph.create(node)
            node = self.graph.nodes.match('disease', name = item['name']).first()
        else:
            # Disease already exists: update its properties.
            node.update(item)
            self.graph.merge(node, 'disease', 'name')
        # Link related diseases.
        relatedDiseases = item['relatedDisease']
        for disease in relatedDiseases:
            disease = disease.strip()
            newNode = self.graph.nodes.match('disease', name = disease).first()
            if newNode is None:
                # Create a stub node so the relationship can be made.
                newNode = Node('disease', name = disease)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('disease', name = disease).first()
            # BUG FIX: the relationship was created with the misspelled type
            # "ralate" while the duplicate check queried 'relate', so the check
            # never matched and duplicate edges accumulated. Use 'relate' in
            # both places.
            r = Relationship(node, "relate", newNode)
            if self.graph.match_one((node, newNode), r_type = 'relate') is None:
                self.graph.create(r)
        # Link the disease to its typical symptoms.
        symptoms = item['typicalSymptom'].split('、')
        for symptom in symptoms:
            symptom = symptom.strip()  # drop surrounding whitespace
            newNode = self.graph.nodes.match('symptom', name = symptom).first()
            if newNode is None:
                # Create the symptom node if missing.
                newNode = Node('symptom', name = symptom)
                self.graph.create(newNode)
                newNode = self.graph.nodes.match('symptom', name = symptom).first()
            # Only create the 'have' edge when it does not exist yet.
            r = Relationship(node, 'have', newNode)
            if self.graph.match_one((node, newNode), r_type = 'have') is None:
                self.graph.create(r)
def main():
    # Extract malware tool metadata from the input folder and load it into
    # Neo4j as APT / malware / PDB / signature nodes plus relationships.
    # NOTE: Python 2 code (print statements).
    parser = CustomParser()
    parser.add_argument("--input", "-i", help="sets input folder")
    args = parser.parse_args()
    # print args.input
    extract_codex_data(args.input)
    # codex_extractor.main(args)
    graph_http = "http://" + NEO4J_USER + ":" + \
        NEO4J_PASSWORD + "@:" + NEO4J_PORT + "/db/data/"
    GRAPH = Graph(graph_http)
    GRAPH.delete_all()
    with GRAPH.begin() as tx:
        data = get_tools_info(args.input)
        for apt in data:
            add_apt(tx, apt)
            for file in data[apt]:
                add_mw(tx, file['MD5'], file['SHA1'], file['SHA256'],
                       file['Description'], file['File_Name'],
                       file['Compilation_Time']
                       )
                add_apt_mw_rel(tx, apt, file['MD5'])
                for pdb in file['PDB']:
                    add_pdb_mw_rel(tx, pdb, file['MD5'])
                try:
                    add_mw_signature(tx, file['Signatures'], file['MD5'])
                    for st in file['Interesting']:
                        add_int_mw_rel(tx, st, file['MD5'])
                except KeyError:
                    # Some records lack Signatures/Interesting keys.
                    print file['MD5'] + " error"
                    # NOTE(review): this prints the class attribute, not the
                    # caught exception's message — likely meant `e.message`.
                    print KeyError.message
                    pass
                try:
                    # Office metadata is optional.
                    for macro in file['Office']:
                        add_macro_mw_rel(tx, macro, file['MD5'])
                    add_office_last_saved(tx, file['Last_Saved'], file['MD5'])
                except Exception:
                    pass
        # Remove placeholder nodes created for missing attributes.
        tx.run(cypher_clean_empty_comp)
        tx.run(cypher_clean_empty_filetype)
        tx.run(cypher_clean_empty_filename)
        tx.run(cypher_clean_empty_pdb)
        tx.run(cypher_clean_empty_last_saved)
class Neo4j():
    """Imports news rows from a MySQL database into Neo4j as news/keyword nodes."""

    def __init__(self):
        self.tp = textProcess.TextProcess()
        self.graph = Graph("http://localhost:7474", username="******", password='******')
        self.graph.delete_all()

    def connect_db(self):
        # Fresh MySQL connection per query; the caller closes it.
        return pymysql.connect(host='192.168.1.101', port=3306, user='******',
                               password='', database='xinhua', charset='utf8')

    def query_news(self, sql_str):
        """Run a SQL query and return all rows."""
        logging.info(sql_str)
        con = self.connect_db()
        cur = con.cursor()
        cur.execute(sql_str)
        rows = cur.fetchall()
        cur.close()
        con.close()
        return rows

    def test(self):
        # Import the first 100 news rows: one `news` node each, merged with
        # `keyword` nodes extracted via TF-IDF and linked by `include` edges.
        rows = self.query_news("select * from news limit 100")
        for row in rows:
            title = row[3]
            content = row[4]
            catalog = row[6]
            keywords = list(self.tp.tfidf(title + content))
            date = time.mktime(row[2].timetuple())
            # BUG FIX: the original format string was "%Y-%m-%d %H:%S:%M",
            # which swaps minutes and seconds; use the conventional order.
            date_str = row[2].strftime("%Y-%m-%d %H:%M:%S")
            tx = self.graph.begin()
            news = Node('news', title=title, content=content, date=date_str,
                        catalog=catalog, url=row[5], mktime=date)
            # b = Node('PersonTest', name='张三1', key='s1', age=33)
            tx.merge(news, 'news', 'title')
            for k in keywords:
                keyword = Node('keyword', key=k)
                tx.merge(keyword, 'keyword', 'key')
                relation = Relationship(news, 'include', keyword)
                tx.create(relation)
            tx.commit()
def init_db(delete = False):
    """
    initializes the db connection and returns the graph object.
    When `delete` is True, the existing graph content is wiped first.
    TODO: Enable variables
    """
    uri = "bolt://localhost:7687"
    print("Connecting to database at %s.." % uri)
    db = Graph(uri, auth=("neo4j", "admin"))
    print("Connected.")
    # Guard clause: nothing else to do unless a wipe was requested.
    if not delete:
        return db
    print("Deleting graph at %s.." % uri)
    db.delete_all()
    print("Deleted")
    return db
def main1():
    """Reset the graph and seed it with Fruit nodes (legacy py2neo v2 API)."""
    authenticate("localhost:7474", "neo4j", "1234")
    graph = Graph(GRAPH_CONNECTION_STRNIG)
    graph.delete_all()
    # A plain created node...
    graph.create(Node("Fruit", name="banana", colour="yellow", tasty=True))
    # ...and a merged one, updated and pushed back.
    apple = graph.merge_one("Fruit", 'name', 'apple')
    apple['colour'] = 'green'
    apple['tasty'] = True
    apple.push()
def get_graph(new_graph=True):
    """Load / declare the database.

    inputs: new_graph - if true then the existing graph is cleared
    outputs: graph - a py2neo graph object
    """
    graph = Graph("bolt://localhost:7687", user="******", password="******")
    # NOTE(review): this opens a transaction that is never used or committed —
    # presumably just to force the connection; confirm it can be dropped.
    graph.begin()
    # Idiom fix: truthiness test instead of comparing `== True`.
    if new_graph:
        graph.delete_all()
    return graph
class Build_Configuration():
    """Builds a namespaced configuration tree in Neo4j.

    NOTE: Python 2 / legacy py2neo v1 API (node.properties, print statement).
    """

    def __init__(self):
        self.graph = Graph()
        self.graph.delete_all()
        # Namespace path components; "Start" is the implicit root.
        self.namespace = ["Start"]
        # Stack of parent nodes mirroring the namespace path.
        self.parent_node = []

    def check_duplicates(self, label, name):
        #print "label",label,name
        # Reject a node whose namespaced name already exists in the graph.
        if self.graph.find_one(label, property_key="name", property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        # Build "Start/.../name" without mutating self.namespace.
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        return self.parent_node[-1]

    def pop_namespace(self):
        # Ascend one level: drop the deepest namespace part and its node.
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name, properties):
        namespace = self.get_namespace(name)
        self.check_duplicates(label, name=namespace)
        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        # Attach to the current parent (unless this is the first/root node).
        if len(self.parent_node) != 0:
            relation_enity = Relationship(self.get_parent_node(), relationship, node)
            self.graph.create(relation_enity)
        # Optionally descend: subsequent nodes become children of this one.
        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
def conn_neo4j():
    """Connect to the local Neo4j instance, wipe it, and store the handle in global `g`."""
    logger.info("Connect DB...")
    global g
    try:
        g = Graph("http://localhost:7474", username="******", password="******")
        g.delete_all()
        logger.info("Delete all nodes & relations...")
        logger.info("Connect successfully...")
    except Exception:
        # Narrowed from a bare `except:`. Also exit with a non-zero status:
        # the original called sys.exit(0), signalling success on a failed
        # connection.
        print("Connect DB Error...")
        # traceback.print_exc()
        sys.exit(1)
def set_config():
    # Initialize module-level state: Neo4j connection, node matcher, and the
    # bookkeeping containers / vocabularies used while building the medical KG.
    global xunyiwenyao_level,xunyiwenyao_node,linchuang_medicine_level,linchuang_medicine_node,entity_num,relation_num,buwei,keshi,graph,matcher
    graph = Graph("http://localhost:7474", username="******", password='******')
    matcher = NodeMatcher(graph)
    graph.delete_all()
    xunyiwenyao_level = []  # all hierarchy levels from xunyiwenyao, used for lookups without distinguishing disease vs. symptom
    xunyiwenyao_node = []  # department and body-part node data
    linchuang_medicine_level = []
    linchuang_medicine_node = []
    # Entity counters: department (科室) / body part (部位) / clinical grading (临床分级).
    entity_num = {'科室':0,'部位':0,'临床分级':0}
    # Relation counter: "includes" (包括).
    relation_num= {'包括':0}
    # Controlled vocabulary of body parts.
    buwei = ['全身', '男性股沟', '颈部', '眼', '生殖部位', '下肢', '口', '上肢', '腰部', '耳', '四肢', '腹部', '头部', '皮肤', '女性盆骨', '排泄部位', '胸部', '皮肤', '鼻']
    # Controlled vocabulary of hospital departments.
    keshi = ['眼科', '五官科', '皮肤科', '骨外科', '康复科', '中医骨伤科', '中医科', '耳鼻喉科', '理疗科', '体检科', '皮肤性病科', '泌尿内科', '遗传病科', '肝胆外科', '中西医结合科', '内科', '心胸外科', '肿瘤内科', '营养科', '药品科', '外科', '肛肠科', '神经内科', '烧伤科', '口腔科', '血液科', '小儿内科', '心理科', '神经外科', '泌尿外科', '肾内科', '消化内科', '肿瘤外科', '风湿免疫科', '呼吸内科', '普外科', '内分泌科', '妇产科', '妇科', '男科', '儿科综合', '精神科', '急诊科', '感染科', '其他科室', '传染科', '中医理疗科', '心内科', '小儿外科', '整形美容科', '儿科', '性病科', '产科', '肿瘤科', '生殖健康', '保健养生', '辅助检查', '重症监护', '其他综合', '中医综合', '不孕不育', '肝病', '减肥']
def main():
    """Check dependencies, connect to Neo4j, then run the web app and/or import EVTX/XML logs."""
    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")
    if not has_evtx:
        sys.exit("[!] python-evtx must be installed for this script.")
    if not has_lxml:
        sys.exit("[!] lxml must be installed for this script.")
    # BUG FIX: this check tested has_lxml a second time (copy-paste) while
    # its message talks about numpy; test the numpy flag it was meant to test
    # (assumes the module defines has_numpy like its sibling scripts — verify).
    if not has_numpy:
        sys.exit("[!] numpy must be installed for this script.")
    if not has_changefinder:
        sys.exit("[!] changefinder must be installed for this script.")
    try:
        # REST endpoint with credentials embedded in the URL.
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD +"@" + NEO4J_SERVER + ":" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except:
        sys.exit("[!] Can't connect Neo4j Database.")
    print("[*] Script start. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
    if args.run:
        try:
            app.run(threaded=True, host="0.0.0.0", port=WEB_PORT)
        except:
            sys.exit("[!] Can't runnning web application.")
    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print("[*] Delete all nodes and relationships from this Neo4j database.")
    # Validate every input file before parsing the whole batch.
    if args.evtx:
        for evtx_file in args.evtx:
            if not os.path.isfile(evtx_file):
                sys.exit("[!] Can't open file {0}.".format(evtx_file))
        parse_evtx(args.evtx, GRAPH)
    if args.xmls:
        for xml_file in args.xmls:
            if not os.path.isfile(xml_file):
                sys.exit("[!] Can't open file {0}.".format(xml_file))
        parse_evtx(args.xmls, GRAPH)
    print("[*] Script end. %s" % datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
class Build_Configuration:
    """Builds a namespaced configuration tree in Neo4j.

    NOTE: Python 2 / legacy py2neo v1 API (node.properties, print statement).
    """

    def __init__(self):
        self.graph = Graph()
        self.graph.delete_all()
        # Namespace path components; "Start" is the implicit root.
        self.namespace = ["Start"]
        # Stack of parent nodes mirroring the namespace path.
        self.parent_node = []

    def check_duplicates(self, label, name):
        # print "label",label,name
        # Reject a node whose namespaced name already exists in the graph.
        if self.graph.find_one(label, property_key="name", property_value=name) != None:
            raise ValueError("Duplicate Node", label, name)

    def get_namespace(self, name):
        # Build "Start/.../name" without mutating self.namespace.
        print self.namespace, name
        temp = copy.deepcopy(self.namespace)
        temp.append(name)
        return_value = "/".join(temp)
        return return_value

    def get_parent_node(self):
        return self.parent_node[-1]

    def pop_namespace(self):
        # Ascend one level: drop the deepest namespace part and its node.
        del self.namespace[-1]
        del self.parent_node[-1]

    # concept of namespace name is a string which ensures unique name
    # the name is essentially the directory structure of the tree
    def construct_node(self, push_namespace, relationship, label, name, properties):
        namespace = self.get_namespace(name)
        self.check_duplicates(label, name=namespace)
        node = Node(label)
        node.properties["namespace"] = namespace
        node.properties["name"] = name
        for i in properties.keys():
            node.properties[i] = properties[i]
        self.graph.create(node)
        # Attach to the current parent (unless this is the first/root node).
        if len(self.parent_node) != 0:
            relation_enity = Relationship(self.get_parent_node(), relationship, node)
            self.graph.create(relation_enity)
        # Optionally descend: subsequent nodes become children of this one.
        if push_namespace == True:
            self.namespace.append(name)
            self.parent_node.append(node)
class Neo4j():
    """Imports MISP events into a Neo4j graph via py2neo's legacy REST API."""

    def __init__(self, host='localhost:7474', username='******', password='******'):
        if not has_py2neo:
            raise Exception('py2neo is required, please install: pip install py2neo')
        # Register credentials for the host before opening the connection.
        authenticate(host, username, password)
        self.graph = Graph("http://{}/db/data/".format(host))

    def load_events_directory(self, directory):
        # Load and import every MISP JSON event found in `directory`.
        self.events = []
        for path in glob.glob(os.path.join(directory, '*.json')):
            e = MISPEvent()
            e.load(path)
            self.import_event(e)

    def del_all(self):
        # Remove every node and relationship from the database.
        self.graph.delete_all()

    def import_event(self, event):
        # One transaction per event: an Event node, an Attribute node per
        # attribute, and shared Value nodes linking events and attributes.
        tx = self.graph.begin()
        event_node = Node('Event', uuid=event.uuid, name=event.info)
        # event_node['distribution'] = event.distribution
        # event_node['threat_level_id'] = event.threat_level_id
        # event_node['analysis'] = event.analysis
        # event_node['published'] = event.published
        # event_node['date'] = event.date.isoformat()
        tx.create(event_node)
        for a in event.attributes:
            attr_node = Node('Attribute', a.type, uuid=a.uuid)
            attr_node['category'] = a.category
            attr_node['name'] = a.value
            # attr_node['to_ids'] = a.to_ids
            # attr_node['comment'] = a.comment
            # attr_node['distribution'] = a.distribution
            tx.create(attr_node)
            member_rel = Relationship(event_node, "is member", attr_node)
            tx.create(member_rel)
            # Value nodes are merged (not created) so identical values are
            # shared across events and attributes.
            val = Node('Value', name=a.value)
            ev = Relationship(event_node, "has", val)
            av = Relationship(attr_node, "is", val)
            s = val | ev | av
            tx.merge(s)
            #tx.graph.push(s)
        tx.commit()
def main2():
    """Demo routine: reset the graph, then create a couple of fruit and
    person nodes plus one KNOWS relationship (legacy py2neo API)."""
    # Authenticate first, then open the connection and start from a clean slate.
    authenticate("localhost:7474", "neo4j", "1234")
    db = Graph(GRAPH_CONNECTION_STRNIG)
    db.delete_all()

    # A fruit created directly...
    db.create(Node("Fruit", name="banana", colour="yellow", tasty=True))

    # ...and one merged by name, then updated and pushed back.
    apple_node = db.merge_one("Fruit", 'name', 'apple')
    apple_node['colour'] = 'green'
    apple_node['tasty'] = True
    apple_node.push()

    # Two people and the relationship between them.
    person_a = Node("Person", name="Alice")
    person_b = Node("Person", name="Bob")
    knows = Relationship(person_a, "KNOWS", person_b, since=1999)
    db.create(person_a)
    db.create(person_b)
    db.create(knows)
elif nodes[0] == 0: q.put("MATCH (n:roots {type:'NotSponsored'}) MATCH (f:files {filename:'"+nodes[1]+"'}) MERGE(n)-[:has]->(f)") else: q.put("MATCH (n:roots {type:'Testing'}) MATCH (f:files {filename:'"+nodes[1]+"'}) MERGE(n)-[:has]->(f)") for n in nodes[2:]: q.put("MERGE (w:website {website:'"+n+"'})") #create website node if it doesn't already exist q.put("MATCH (f:files {filename:'"+nodes[1]+"'}) MATCH (w:website {website:'"+n+"'}) MERGE(f)-[:links]->(w)") train = pd.read_csv("./data/train.csv", header=0, delimiter=",", quoting=3) sample = pd.read_csv("./data/sampleSubmission.csv", header=0, delimiter=",", quoting=3) print("Starting processing...") authenticate("localhost:7474", "neo4j", "neo4j") #username and password graph = Graph() #by default, py2neo opens localhost graph.delete_all() #deletes all nodes and edges (clears old data) tx = graph.cypher.begin() tx.append("CREATE(n:roots {type: 'Sponsored'})") tx.append("CREATE(n:roots {type: 'NotSponsored'})") tx.append("CREATE(n:roots {type: 'Testing'})") tx.commit() q = Queue.Queue() for i, zipFile in enumerate(process_zips): archive = zipfile.ZipFile(zipFile, 'r') file_paths = zipfile.ZipFile.namelist(archive) bar = ProgressBar(len(file_paths), max_width=40) pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()-1 or 1) for k, file_path in enumerate(file_paths): data = archive.read(file_path) openfile = file_path[2:] #filename
print("Ended Tags Load" + str(datetime.datetime.now())) return ##################BEGIN PROCESS HERE######################### # ------------------------------------------------------------ # this 'for loop' will set 'line' to an input line from system # standard input file # ------------------------------------------------------------ initTags() graph = Graph() graph.delete_all() #CREATE ALL GENRE NODES HERE ActionGenreNode = Node("Genre", name="Action") AdventureGenreNode = Node("Genre", name="Adventure") AnimationGenreNode = Node("Genre", name="Animation") ChildrensGenreNode = Node("Genre", name="Childrens") ComedyGenreNode = Node("Genre", name="Comedy") CrimeGenreNode = Node("Genre", name="Crime") DocumentaryGenreNode = Node("Genre", name="Documentary") DramaGenreNode = Node("Genre", name="Drama") FantasyGenreNode = Node("Genre", name="Fantasy") FilmNoirGenreNode = Node("Genre", name="FilmNoir") HorrorGenreNode = Node("Genre", name="Horror") MusicalGenreNode = Node("Genre", name="Musical")
class TestUser(unittest.TestCase):
    """Graph-model tests for the User class.

    USER_DATA is a captured Twitter REST API user payload (@BritishGas),
    used as a realistic fixture for creating User nodes.
    """

    USER_DATA = {'contributors_enabled': False,
                 'created_at': 'Fri Jun 12 11:13:21 +0000 2009',
                 'default_profile': False,
                 'default_profile_image': False,
                 'description': 'We are the UK’s leading energy supplier and committed to '
                                'looking after your world. For Emergency numbers visit '
                                'http://t.co/GVkMDCUzW3',
                 'entities': {'description': {'urls': [{'display_url': 'britishgas.co.uk/emergency',
                                                        'expanded_url': 'http://www.britishgas.co.uk/emergency',
                                                        'indices': [111, 133],
                                                        'url': 'http://t.co/GVkMDCUzW3'}]},
                              'url': {'urls': [{'display_url': 'britishgas.co.uk/the-source',
                                                'expanded_url': 'http://www.britishgas.co.uk/the-source',
                                                'indices': [0, 22],
                                                'url': 'http://t.co/rlasQ9hHeu'}]}},
                 'favourites_count': 431,
                 'follow_request_sent': False,
                 'followers_count': 36081,
                 'following': False,
                 'friends_count': 4774,
                 'geo_enabled': True,
                 'id': 46630225,
                 'id_str': '46630225',
                 'is_translation_enabled': False,
                 'is_translator': False,
                 'lang': 'en',
                 'listed_count': 400,
                 'location': 'Staines, Middlesex',
                 'name': 'British Gas ',
                 'notifications': False,
                 'profile_background_color': '00AEDE',
                 'profile_background_image_url': 'http://pbs.twimg.com/profile_background_images/831694128/7187a2d2a890b67c21ae04c18861f5b9.jpeg',
                 'profile_background_image_url_https': 'https://pbs.twimg.com/profile_background_images/831694128/7187a2d2a890b67c21ae04c18861f5b9.jpeg',
                 'profile_background_tile': False,
                 'profile_banner_url': 'https://pbs.twimg.com/profile_banners/46630225/1400584801',
                 'profile_image_url': 'http://pbs.twimg.com/profile_images/552048129055289344/6oPZvR3T_normal.jpeg',
                 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/552048129055289344/6oPZvR3T_normal.jpeg',
                 'profile_link_color': '1890C4',
                 'profile_location': None,
                 'profile_sidebar_border_color': 'FFFFFF',
                 'profile_sidebar_fill_color': 'D9EDF9',
                 'profile_text_color': '333333',
                 'profile_use_background_image': True,
                 'protected': False,
                 'screen_name': 'BritishGas',
                 'status': {'contributors': None,
                            'coordinates': None,
                            'created_at': 'Mon Mar 02 18:45:18 +0000 2015',
                            'entities': {'hashtags': [],
                                         'media': [{'display_url': 'pic.twitter.com/ec4iusBe4Q',
                                                    'expanded_url': 'http://twitter.com/BritishGas/status/572467734367191041/photo/1',
                                                    'id': 572425479120007168,
                                                    'id_str': '572425479120007168',
                                                    'indices': [108, 130],
                                                    'media_url': 'http://pbs.twimg.com/media/B_Gp8L9UsAAe8ap.png',
                                                    'media_url_https': 'https://pbs.twimg.com/media/B_Gp8L9UsAAe8ap.png',
                                                    'sizes': {'large': {'h': 500, 'resize': 'fit', 'w': 1000},
                                                              'medium': {'h': 300, 'resize': 'fit', 'w': 600},
                                                              'small': {'h': 170, 'resize': 'fit', 'w': 340},
                                                              'thumb': {'h': 150, 'resize': 'crop', 'w': 150}},
                                                    'type': 'photo',
                                                    'url': 'http://t.co/ec4iusBe4Q'}],
                                         'symbols': [],
                                         'urls': [],
                                         'user_mentions': []},
                            'favorite_count': 4,
                            'favorited': False,
                            'geo': None,
                            'id': 572467734367191041,
                            'id_str': '572467734367191041',
                            'in_reply_to_screen_name': None,
                            'in_reply_to_status_id': None,
                            'in_reply_to_status_id_str': None,
                            'in_reply_to_user_id': None,
                            'in_reply_to_user_id_str': None,
                            'lang': 'en',
                            'place': None,
                            'possibly_sensitive': False,
                            'retweet_count': 3,
                            'retweeted': False,
                            'source': '<a href="https://ads.twitter.com" '
                                      'rel="nofollow">Twitter Ads</a>',
                            'text': 'Afraid of the dust bunny lurking behind your fridge? '
                                    'Check out our guide to cleaning up those fridge coils: '
                                    'http://t.co/ec4iusBe4Q',
                            'truncated': False},
                 'statuses_count': 13664,
                 'time_zone': 'London',
                 'url': 'http://t.co/rlasQ9hHeu',
                 'utc_offset': 0,
                 'verified': True}

    def setUp(self):
        # Fresh connection to the "dev" graph for every test.
        self.g = Graph(get_graph_url("dev"))

    def tearDown(self):
        # Wipe the graph so tests stay independent of each other.
        self.g.delete_all()

    def testAddNewUser(self):
        # Create a User node from the fixture, then fetch followers.
        # NOTE(review): get_followers presumably hits the Twitter API —
        # confirm test credentials/network are available when running this.
        u = User.new(self.g, properties=self.USER_DATA)
        u.get_followers()
#graphdb = Graph('http://localhost:7474/default.graphdb') #Sam consumer_key = '' consumer_secret = '' access_token = '' access_token_secret = '' graphdb = Graph() graphdb.delete_all() INSERT_USER_URL_QUERY = ''' MERGE (user:User {username: {username}}) MERGE (url:URL {url: {url}}) CREATE UNIQUE (user)-[:SHARED]->(url) FOREACH (kw in {keywords} | MERGE (k:Keyword {text: kw}) CREATE UNIQUE (k)<-[:IS_ABOUT]-(url)) FOREACH (author in {authors} | MERGE (a:Author {name: author}) CREATE UNIQUE(a)<-[:WRITTEN_BY]-(url)) ''' auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True) #api = tweepy.API(auth)
#!/usr/bin/python
from __future__ import unicode_literals
from py2neo import authenticate, Graph
import argparse
import sys

if __name__ == '__main__':
    # Command-line interface; only --neo4j is actually consumed below.
    cli = argparse.ArgumentParser(description="ctAutocompletion database clearing script")
    cli.add_argument('--src', dest='src', default=None, help='')
    cli.add_argument('--elastic', dest='elastic', default=None, help='Elasticsearch authentication (optional)')
    cli.add_argument('--neo4j', dest='neo4j', default=None, help='Neo4j authentication (required)')
    options = cli.parse_args()

    try:
        # Split "user:password" when supplied, otherwise use the defaults.
        if options.neo4j:
            (username, password) = tuple(options.neo4j.split(":"))
        else:
            username = "******"
            password = "******"
        # Authenticate against the local server, then wipe the database.
        authenticate("localhost:7474", username, password)
        Graph().delete_all()
    except Exception as err:
        print("Please provide Neo4j authentication\n\t--neo4j 'username:secret-password'")
        sys.exit(1)
class SyntaxGraph():
    """
    The aim of this class is to find associated words to database syntax.
    A user will input a sentence, and these associations will be used to find
    the correct SQL statement to execute in the database.

    The relations between words are modelled as a graph. The nodes of the
    graph are the words, and the edges (relationships) between nodes represent
    when a word means another word (e.g. is a synonym).

    The graph is "seeded" using a set of database syntax words, finding
    synonyms/related words to these initial words using a call to a thesaurus
    API. The graph is then "grown" from the resulting synonyms using
    subsequent API calls, in a recursive fashion.

    When a user enters a sentence, this graph will be used to find database
    syntax words which are within a certain "degree of separation" from each
    word in the sentence, in an attempt to start building a SQL query from
    this sentence.

    Python 2 code (print statements, iterkeys).
    """

    def __init__(self, seed_words=None, seed_mappings=None):
        self.sql_terms = SQLTerms().sql_terms
        self.graph = Graph(DB_URI)
        self.tx = self.graph.cypher.begin()
        # Hand-curated aliases for SQL keywords (alias lists map to keyword).
        self.seed_mappings = seed_mappings or {'where': ['filter', 'for', 'during'],
                                               'from': ['source', 'in'],
                                               'into': ['toward', 'within', 'inside'],
                                               'group': ['by'],
                                               'and': ['with']}
        # Seed with every SQL term not already covered by an explicit mapping,
        # then add the mapping keys themselves.
        self.seed_words = seed_words or [x for x in self.sql_terms if x not in self.seed_mappings]
        self.seed_words.extend([x for x in self.seed_mappings.iterkeys()])
        # Words never replaced by replace_word().
        self.exclude_words = ['display']

    def seed(self, reset=False):
        # Populate the graph from the seed words; optionally wipe it first.
        print 'Seeding graph'
        if reset:
            self.graph.delete_all()
        for word in self.seed_words:
            if not self.already_called(word):
                self.add_synonyms(word)
            if word in self.seed_mappings:
                # Link the curated aliases to the seed word in both directions.
                print 'Mapping %s to %s' % (
                    ','.join(self.seed_mappings[word]), word
                )
                base = self.graph.merge_one('Word', 'name', word)
                synonyms = [self.graph.merge_one('Word', 'name', x)
                            for x in self.seed_mappings[word]]
                [self.graph.create_unique(Relationship(base, 'MEANS', synonym))
                 for synonym in synonyms]
                [self.graph.create_unique(Relationship(synonym, 'MEANS', base))
                 for synonym in synonyms]

    def grow(self, levels=1):
        # Expand the graph: call the thesaurus API for every word that has
        # not been called yet, recursing `levels` times.
        print 'Levels left: %d' % levels
        query = '''
            MATCH (w:Word)
            WHERE NOT HAS (w.called)
            RETURN w.name
            '''
        results = self.graph.cypher.execute(query)
        for word in results:
            self.add_synonyms(word['w.name'])
        if levels > 1:
            self.grow(levels-1)

    def already_called(self, word):
        # True when the word node exists and is flagged as already queried.
        # NOTE(review): word is interpolated directly into Cypher — injection
        # risk if words ever come from untrusted input.
        if len(self.graph.cypher.execute('''MATCH (w:Word)
                                            WHERE w.name = '%s'
                                            AND HAS (w.called)
                                            RETURN w.name
                                            ''' % word)) > 0:
            return True

    def update_set_called(self, word):
        # Flag the word node so it is not queried against the API again.
        word_node = self.graph.merge_one('Word', 'name', word)
        word_node.properties['called'] = 1
        word_node.push()

    def add_synonyms(self, word):
        # Query the Big Huge Thesaurus API and link related words into the
        # graph; multi-word phrases (containing spaces) are skipped.
        url = 'http://words.bighugelabs.com/api/2/%s/%s/json' % (API_KEY, word)
        print url
        response = requests.get(url)
        try:
            data = response.json()
        except JSONDecodeError:
            # No usable response: mark as called so we don't retry forever.
            self.update_set_called(word)
            return
        if 'verb' in data:
            for key in data['verb']:
                # Synonyms: words are all interrelated (connected graph)
                if key == 'syn':
                    synonyms = [word]
                    synonyms.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in synonyms]
                    [self.graph.create_unique(Relationship(i, 'MEANS', j))
                     for j in nodes for i in nodes if i != j]
                # Similar / user defined words: words are related both ways
                # between root and related words (both direction)
                elif key in ('sim', 'usr'):
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[i], 'MEANS', nodes[j]))
                     for j in range(len(nodes)) for i in range(len(nodes))
                     if (i+j > 0 and i*j == 0)]
                # Related words: words are related only from root to related
                # word (one direction)
                elif key == 'rel':
                    related_words = [word]
                    related_words.extend([x for x in data['verb'][key] if ' ' not in x])
                    nodes = [self.graph.merge_one('Word', 'name', x) for x in related_words]
                    [self.graph.create_unique(Relationship(nodes[0], 'MEANS', nodes[i]))
                     for i in range(1, len(nodes))]
        self.update_set_called(word)

    def replace_word(self, word, max_degree_separation=2):
        # Map `word` to the closest seed word within the allowed degree of
        # separation; returns None when no candidate is close enough.
        if word in self.seed_words or word in self.exclude_words:
            return word
        replacement_candidates = []
        for seed_word in self.seed_words:
            query = '''MATCH p=shortestPath((w:Word{name:"%s"})-[*]-(n:Word{name:"%s"}))
                       RETURN length(p), n.name
                       ''' % (word, seed_word)
            results = self.graph.cypher.execute(query)
            try:
                # min() raises ValueError on an empty sequence (no path).
                replacement_candidates.append(min([(row['length(p)'], row['n.name'])
                                                   for row in results]))
            except ValueError:
                pass
        if len(replacement_candidates) > 0:
            replacement = min(replacement_candidates)
            if replacement[0] <= max_degree_separation:
                return replacement[1]

    def replace_text(self, text):
        # Replace each distinct cleaned word in `text` with its closest seed
        # word (falling back to the word itself), preserving word order.
        pattern = re.compile('[\W_]+')
        cleaned = []
        replacements = []
        for word in text.split():
            cleaned_word = pattern.sub('', word)
            if cleaned_word not in [x[0] for x in cleaned]:
                cleaned.append([cleaned_word, self.replace_word(cleaned_word)])
            replacements.append(self.replace_word(cleaned_word) or cleaned_word)
        return ' '.join(replacements)
class Neo4jWrapper(object):
    """Helper around a py2neo Graph for inserting Websites nodes one at a
    time, in Cypher transactions, or in write batches, while maintaining a
    monotonically increasing node id."""

    def __init__(self, user_name, password, connection_string="", current_id=1):
        # Default to a basic-auth REST URL against the local server when no
        # explicit connection string is supplied.
        self.connection_string = connection_string \
            if connection_string != "" \
            else "http://" + user_name + ":" + password + "@localhost:7474/db/data/"
        self.graph_db = Graph(self.connection_string)
        self.current_id = current_id

    def delete_all_nodes(self):
        """Delete every node and relationship in the database."""
        self.graph_db.delete_all()

    def insert_single_node(self, node_map):
        """
        Insert pre specified node type
        :param node_map: format {'url':, 'brand': , 'tld': , 'website':}
        :return: None
        """
        self.graph_db.cypher.execute(
            "CREATE (w:Websites {id:{I}, brand:{BRAND}, website:{W}, url:{U}, tld:{TLD}})",
            {"BRAND": node_map['brand'], "I": self.current_id,
             "W": node_map['website'], "U": node_map['url'],
             "TLD": node_map['tld']})
        self.current_id += 1

    def insert_as_transaction(self, node_map_list):
        """
        Insert nodes with transaction
        :param node_map_list: list of data in format {'url':, 'brand': , 'tld': , 'website':}
        :return: None
        """
        tx = self.graph_db.cypher.begin()
        for node_map in node_map_list:
            self.current_id += 1
            tx.append("CREATE (w:Websites {id:{I}, brand:{B}, website:{W}, url:{U}, tld:{T}})", {
                "B": node_map['brand'], "I": self.current_id,
                "W": node_map['website'], "U": node_map['url'],
                "T": node_map['tld']})
        tx.commit()

    def batch_create(self, node_map_list):
        """
        Execute multiple insert as batch jobs
        :param node_map_list: list of node maps (see insert_single_node)
        :return: batch submission result
        """
        batch = neo4j.WriteBatch(self.graph_db)
        for node_map in node_map_list:
            self.current_id += 1
            batch.append(self.create_cypher_job(None, node_map))
        return batch.submit()

    def create_cypher_job(self, statement=None, params_dict=dict()):
        """
        Create cypher job for batch insert
        :param statement: custom Cypher statement, or None for the default
        :param params_dict: parameters for the statement
        :return: neo4j.CypherJob
        """
        # BUG FIX: use == rather than identity (`is`) for the emptiness test,
        # and only build the default params inside the branch that needs them.
        if statement is None or len(params_dict) == 0:
            default_statement = "CREATE (w:Websites {id:{I}, brand:{B}, website:{W}, url:{U}, tld:{T}})"
            # NOTE(review): an empty params_dict still raises KeyError here,
            # as in the original — confirm callers always pass a full node map.
            default_params = {"B": params_dict['brand'], "I": self.current_id,
                              "W": params_dict['website'], "U": params_dict['url'],
                              "T": params_dict['tld']}
            return neo4j.CypherJob(default_statement, default_params)
        else:
            return neo4j.CypherJob(statement, params_dict)

    def insert_single_with_loop(self, node_map_list, insertion_type="single", insertion_size=0):
        """
        Convenient method to loop on data list
        :param node_map_list: list for dict in format {'url':, 'brand': , 'tld': , 'website':}
        :param insertion_type: single | transaction | batch
        :param insertion_size: integer - the number of nodes inserted in a single transaction
        :return: None
        """
        # BUG FIXES versus the original:
        #  * string/int comparisons used the identity operator `is`, which
        #    only works by CPython interning accident; replaced with ==/in.
        #  * `insertion_type is 'transaction' or 'batch'` was always truthy
        #    ('batch' is a non-empty string); replaced with a membership test.
        #  * every insertion_size-th node was flushed without ever being
        #    appended to the buffer (and so silently dropped); now the node
        #    is buffered first and the buffer flushed afterwards.
        cnt = 0
        temp_node_holder = []
        for node_map in node_map_list:
            if insertion_type == 'single':
                self.insert_single_node(node_map)
            elif insertion_type in ('transaction', 'batch'):
                cnt += 1
                temp_node_holder.append(node_map)
                # Flush a full buffer (guard against insertion_size == 0).
                if insertion_size and cnt % insertion_size == 0:
                    if insertion_type == 'transaction':
                        self.insert_as_transaction(temp_node_holder)
                    else:
                        self.batch_create(temp_node_holder)
                    # replace line instead new line
                    sys.stdout.write("\r++++INSERTED %d++++" % cnt)
                    sys.stdout.flush()
                    temp_node_holder = []
        # insert any remaining buffered nodes
        if insertion_type in ('transaction', 'batch') and temp_node_holder:
            if insertion_type == 'transaction':
                self.insert_as_transaction(temp_node_holder)
            else:
                self.batch_create(temp_node_holder)
import unicodedata

from py2neo import Graph, Node, Relationship

# Python 2 script: loads flight routes from per-city CSV files into Neo4j.
# Connect to the default local graph and start from a clean database.
g = Graph()
g.delete_all()

starts = 'Stockholm', 'Edinburgh'

for start in starts:
    print "\nStarting " + start
    # Upsert the origin airport node by name.
    start_node = g.merge_one('Airport', property_key='name', property_value=start)
    for l in open(start + '.csv'):
        # Each CSV row: airline name followed by destination airports.
        items = l.split(',')
        # Normalise to plain ASCII (drops accents/diacritics).
        airline = unicodedata.normalize('NFKD', unicode(items[0], encoding='utf-8')).encode('ascii', 'ignore')
        print "Airline is " + airline
        for airport in items[1:]:
            # Normalise, trim, and keep only the part before any "-" suffix.
            airport = unicodedata.normalize('NFKD', unicode(airport, encoding='utf-8')).encode('ascii', 'ignore').strip().split('-')[0]
            print "Endpoint is " + airport
            end_node = g.merge_one("Airport", property_key='name', property_value=airport)
            # One FLIES_TO edge per (origin, destination, airline) row.
            g.create(Relationship(start_node, "FLIES_TO", end_node, airline=airline))
def setup_schema():
    """Create uniqueness constraints, ignoring any that already exist."""
    def constraint(label, key):
        # Best-effort: creation raises if the constraint already exists.
        try:
            graph.schema.create_uniqueness_constraint(label, key)
        except:
            pass
    constraint("Repository", "full_name")
    constraint("User", "login")
    constraint("PullRequest", "id")
    constraint('ProcessingStatus', 'last_processed_date')

# Python 2 driver script: connect, optionally rebuild, then load data.
graph = Graph(os.environ.get('NEO4J_CONNECTION_STRING'))

if args.drop:
    # Destructive: wipe everything and re-create the schema constraints.
    print '!!! DROPPING DATABASE !!!'
    graph.delete_all();
    setup_schema();

if args.file:
    load_from_file(args.file)

if args.download_from_date:
    load_from_date(args.download_from_date)

if args.cont:
    # Resume from one hour after the last processed date, or from the
    # hard-coded start date when no status node exists yet.
    status = graph.find_one('ProcessingStatus')
    if status:
        date = status.properties['date']
        date = datetime.datetime.strptime(date, '%Y-%m-%d-%H')
        date += datetime.timedelta(hours=1)
    else:
        date = datetime.datetime(2011, 2, 12)
__author__ = 'Marnee Dearman'
from py2neo import Graph, Node, Relationship
from settings import graphene

# Python 2 demo script for a meetup talk: connect and reset the graph.
# What is the URL to my NEO4J
tuple_graph = Graph(graphene.DATABASE_URL)
print tuple_graph

# start over with delete so I can run the whole script at one time
tuple_graph.delete_all()

# Let's try modeling the group
# using Py2Neo.  This is the Python Meetup after all

# Example CYPHER
# CREATE (m:MEMBER {name:"Marnee"} )
# RETURN m
# Create a member aliased as "m" with name "Marnee"
# Return that member's node (m)

# Example PY2NEO
# one way is to setup a dictionary with the properties for the new Node, in this case MEMBER
# member_properties = {}
# member_properties["name"] = "Julian"

# show the code for py2neo
# member_node = Node.cast("MEMBER", member_properties)
# member_node = Node.cast("MEMBER", name="Julian", python_years=5)
# tuple_graph.create(member_node)

# Julian is lonely, let's give him a friend
class test_pipeline(unittest.TestCase):
    """End-to-end tests for the BITalino pipeline: file parsing, streak
    aggregation, the Neo4j-backed WaferService, and the HTTP API on
    localhost:8000. Python 2 code; requires a Neo4j test server on :8484."""

    # Expected length of an ISO datetime with microseconds.
    LEN_DATETIME = 26
    # Expected JSON length for the reference test file.
    LEN_TEST_FILE = 632

    def setUp(self):
        try:
            # Resolve fixture paths relative to this test module.
            __location__ = os.path.realpath(
                os.path.join(os.getcwd(), os.path.dirname(__file__)))
            self.src = open(
                os.path.join(__location__, "data/bit-test-data.txt"))
            self.badFreq = open(
                os.path.join(__location__, "data/bad-frequency.txt"))
            self.badStartTime = open(
                os.path.join(__location__, "data/bad-starttime.txt"))
            # Dedicated test database, cleared before each test.
            self.graph = Graph("http://localhost:8484/db/data")
            self.graph.delete_all()
            self.service = WaferService(self.graph)
        except:
            print "Error during unittest setup"

    def tearDown(self):
        self.graph.delete_all()

    #
    # File tests
    #
    def test_open(self):
        self.assertEquals(len(self.src.read().split("\n")), 20)

    #
    # Parser tests
    #
    def test_parser(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(len(bitdo.toJson()), test_pipeline.LEN_TEST_FILE)
        self.assertEquals(len(bitdo.channels.keys()), 5)
        self.assertEquals(bitdo.header["SamplingFrequency"], "1000")
        self.assertEquals(len(bitdo.channels["EMG"]), 16)
        # Assure that datetime is to microsecond precision
        self.assertEquals(
            len(bitdo.header["StartDateTime"]), test_pipeline.LEN_DATETIME)

    def test_parser_errors(self):
        # Malformed headers must be rejected with AttributeError.
        self.assertRaises(AttributeError, parser.BITdo, (self.badFreq))
        self.assertRaises(AttributeError, parser.BITdo, (self.badStartTime))

    #
    # Aggregator tests
    #
    def test_aggregator_nums(self):
        a = [0, 0, 1, 1, 1]
        s = aggregator.streaksIn(a)
        self.assertEquals(s[0].getStreaks(), [2])
        self.assertEquals(s[0].getStreakExp(2), [4])
        self.assertEquals(s[1].getStreaks(), [3])
        self.assertEquals(s[1].getStreakExp(2), [9])

    def test_aggregator_bools(self):
        b = [True, False, False, True, False]
        s = aggregator.streaksIn(b)
        self.assertEquals(s[True].getStreaks(), [1, 1])
        self.assertEquals(s[False].getStreaks(), [2, 1])
        self.assertEquals(s[False].getStreakExp(2), [4, 1])

    def test_aggregator_strings(self):
        c = ["cat", "826", "826", "826", "~~", "~~", "cat", "cat", "~~"]
        s = aggregator.streaksIn(c)
        self.assertEquals(s["cat"].getStreaks(), [1, 2])
        self.assertEquals(s["cat"].getStreakExp(2), [1, 4])
        self.assertEquals(s["826"].getStreaks(), [3])
        self.assertEquals(s["826"].getStreakExp(3), [27])
        self.assertEquals(s["~~"].getStreaks(), [2, 1])
        self.assertEquals(s["~~"].getStreakExp(-1), [0.5, 1])

    def test_aggregator_average(self):
        bitdo = parser.BITdo(self.src)
        self.assertEquals(aggregator.average(bitdo.channels['EMG']), 525.4375)
        self.assertEquals(aggregator.average([1, 2, 3]), 2)
        self.assertEquals(aggregator.average([x for x in range(1000)]), 499.5)

    #
    # Graph Service
    #
    def test_add_new_user(self):
        # One user, one activity, two moments — each moment carries two
        # annotations, hence the expected node counts below.
        user = self.service.add_user("Duke")
        userid = user.properties["userid"]
        activity = self.service.add_activity(
            userid, "Free Throws", "no description")
        activityname = activity.properties["name"]
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.service.add_moment(
            userid, activityname, "timestamp", ["a1:true", "a2:false"])
        self.assertEquals(count(self.graph.find("User")), 1)
        self.assertEquals(count(self.graph.find("Activity")), 1)
        self.assertEquals(count(self.graph.find("Moment")), 2)
        self.assertEquals(count(self.graph.find("Annotation")), 2)

    #
    # Graph API
    #
    def test_post_user(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)

    def test_post_user_fails(self):
        r = requests.post('http://localhost:8000/users', {})
        self.assertEquals(r.status_code, 400)

    def test_post_activity(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)

    def test_post_activity_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        r = requests.post('http://localhost:8000/activities', {
            'userid': 'Thaddeus'})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/users', {
            'name': 'Free-throw shooting'})
        self.assertEquals(r.status_code, 400)

    def test_post_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        r = newMoment('Thaddeus', 'Free-throw shooting', now(),
                      ["make:true", "swish:true"])
        self.assertEquals(r.status_code, 201)

    def test_post_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        # Test explicitly, i.e. not using the helper function
        # so we are able to neglect parameters
        annotations = ["make:true", "swish:true"]
        r = requests.post('http://localhost:8000/moments', {
            # missing userid
            'name': 'Free-throw shooting',
            'timestamp': now(),
            'annotations[]': annotations})
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now()
            # missing annotations
            })
        self.assertEquals(r.status_code, 400)
        r = requests.post('http://localhost:8000/moments', {
            'userid': 'Thaddeus',
            'name': 'Free-throw shooting',
            'timestamp': now(),
            # it's `annotations[]`... sigh
            'annotations': annotations})
        self.assertEquals(r.status_code, 400)

    def test_get_moment(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        r = getMoments('Thaddeus', 'Free-throw shooting')
        self.assertEquals(r.status_code, 200)
        self.assertEquals(len(r.json()), 3)

    def test_get_moment_fails(self):
        r = newUser('Thaddeus')
        self.assertEquals(r.status_code, 200)
        r = newActivity('Thaddeus', 'Basketball')
        self.assertEquals(r.status_code, 200)
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:true"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:false", "swish:false"])
        newMoment('Thaddeus', 'Free-throw shooting', now(),
                  ["make:true", "swish:false"])
        # wrong acitivity name
        r = getMoments('Thaddeus', 'B_sketb_ll')
        self.assertEquals(r.status_code, 400)
# html_text = Node("HTML text", page_source=html_of_new_page) # html_rel = Relationship(new_node,"HTML text",html_text) # gp.create(html_rel) gp.commit() def get_the_available_crawlers(): crawlers = ["CRAWLER-2", "CRAWLER-3", "CRAWLER-4"] return crawlers graph_database_location = "http://"+database+":7474/db/data/" graph = Graph(graph_database_location, user='******', password='******') # connect to the local graph database if delete_graph_history == "yes": graph.delete_all() # Delete all the previous made nodes and relationship print("DATABASE DELETED !") gp = graph.begin() coordinates = [] # create the list for coordinates coordinates = generate_coordinates(width, height, coordinates) # generates coordinates based on the diff and the resolution coordinates = generate_random_coordinates(coordinates) # already generated coordinates are shuffled randomly chrome_options = Options() chrome_options.add_extension(".\process_monitor.crx") # Adding the extension to chrome # chrome_options.add_extension("C:\\Users\crawler\Desktop\Crawler\process_monitor.crx") chromium_path = ".\chrome-win32\chrome.exe" # Use the portable chromium browser # If chromium browser is not required then by removing the above chromium path, it will start using the default one # The default will be developer google chrome. # ONly Dev channel google chrome can support the extension used here. This extension used a particular API.
print str(datetime.now()) + "-" + action + "=>" + str(object); ######Set up###### ##Argument Setup parser = argparse.ArgumentParser(description="This script will support the importation of Swaggable's MySQL Database into Neo4j following the respective business rules.") parser.add_argument("-r", "--rebuild", help="scraps and rebuilds the Neo4J graph", action="store_true") parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") args = parser.parse_args() ##Neo4J Object neo4jConnectionString = "http://*****:*****@localhost:7474/db/data" neo4jGraph = Graph(neo4jConnectionString) ##Rebuild? if args.rebuild: neo4jGraph.delete_all() ##MySQL Object mysqlConnectionProperties = { 'user':'******', 'passwd':'q7pe9vk45g637DB', 'host':'ec2-184-169-142-95.us-west-1.compute.amazonaws.com', 'db':'swagbag_v2', 'autocommit':True, 'use_unicode':True, 'use_pure':False} mysqlConnection = mysql.connector.connect(**mysqlConnectionProperties) cursor = mysqlConnection.cursor() cursor.execute("SET GLOBAL max_allowed_packet=67108864")
class PopItToNeo(object):
    """Mirror a PopIt (popit.mysociety.org) instance into a Neo4j graph.

    Persons, Organizations and Posts become nodes; memberships, post
    assignments and parent-company links become relationships.  Every
    fetched entity is cached in a dict so each PopIt id is resolved at
    most once per run.
    """

    def __init__(self):
        # Use a context manager so the config handle is closed (the original
        # leaked it), and safe_load so YAML cannot construct arbitrary objects.
        with open("config.yaml") as config_file:
            config = yaml.safe_load(config_file)
        self.endpoint = "https://sinar-malaysia.popit.mysociety.org/api/v0.1"
        # you know so that you can override this. why? I am not sure
        self.membership_field = "memberships"
        self.person_field = "persons"
        self.organization_field = "organizations"
        self.post_field = "posts"
        self.graph = Graph(config["graph_db"])
        if config["refresh"] == True:
            self.graph.delete_all()
        # Cypher-free caching: remember nodes already resolved so we don't
        # re-query the graph (or PopIt) for the same id.
        self.organization_processed = {}
        self.person_processed = {}
        self.post_processed = {}

    def process_membership(self):
        """Walk the paginated membership list, creating person->post and
        person->organization relationships (role name becomes the edge type)."""
        membership_url = "%s/%s" % (self.endpoint, self.membership_field)
        while True:
            logging.warning("Processing %s" % membership_url)
            data = self.fetch_entity(membership_url)
            logging.warning("Processing membership")
            entries = data["result"]
            for entry in entries:
                # a membership has 3 important fields: person_id,
                # organization_id, post_id
                if not (entry.get("person_id") and entry.get("organization_id")):
                    continue
                person = self.fetch_person(entry["person_id"])
                if not person:
                    continue
                role = entry.get("role", "member")
                if not role:
                    # role can be present but empty/None in the API payload
                    role = "member"
                logging.warning("Role: %s" % role)
                # Relationship properties; built only once per entry.
                kwparams = {}
                kwparams["popit_id"] = entry["id"]
                start_date = get_timestamp(entry.get("start_date"))
                if start_date:
                    kwparams["start_date"] = start_date
                end_date = get_timestamp(entry.get("end_date"))
                if end_date:
                    kwparams["end_date"] = end_date
                post_exist = False
                if entry.get("post_id"):
                    post = self.fetch_post(entry["post_id"])
                    if not post:
                        continue
                    if self.graph.match_one(person, role, post):
                        post_exist = True
                        logging.warning("Already exist, skipping")
                    if not post_exist:
                        relationship = Relationship(person, role, post, **kwparams)
                        self.graph.create(relationship)
                organization_exist = False
                if entry.get("organization_id"):
                    organization = self.fetch_organization(entry["organization_id"])
                    if not organization:
                        continue
                    if self.graph.match_one(person, role, organization):
                        logging.warning("Already exist, skipping")
                        organization_exist = True
                    if not organization_exist:
                        relationship = Relationship(person, role, organization, **kwparams)
                        self.graph.create(relationship)
            if data.get("next_url"):
                membership_url = data.get("next_url")
            else:
                break

    def fetch_person(self, person_id):
        """Return the Persons node for *person_id*, creating it on first sight.

        Resolution order: in-memory cache, existing graph node, PopIt API.
        Returns None when PopIt does not know the id.
        """
        if person_id in self.person_processed:
            logging.warning("Person %s fetch from cache" % person_id)
            return self.person_processed[person_id]
        node = self.graph.find_one("Persons", "popit_id", person_id)
        if node:
            logging.warning("Already exist, skipping")
            self.person_processed[person_id] = node
            return node
        person_url = "%s/%s/%s" % (self.endpoint, self.person_field, person_id)
        data = self.fetch_entity(person_url)
        if not data:
            # Don't assume that this id won't be created the next time
            logging.warning("person not exist %s" % person_id)
            return None
        logging.warning("Fetching person")
        entity = data["result"]
        # "name" may be a single string or a list of alternatives.
        if isinstance(entity["name"], list):
            name = entity["name"][0]
        else:
            name = entity["name"]
        logging.warning("Name: %s" % name)
        kwparam = {}
        birth_date = get_timestamp(entity.get("birth_date"))
        if birth_date:
            kwparam["birth_date"] = birth_date
        death_date = get_timestamp(entity.get("death_date"))
        if death_date:
            kwparam["death_date"] = death_date
        kwparam["name"] = name
        kwparam["popit_id"] = entity["id"]
        node = Node("Persons", **kwparam)
        self.graph.create(node)
        self.person_processed[entity["id"]] = node
        return node

    def fetch_organization(self, organization_id):
        """Return the Organization node for *organization_id*, creating it on
        first sight (cache -> graph -> PopIt API); None if PopIt lacks it."""
        if organization_id in self.organization_processed:
            logging.warning("Organization %s fetch from cache" % organization_id)
            return self.organization_processed[organization_id]
        node = self.graph.find_one("Organization", "popit_id", organization_id)
        if node:
            logging.warning("Already exist, skipping")
            self.organization_processed[organization_id] = node
            return node
        organization_url = "%s/%s/%s" % (self.endpoint, self.organization_field, organization_id)
        data = self.fetch_entity(organization_url)
        if not data:
            logging.warning("Organization don't exist %s" % organization_id)
            return None
        logging.warning("Fetch orgnanization")
        entity = data["result"]
        if isinstance(entity["name"], list):
            name = entity["name"][0]
        else:
            name = entity["name"]
        kwparams = {}
        logging.warning("Name: %s" % name)
        kwparams["name"] = name
        kwparams["popit_id"] = entity["id"]
        founding_date = get_timestamp(entity.get("founding_date"))
        if founding_date:
            kwparams["founding_date"] = founding_date
        dissolution_date = get_timestamp(entity.get("dissolution_date"))
        if dissolution_date:
            kwparams["dissolution_date"] = dissolution_date
        if "classification" in entity:
            logging.warning("Classification:%s" % entity["classification"])
            kwparams["classification"] = entity["classification"]
        node = Node("Organization", **kwparams)
        self.graph.create(node)
        self.organization_processed[entity["id"]] = node
        return node

    def fetch_post(self, post_id):
        """Return the Posts node for *post_id*, creating it — and its "of"
        relationship to the owning organization — on first sight.

        Returns None when PopIt does not know the id.
        """
        if post_id in self.post_processed:
            logging.warning("Post %s fetch from cache" % post_id)
            return self.post_processed[post_id]
        node = self.graph.find_one("Posts", "popit_id", post_id)
        if node:
            logging.warning("Already exist, skipping")
            self.post_processed[post_id] = node
            return node
        # Fixed "%s/% s/%s" typo from the original format string.
        post_url = "%s/%s/%s" % (self.endpoint, self.post_field, post_id)
        data = self.fetch_entity(post_url)
        if not data:
            logging.warning("Post don't exist %s" % post_id)
            return None
        logging.warning("Fetch post")
        entity = data["result"]
        # A post is linked to an organization; a post without one is still
        # stored, just without the "of" relationship.
        try:
            if entity.get("organization_id"):
                organization = self.fetch_organization(entity["organization_id"])
            else:
                organization = None
        except Exception as e:
            # str(e) works on both Py2 and Py3 (e.message does not).
            logging.warning(str(e))
            organization = None
        logging.warning("Label: %s" % entity["label"])
        kwparams = {}
        kwparams["name"] = entity["label"]
        kwparams["popit_id"] = entity["id"]
        start_date = get_timestamp(entity.get("start_date"))
        if start_date:
            kwparams["start_date"] = start_date
        end_date = get_timestamp(entity.get("end_date"))
        if end_date:
            kwparams["end_date"] = end_date
        node = Node("Posts", **kwparams)
        self.graph.create(node)
        self.post_processed[entity["id"]] = node
        if organization:
            temp_param = {}
            if start_date:
                temp_param["start_date"] = start_date
            if end_date:
                temp_param["end_date"] = end_date
            # BUG FIX: the original passed **kwparams here, copying the post's
            # name/popit_id onto the edge; only the dates in temp_param belong.
            relation = Relationship(node, "of", organization, **temp_param)
            self.graph.create(relation)
        return node

    def process_parent_company(self):
        """Walk all organizations and mirror parent_id links as a pair of
        parent_of / child_of relationships (each created only if missing)."""
        organizations_url = "%s/%s" % (self.endpoint, self.organization_field)
        while True:
            data = self.fetch_entity(organizations_url)
            entries = data["result"]
            for entry in entries:
                if not entry.get("parent_id"):
                    logging.warning("No parent id, moving on")
                    continue
                logging.warning(entry.get("parent_id"))
                # TODO: the two direction checks below are not DRY.
                parent_node = self.fetch_organization(entry["parent_id"])
                if not parent_node:
                    continue
                child_node = self.fetch_organization(entry["id"])
                if self.graph.match_one(parent_node, "parent_of", child_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(parent_node, "parent_of", child_node))
                if self.graph.match_one(child_node, "child_of", parent_node):
                    logging.warning("relation exist %s %s" % (entry["id"], entry["parent_id"]))
                    continue
                self.graph.create(Relationship(child_node, "child_of", parent_node))
            if "next_url" in data:
                organizations_url = data["next_url"]
                logging.warning(organizations_url)
            else:
                break

    def process_posts(self):
        """Walk the paginated post listing; fetch_post persists each node and
        its organization relationship as a side effect."""
        post_url = "%s/%s" % (self.endpoint, self.post_field)
        while True:
            data = self.fetch_entity(post_url)
            entries = data["result"]
            for entry in entries:
                # BUG FIX: fetch_post already creates the node; the original
                # called self.graph.create(node) again on the bound node.
                self.fetch_post(entry["id"])
            if "next_url" in data:
                post_url = data["next_url"]
                logging.warning(post_url)
            else:
                break

    def fetch_entity(self, url):
        """GET *url* and return the decoded JSON dict, or {} on any non-200
        reply so callers can treat failures as an empty page."""
        r = requests.get(url)
        time.sleep(0.1)  # be polite to the PopIt API
        if r.status_code != 200:
            # Just to make output consistent; an exception would not kill the
            # script anyway, but {} keeps the calling loops simple.
            return {}
        return r.json()
class CreateDB(): def __init__(self): authenticate("localhost:7474", "neo4j", "1234") # authenticate("52.27.227.159:7474", "neo4j", "1234") self.graph = Graph(GRAPH_CONNECTION_STRNIG) self.link_provider = LinksProvider() self.tvmaze = TVMaze() self.shows = [] self.node_count = 0 self.relationship_count = 0 def create_shows(self): self.graph.delete_all() self.shows = self.tvmaze.get_all_shows() print " ++++ Creating {0} shows".format(len(self.shows)) show_count = 0 for show in self.shows: starttime = datetime.datetime.now() print "{0}:---------------------------------------------".format(show_count) show_count += 1 print " + Creating show: {0}".format(show["name"]) show_node = self.create_show(show) for genre in show.get('genres', []): genre_node = self.create_genre(genre) self.set_show_genre_relationship(show_node, genre_node) if show['webChannel'] is not None: webchannel_node = self.create_web_channel(show['webChannel']) self.create_web_channel_show_relationship(genre_node, webchannel_node) if show['network'] is not None: network_node = self.create_network(show['network']) self.create_network_show_relationship(network_node, show_node) episode_nodes = self.create_episodes(show) print " + creating {0} episodes".format(len(episode_nodes)) for episode_node in episode_nodes: self.create_episode_show_relationship(episode_node, show_node) endtime = datetime.datetime.now() deltatime = endtime-starttime print " - Operation took " + str(deltatime) print " - total Nodes: {0}, Relationships: {1}".format(self.node_count, self.relationship_count) print " - finished creating show" print " --- finished creating shows" return def create_show(self, show): show_node = graph.merge_one("Show", 'id', show['id']) show_node['url'] = show['url'] show_node['name'] = show['name'] show_node['type'] = show['type'] show_node['status'] = show['status'] show_node['runtime'] = show['runtime'] show_node['premiered'] = show['premiered'] show_node['weight'] = show['weight'] show_node['summary'] = 
show['summary'] show_node['img_medium'] = show['image'].get('medium', None) show_node['img_original'] = show['image'].get('original', None) if show['rating'] is not None: show_node['rating'] = show['rating']['average'] show_node.push() self.node_count += 1 return show_node """ Genre """ def create_genre(self, genre): genre_node = graph.merge_one("Genre", 'genre', genre) self.node_count += 1 return genre_node def set_show_genre_relationship(self, show_node, genre_node): show_of_genre = Relationship(show_node, "of genre", genre_node) graph.create_unique(show_of_genre) self.relationship_count += 1 return genre_node """ Network """ def create_network(self, network): network_node = graph.merge_one("Network", 'id', network['id']) network_node['name'] = network['name'] network_node.push() self.node_count += 1 if network['country'] is not None: country_node = self.create_country(network['country']) self.create_network_show_relationship(country_node, network_node) return network_node def create_network_show_relationship(self, network_node, show_node): show_of_network = Relationship(show_node, "from", network_node) graph.create_unique(show_of_network) self.relationship_count += 1 return show_of_network """ WebChannel """ def create_web_channel(self, webChannel): webchannel_node = graph.merge_one("WebChannel", 'id', webChannel['id']) webchannel_node['name'] = webChannel['name'] webchannel_node.push() self.node_count += 1 if webChannel['country'] is not None: country_node = self.create_country(webChannel['country']) self.create_country_web_channel_relationship(country_node, webchannel_node) return webchannel_node def create_web_channel_show_relationship(self, show_node, webchannel_node ): show_of_webchannel = Relationship(show_node, "from", webchannel_node) graph.create_unique(show_of_webchannel) self.relationship_count += 1 return show_of_webchannel """ Country """ def create_country(self, country): country_node = graph.merge_one("Country", 'code', country['code']) 
country_node['name'] = country['name'] country_node['timezone'] = country['timezone'] country_node.push() self.node_count += 1 return country_node def create_country_web_channel_relationship(self, country_node, webchannel_node): webchannel_from_country = Relationship(webchannel_node, "from", country_node) graph.create_unique(webchannel_from_country) self.relationship_count += 1 return webchannel_from_country def create_network_show_relationship(self, country_node, network_node ): network_from_country = Relationship(network_node, "from", country_node) graph.create_unique(network_from_country) self.relationship_count += 1 return network_from_country """ Episodes """ def create_episodes(self, show): episodes = self.tvmaze.get_show_episodes(show["id"]) episode_nodes = [] for episode in episodes: episode_node = self.create_episode(episode) episode_nodes.append(episode_node) episode_links = self.link_provider.get_links_for_episode(show['name'], episode['season'], episode['number']) for link in episode_links: link_node = self.create_link(link) self.create_link_episode_relationship(link_node, episode_node) return episode_nodes def create_episode(self, episode): episode_node = graph.merge_one("Episode", 'id', episode['id']) episode_node['name'] = episode['name'] episode_node['season'] = episode['season'] episode_node['number'] = episode['number'] episode_node['airdate'] = episode['airdate'] episode_node['airtime'] = episode['airtime'] episode_node['airstamp'] = episode['airstamp'] episode_node['runtime'] = episode['runtime'] episode_node['summary'] = episode['summary'] if episode['image'] is not None: episode_node['img_medium'] = episode['image'].get('medium', None) episode_node['img_original'] = episode['image'].get('original', None) episode_node.push() self.node_count += 1 return episode_node def create_episode_show_relationship(self, episode_node, show_node): show_has_episode = Relationship(show_node, "has", episode_node) graph.create_unique(show_has_episode) 
self.relationship_count += 1 return show_has_episode """ Link """ def create_link(self, link): """ :param link: { url: str, host: str } :return: """ link_node = graph.merge_one("Link", 'url', link['url']) link_node['host'] = link["host"] link_node.push() self.node_count += 1 return link_node def create_link_episode_relationship(self, link_node, episode_node): link_has_episode = Relationship(episode_node, "has", link_node) graph.create(link_has_episode) self.relationship_count += 1 return link_has_episode
import networkx as nx
from py2neo import authenticate, Node, Relationship, Graph
from py2neo.packages.httpstream.http import SocketError
from requests.exceptions import ConnectionError, HTTPError
from utils import get_results, handle_http_errors
from functools import partial

client = soundcloud.Client(client_id='454aeaee30d3533d6d8f448556b50f23')

# Cache: SoundCloud profile id -> username, filled lazily by id2username().
id2username_cache = {}

# need to navigate and set the password to "pass" for first time
authenticate("localhost:7474", "neo4j", "cloudchaser")
userGraph = Graph()
userGraph.delete_all()  # start each run from an empty graph


def getUserAttr(resource, attr):
    """Return attribute *attr* of a SoundCloud resource, or None if absent."""
    # if hasattr(resource, 'user'): return resource.user[attr]
    if hasattr(resource, attr):
        return getattr(resource, attr)
    return None


getUsername = partial(getUserAttr, attr='username')
getUserid = partial(getUserAttr, attr='id')


@handle_http_errors
def id2username(profile, kind='users'):
    # Resolve *profile* (an id) to a username, consulting the cache first.
    # NOTE(review): declares global `id2username_dict`, but the cache defined
    # above is `id2username_cache` — confirm which name is intended.
    # (The rest of this definition continues beyond this chunk.)
    global id2username_dict
    username = id2username_cache.get(profile, None)
    if username is not None:
        return username
{
    'CDs': None,
    'Discos de vinil': None,
    'Fitas cassete': None
},
'Importados': {
    'CDs': None,
    'Discos de vinil': None,
    'Fitas cassete': None
}
},
'Brinquedos': {
    'Jogos de tabuleiro': None,
    'Action Figures': None,
    'Bonecas': None,
    'Miniaturas': {
        'Veículos': None,
        'Construções': None
    }
}
}
# (Above: tail of the nested `categorias` dictionary whose opening braces lie
# earlier in the file; None marks a leaf category.)

# Populate the database with nodes and relationships based on the
# 'categorias' dictionary.
authenticate("localhost:7474", "neo4j", "secret")
stuffgraph = Graph()
stuffgraph.delete_all()  # wipe any previous run before re-populating
create_and_relate(categorias)