def saveInterpersonalScore(gdb, project, email):
    """Create ``interpersonal_score`` edges from one author to related authors.

    For the author indexed under ``email``, find every other node that shares
    a ``communication_score`` target with it. For each such peer, create an
    ``interpersonal_score`` relationship whose ``score`` is the sum of the
    author's ``communication_score`` values towards the shared targets
    (rows are only returned when that sum is positive).

    :param gdb: graph handle providing ``get_or_create_index`` and ``create``
    :param project: unused here
    :param email: value looked up under the "author" key of "index_nodes"
    """
    index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes")

    # The index lookup is indexed with [0], so an unknown email (empty
    # result) used to raise IndexError before the truthiness check could run.
    author_hits = index_nodes.get("author", email)
    a = author_hits[0] if author_hits else None
    if not a:
        return

    # get list of people to check for score, this way we don't have to run
    # on every pairing which takes a long time
    q = "START a=node(" + str(a._id) + ") MATCH (a)-[:communication_score]->()<-[:communication_score]-(b) where a<>b return distinct(b);"
    email_list = [r.b['email'] for r in neo4j.CypherQuery(gdb, q).stream()]

    for e in email_list:
        peer_hits = index_nodes.get("author", e)
        b = peer_hits[0] if peer_hits else None
        if not b:
            continue
        q = "START a=node(" + str(a._id) + "), b=node(" + str(b._id) + ") MATCH (a)-[r:communication_score]->()<-[:communication_score]-(b) with sum(r.score) AS sum WHERE sum > 0 RETURN sum;"
        for r in neo4j.CypherQuery(gdb, q).stream():
            gdb.create((a, "interpersonal_score", b, {"score": float(r[0])}))
def saveCommunication(gdb, project, email):
    """Create time-decayed ``communication_score`` edges for one author.

    For every distinct node the author has a ``communication`` relationship
    to, sum ``(1 - EXPONENTIAL_DECAY) ** age_in_days`` over those
    relationships (age derived from each relationship's ``epoch`` property)
    and store the sum as the ``score`` of a new ``communication_score``
    relationship.

    :param gdb: graph handle providing ``get_or_create_index`` and ``create``
    :param project: unused here
    :param email: value looked up under the "author" key of "index_nodes"
    """
    index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes")

    # Guard the empty lookup: indexing [0] on it used to raise IndexError.
    author_hits = index_nodes.get("author", email)
    a = author_hits[0] if author_hits else None
    if not a:
        return

    # Loop invariants, computed once instead of once per relationship.
    # NOTE(review): time.mktime() interprets its argument as local time, so
    # passing gmtime() shifts "now" by the UTC offset. Kept unchanged because
    # the stored 'epoch' values may have been produced the same way - confirm
    # before replacing with time.time().
    now = time.mktime(time.gmtime())
    decay_base = 1 - devknowledge.settings.EXPONENTIAL_DECAY

    #http://docs.neo4j.org/refcard/2.0/
    q = "START a=node(" + str(a._id) + ") MATCH (a)-[:communication]->(e) RETURN DISTINCT e;"
    result = neo4j.CypherQuery(gdb, q)
    try:
        for r in result.stream():
            comm_score = 0
            q = "START a=node(" + str(a._id) + "), e=node(" + str(r.e._id) + ") MATCH (a)-[c:communication]->(e) RETURN c;"
            for c in neo4j.CypherQuery(gdb, q).stream():
                days = (((now - float(c.c['epoch'])) / 60) / 60) / 24
                comm_score = comm_score + math.pow(decay_base, days)
            gdb.create((a, "communication_score", r.e, {"score": comm_score}))
    except ValueError:
        #TODO: find why this might be erroring in py2neo
        #raise ValueError("Cannot determine object type", data)
        print("Error: ValueError. continuing...")
def _sumExpertiseForPattern(gdb, a, b, pattern):
    """Run one a-to-b expertise query and add up the sums it returns.

    ``pattern`` is the Cypher MATCH pattern connecting ``a`` and ``b``; it
    must bind the author's expertise relationship as ``r``.
    """
    q = ("START a=node(" + str(a._id) + "), b=node(" + str(b._id) + ") match "
         + pattern
         + " with sum(r.expertise) as sum WHERE sum > 0 return sum;")
    score = 0
    for r in neo4j.CypherQuery(gdb, q).stream():
        score += float(r[0])
    return score


def saveFileScore(gdb, project, email):
    """Create weighted ``file_score`` edges from one author to related authors.

    Candidate peers are the authors with expertise on a file that is at most
    two ``depends`` hops away from a file this author has expertise on. For
    each peer the author's ``expertise`` values are summed per connection
    kind and weighted: same file x1, directly dependent files x0.5, files
    linked through one intermediate file x0.25. The weighted total is stored
    as ``score`` on a new ``file_score`` relationship.

    :param gdb: graph handle providing ``get_or_create_index`` and ``create``
    :param project: unused here
    :param email: value looked up under the "author" key of "index_nodes"
    """
    index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes")

    # Guard the empty lookup: indexing [0] on it used to raise IndexError.
    author_hits = index_nodes.get("author", email)
    a = author_hits[0] if author_hits else None
    if not a:
        return

    #weighting: 1, 1/2, 1/4, etc.
    #same file, dependent files, middle-man file, etc.
    weighted_patterns = (
        ("a-[r:expertise]->()<-[:expertise]-b", 1),
        ("a-[r:expertise]->()-[:depends]-()<-[:expertise]-b", 0.5),
        ("a-[r:expertise]->()-[:depends]-()-[:depends]-()<-[:expertise]-b", 0.25),
    )

    #figure out who we should query for paths, this prevents us from running
    #on every single pairing which takes way too long
    q = "start a=node(" + str(a._id) + ") match a-[:expertise]->()-[:depends*0..2]-()<-[:expertise]-b where a<>b return distinct(b);"
    email_list = [r.b['email'] for r in neo4j.CypherQuery(gdb, q).stream()]

    for e in email_list:
        peer_hits = index_nodes.get("author", e)
        b = peer_hits[0] if peer_hits else None
        if not b:
            continue

        total_file_score = 0
        for pattern, weight in weighted_patterns:
            total_file_score += _sumExpertiseForPattern(gdb, a, b, pattern) * weight

        #create new relationship between A and B with the summed and weighted score value
        gdb.create((a, "file_score", b, {"score": float(total_file_score)}))
def __reload(self, q): start_reload = time.time() self.session = 1 + q[0][0] self.frame_count = q[0][1] self.last_frame = q[0][2] self.last_timestamp = q[0][3] print ". ", query_str = "MATCH (actor:Actor) \ RETURN actor.actor, actor" query = neo4j.CypherQuery(self.gdb, query_str) for actor in query.stream(): self.actors_dict[actor[0]] = actor[1] print ". ", query_str = "MATCH (interaction:Interaction) \ RETURN interaction.actor1, \ interaction.actor2, \ interaction" query = neo4j.CypherQuery(self.gdb, query_str) for interaction in query.stream(): self.interactions_dict[(interaction[0], interaction[1])] = interaction[2] print ". ", return time.time() - start_reload
def auditalldrones():
    """Audit every Drone and cross-check two ways of loading them.

    Drones are loaded once through the ``CMAclass`` index (following
    ``IS_A``) and each is audited by the integer parsed from its designation
    after the first six characters. They are then loaded again through the
    ``Drone`` index, and the two sets are asserted to contain the very same
    objects under the same designations.
    """
    auditor = AUDITS()

    by_class = neo4j.CypherQuery(
        CMAdb.cdb.db,
        "START droneroot=node:CMAclass('Drone:*') MATCH drone-[:IS_A]->droneroot RETURN drone")
    droneobjs = list(CMAdb.store.load_cypher_nodes(by_class, Drone))
    for droneobj in droneobjs:
        auditor.auditadrone(int(droneobj.designation[6:]))

    by_index = neo4j.CypherQuery(CMAdb.cdb.db, '''START n=node:Drone('*:*') RETURN n''')
    queryobjs = list(CMAdb.store.load_cypher_nodes(by_index, Drone))

    dronetbl = dict((d.designation, d) for d in droneobjs)
    querytbl = dict((d.designation, d) for d in queryobjs)

    # Now compare them, in both directions so neither side may hold extras
    for designation in dronetbl:
        assert (querytbl[designation] is dronetbl[designation])
    for designation in querytbl:
        assert (querytbl[designation] is dronetbl[designation])
def returnCommitInformation(project, commit): try: gdb = neo4j.GraphDatabaseService(settings.NEO4J_SERVER) except socket.error: print "Unable to connect to Neo4j: ", settings.NEO4J_SERVER return None q = "START a=node(*) where has(a.hash) return max(a.total_delta) as maximum;" result = neo4j.CypherQuery(gdb, q) #TODO: switch this so it's only a single return (so we don't have to go through the for loop) for r in result.stream(): maximum = r['maximum'] commit_info = None q = "START a=node(*) match a-[:impact]->() where a.hash = '"+commit+"' return distinct a;" result = neo4j.CypherQuery(gdb, q) for r in result.stream(): #TODO: switch this so it's only a single return (so we don't have to go through the for loop) converted_date = time.strftime('%Y/%m/%d', time.localtime(int(r.a['date']))) if r.a['total_delta'] == 0: impact_msg = "No technical change" elif r.a['total_delta'] < (maximum * 0.25): impact_msg = "Minimal technical change" elif r.a['total_delta'] < (maximum * 0.5): impact_msg = "Low technical change" elif r.a['total_delta'] < (maximum * 0.75): impact_msg = "Moderate technical change" else: impact_msg = "Substantial technical change." 
commit_info = CodeImpact(commit_hash=r.a['hash'], date=converted_date, delta_impact=round(r.a['total_delta'], 3), delta_impact_msg=impact_msg, logmsg=r.a['logmsg'], author_developer=r.a['author_developer'], author_email=r.a['author_email']) #if we don't have an :impact edge, we still need to populate the commit_info variable if not commit_info: q = "START a=node(*) where a.hash = '"+commit+"' return a;" result = neo4j.CypherQuery(gdb, q) for res in result.stream(): #TODO: switch this so it's only a single return (so we don't have to go through the for loop) converted_date = time.strftime('%Y/%m/%d', time.localtime(int(res.a['date']))) commit_info = CodeImpact(commit_hash=res.a['hash'], date=converted_date, delta_impact=0, delta_impact_msg="No technical change", logmsg=res.a['logmsg'], author_developer=res.a['author_developer'], author_email=res.a['author_email']) code_impact = [] q = "START a=node(*) match a-[r:impact]->b where a.hash = '"+commit+"' optional match b<-[:has]-c return r, b, c order by r.impactvalue desc limit 10;" result = neo4j.CypherQuery(gdb, q) for r in result.stream(): if r.c is not None: code_impact.append(CodeImpactFiles(code_impact=round(r.r['impactvalue'], 3), filename=r.b['filename'], function=r.b['function'], functionfilename=r.c['filename'])) else: code_impact.append(CodeImpactFiles(code_impact=round(r.r['impactvalue'], 3), filename=r.b['filename'], function=r.b['function'], functionfilename=None)) return commit_info, code_impact
def test_can_execute(graph):
    """Executing a CREATE ... RETURN statement yields exactly one 1-column row."""
    statement = ("CREATE (a {name:'Alice'}) "
                 "RETURN a.name")
    results = neo4j.CypherQuery(graph, statement).execute()
    assert len(results) == 1
    only_record = results[0]
    assert len(only_record) == 1
    assert only_record[0] == "Alice"
def get_user_by_username(self, graph_db, username):
    """Look up the ``User`` node whose ``username`` property matches.

    :param graph_db: graph handle passed to ``neo4j.CypherQuery``
    :param username: bound to the ``{username}`` query parameter
    :returns: whatever ``execute_one`` yields for the statement
    """
    statement = (" MATCH (user:User {username: {username}}) " +
                 " RETURN user ")
    lookup = neo4j.CypherQuery(graph_db, statement)
    return lookup.execute_one(username=username)
def calculateExpertise(filename, authors, line_start, line_end): try: gdb = neo4j.GraphDatabaseService(settings.NEO4J_SERVER) except socket.error: print "Unable to connect to Neo4j: ", settings.NEO4J_SERVER return None index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes") expertise = {} for author in authors: expertise[author] = 0 devknowledge = {} knowledge_model = [] f = index_nodes.get('filename', filename) f = f[0] if f: num_developers = 0 #http://docs.neo4j.org/refcard/2.0/ # We need to return ALL developers here because we need the number of total developers for the calculation q = "START f=node(" + str( f._id ) + ") MATCH f<-[rel:knowledge]-a WHERE rel.line_number >= " + str( line_start) + " AND rel.line_number <= " + str( line_end) + " return rel, a;" result = neo4j.CypherQuery(gdb, q) for r in result.stream(): #print "Author: ", r.a['author'] a = index_nodes.get("author", r.a['email']) a = a[0] if a: days = (((time.mktime(time.gmtime()) - float(r.rel['epoch'])) / 60) / 60) / 24 #print "Days: " + str(days) expertise[r.a['author']] = expertise[r.a['author']] + math.pow( (1 - settings.EXPONENTIAL_DECAY), days) #print "Expertise: ", expertise[r.a['author']] num_developers += 1 #print "Number of developers: ", num_developers total = 0 for author in authors: total = total + expertise[author] / num_developers #print "total: ", total for author in authors: #save expertise knowledge as percentage devknowledge[author] = round( ((expertise[author] / num_developers) / total) * 100, 2) #only return top 10 for w in sorted(devknowledge, key=devknowledge.get, reverse=True)[:10]: if devknowledge[ w] > 1: #only return developers who have one percent or greater expertise knowledge_model.append( DevKnowledge(name=w, knowledge=devknowledge[w])) return knowledge_model
def returnDevelopers(project, letter, page): try: gdb = neo4j.GraphDatabaseService(settings.NEO4J_SERVER) except socket.error: print "Unable to connect to Neo4j: ", settings.NEO4J_SERVER return None developers = [] #TODO: only return based on project label (this should be a little faster) #http://docs.neo4j.org/refcard/2.0/ #The match on file_score only returns developers who have contributed code so #we don't overpopulate the communication suggestions section with people who only posted to the mailing list. if letter.isalpha() and letter != "unknown": q = "START n=node(*) match n-[:file_score]->() WHERE HAS(n.author) AND n.author =~ '^[" + letter.upper( ) + "|" + letter.lower( ) + "].*' RETURN DISTINCT n ORDER BY n.author SKIP " + str( (page - 1) * 50) + " LIMIT 50;" else: #unknown name selected q = "START n=node(*) match n-[:file_score]->() WHERE HAS(n.author) AND (n.author =~ '^[^A-Za-z].*' OR n.author = '') RETURN DISTINCT n ORDER BY n.author SKIP " + str( (page - 1) * 50) + " LIMIT 50;" result = neo4j.CypherQuery(gdb, q) for r in result.stream(): developers.append(Developer(name=r.n['author'], email=r.n['email'])) return developers
def update_user(self, graph_db, username, firstname, lastname):
    """Set ``firstname`` and ``lastname`` on the ``User`` with this username.

    :returns: the result of executing the update statement
    """
    statement = ("MATCH (user:User {username:{u}} ) " +
                 "SET user.firstname = {fn}, user.lastname = {ln}")
    update = neo4j.CypherQuery(graph_db, statement)
    return update.execute(u=username, fn=firstname, ln=lastname)
def pruneDatabaseStaleFiles(all_files, project): try: gdb = neo4j.GraphDatabaseService(devknowledge.settings.NEO4J_SERVER) except: print "Unable to connect to Neo4j: ", devknowledge.settings.NEO4J_SERVER return None index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes") #TODO: set processed boolean on all files, then delete later, this way we don't have to have a massive select statement q = "START f=node(*) WHERE HAS(f.filename) RETURN f;" #print q result = neo4j.CypherQuery(gdb, q) for record in result.stream(): file = record.f["filename"].replace(project, "", 1) #replace first occurence if file not in all_files: #print "Deleting file ", record.f["filename"], " in database." #delete all edges first as this is a requirement before we can delete the node record.f.isolate( ) #delete all relationships connected to this node, both incoming and outgoing #clear out index index_nodes.remove("filename", record.f["filename"], record.f) #delete the file node record.f.delete()
def save(self, subj, node=None):
    """ Persist an object's public attributes and relationships.

    Attributes whose names do not start with an underscore become the node
    properties. If the object is already bound to a node (or ``node`` is
    given), that node's properties are replaced and its outgoing
    relationships deleted; otherwise a new node is created and bound. Any
    relationships listed in ``subj.__rel__`` are then written in one batch.

    :param subj: the object to save
    :param node: the database node to save to (if omitted, will re-save to
                 same node as previous save)
    :returns: ``subj``
    :raises ValueError: if a relationship end node does not belong to this graph
    """
    if node is not None:
        subj.__node__ = node

    # naively copy properties from object to node
    props = dict((name, value)
                 for name, value in subj.__dict__.items()
                 if not name.startswith("_"))

    if not hasattr(subj, "__node__"):
        subj.__node__, = self.graph.create(props)
    else:
        subj.__node__.set_properties(props)
        drop_outgoing = neo4j.CypherQuery(self.graph, "START a=node({A}) "
                                                      "MATCH (a)-[r]->(b) "
                                                      "DELETE r")
        drop_outgoing.execute(A=subj.__node__._id)

    # write rels
    if hasattr(subj, "__rel__"):
        batch = neo4j.WriteBatch(self.graph)
        for rel_type, rels in subj.__rel__.items():
            for rel_props, endpoint in rels:
                end_node = self._get_node(endpoint)
                if not neo4j.familiar(end_node, self.graph):
                    raise ValueError(end_node)
                batch.create((subj.__node__, rel_type, end_node, rel_props))
        batch.run()

    return subj
def add_photo_user_rels():
    """
    Link every Flickr photo to the user who took it.

    Merges an ``Authored`` relationship from each ``FlickrUser`` to each
    ``FlickrPhoto`` whose ``user`` property equals the user's ``user_id``.
    """
    merge_authorship = (
        "MATCH (u:FlickrUser), (p:FlickrPhoto) "
        "WHERE p.user = u.user_id "
        "MERGE (u)-[a:Authored]->(p)"
    )
    neo4j.CypherQuery(DB, merge_authorship).execute()
def add_places_relationships():
    """Link tweets to the places their text mentions.

    Iterates over all the tweets and uses the name index (Aho Corasick, per
    the original author's note) to check each text against all known place
    names at once. For every hit whose name is a known place, a
    ``MENTIONED`` relationship from the tweet node to the place node is
    merged into the graph.
    """
    place_nodes = get_node_attr_by_label('OSM', 'name')
    # lower-cased place name -> node id
    place_ids = {p[1].lower(): p[0] for p in place_nodes}
    name_index = get_name_index([p[1] for p in place_nodes])

    all_matches = []
    for tweet_id, text in get_node_attr_by_label('Tweets', 'content'):
        # TODO Work out how to use esmre to match unicode code points > 128
        text = text.encode('ascii', 'ignore')
        for m in name_index.query(text.lower()):
            if m[1] in place_ids:
                all_matches.append((tweet_id, place_ids[m[1]]))

    # The statement is identical for every pair, so build the query object
    # once and only vary the parameters.
    link_query = neo4j.CypherQuery(DB, """
        MATCH (n) WHERE id(n)={tid}
        MATCH (p) WHERE id(p)={pid}
        MERGE (n)-[m:MENTIONED]->(p)
        """)
    for tid, pid in all_matches:
        link_query.execute(tid=tid, pid=pid)
def cypherQuery(self, qs):
    """Execute ``qs`` and flatten each record into a list of plain values.

    Node values are replaced by their ``aname`` property; int, float, str
    and unicode values are kept as they are; anything else is dropped.

    :returns: ``(rows, raw)`` - the flattened rows and the raw result data
    """
    qd = neo4j.CypherQuery(self.graph, qs).execute().data
    scalar_types = (int, float, str, unicode)

    listQuery = []
    for record in qd:
        row = []
        for value in record.values:
            logger.debug(u"Type : %s" % type(value))
            if isinstance(value, Node):
                row.append(value[u"aname"])
            elif isinstance(value, scalar_types):
                row.append(value)
        listQuery.append(row)

    return listQuery, qd
def create_user_view_and_return_views(self, graph_db, username, productNodeId):
    """Record that a user viewed a product and return the user's view trail.

    Merges a ``VIEWED`` relationship from the user to the product, stamps it
    with the current time (raw timestamp plus a display string), then reads
    back all of the user's views, newest first.

    :returns: the output of ``get_product_trail_results_as_json`` for the
        query result
    """
    # create timestamp and string display
    ts = time.time()
    viewed_at = datetime.fromtimestamp(int(ts))
    timestampAsStr = (viewed_at.strftime('%m/%d/%Y') + " at " +
                      viewed_at.strftime('%I:%M %p'))

    statement = (" MATCH (p:Product), (u:User { username:{u} })"
                 " WHERE id(p) = {productNodeId}"
                 " WITH u,p"
                 " MERGE (u)-[r:VIEWED]->(p)"
                 " SET r.dateAsStr={timestampAsStr}, r.timestamp={ts}"
                 " WITH u "
                 " MATCH (u)-[r:VIEWED]->(p)"
                 " RETURN p.title as title, r.dateAsStr as dateAsStr"
                 " ORDER BY r.timestamp desc")
    rows = neo4j.CypherQuery(graph_db, statement).execute(
        productNodeId=productNodeId,
        u=username,
        timestampAsStr=timestampAsStr,
        ts=ts)
    return self.get_product_trail_results_as_json(rows)
def predice(self, nodo, rel, fast, top_n, filtrado):
    """Predict the target(s) of relationship ``rel`` for node ``nodo``.

    Fast mode asks ``self.similares`` for the entries closest to
    ``w2v[nodo] + m_vectors[str(rel)]`` and returns their first elements as
    a list. Slow mode walks the known links of type ``rel``, looks up the
    neighbours of each link's other endpoint in the graph, and returns the
    candidate that was voted for most often.

    :param nodo: key of the source node in ``self.w2v``
    :param rel: relationship key into ``self.r_types`` / ``self.m_vectors``
    :param fast: truthy selects the vector-arithmetic mode
    :param top_n: passed through to ``self.similares`` (fast mode)
    :param filtrado: passed through to ``self.similares`` (fast mode)
    :returns: fast mode - list of candidates; slow mode - the most voted
        candidate; in both modes ``""`` when there is nothing to return
    """
    if fast:
        sim = self.similares(nodo,
                             [self.w2v[nodo] + self.m_vectors[str(rel)]],
                             [], top_n, filtrado)
        f = [s[0] for s in sim]
        return f if f else ""

    votos = []
    for r in self.r_types[rel]:
        other = r["t"] if r["s"] == nodo else r["s"]
        # NOTE(review): `label` is not defined in this function; it is
        # resolved from an enclosing/global scope. get_rels uses self.label
        # for the same purpose - confirm which one is intended.
        # The node value is sent as a parameter so quotes in it cannot break
        # or alter the statement.
        p2 = neo4j.CypherQuery(
            self.graph_db,
            "match (n)-[:" + rel[0] + "]-(m) where n." + label
            + " = {other} return m." + label).execute(other=other)
        print(p2)
        for p in p2:
            prop2 = p["m." + label].replace(" ", "_")
            other = other.replace(" ", "_")
            if other in self.w2v and prop2 in self.w2v:
                votos.append(self.similares([nodo, other], [prop2])[0][0])

    # max() raises ValueError on an empty sequence; mirror the fast mode's
    # empty result instead.
    if not votos:
        return ""
    return max(set(votos), key=votos.count)
def get_rels(self, traversals):
    """Load all graph relationships and group their vectors by type.

    The relationship list is read from the cache file
    ``models/<bd>-trels.p`` when it exists; otherwise it is queried from the
    graph and written to that file. Every relationship whose two endpoints
    are both present in ``self.w2v`` becomes a link dict (``tipo``, ``s``,
    ``t``, ``tipot``, ``v`` = target vector minus source vector), and the
    links are stored by relationship type in ``self.r_types``.

    :param traversals: unused here
    """
    cache_path = "models/" + self.bd + "-trels.p"

    if not os.path.exists(cache_path):
        # Query first and only then create the file, so a failing query no
        # longer leaves an empty cache behind that breaks the next call.
        consulta = neo4j.CypherQuery(
            self.graph_db, "match (n)-[r]->(m) return n." + self.label +
            " as s,m." + self.label +
            " as t ,r,type(r) as tipo,labels(m) as tipot").execute()
        todas = [[c.s, c.tipo, c.t, c.tipot] for c in consulta]
        # File modes are kept as in the original ("w"/"r") so caches that
        # already exist stay readable.
        with open(cache_path, "w") as f:
            pickle.dump(todas, f)
    else:
        # NOTE(review): pickle.load runs whatever the file encodes - only
        # safe while the models/ directory is trusted.
        with open(cache_path, "r") as f:
            todas = pickle.load(f)

    links = dict()
    for l in todas:
        if not (l[0] and l[1] and l[2]):
            continue
        etiquetas = l[3]
        link = {
            "tipo": l[1],
            "s": l[0].replace(" ", "_"),
            "t": l[2].replace(" ", "_"),
            # a node without labels used to raise IndexError here
            "tipot": etiquetas[0].replace(" ", "_") if etiquetas else "",
        }
        if link["s"] in self.w2v and link["t"] in self.w2v:
            link["v"] = self.w2v[link["t"]] - self.w2v[link["s"]]
            links.setdefault(link["tipo"], []).append(link)

    self.r_types = links
def get_all_artists():
    """Return ``get_properties()`` of every node in the ``Artists`` index."""
    cypher = """
        START root=node:Artists("*:*")
        RETURN root
        """
    artists = []
    for record in neo4j.CypherQuery(graph_db, cypher).execute():
        artists.append(record[0].get_properties())
    return artists
def _jsonCorsResponse(payload, status=200):
    """Wrap ``payload`` as a JSON response that any origin may read."""
    response = Response(json.dumps(payload), status=status,
                        mimetype='application/json')
    response.headers['Access-Control-Allow-Origin'] = "*"
    return response


def foo():
    """Return an artist and its two-hop ``RELATED`` cycles as JSON.

    Reads the ``artist`` request argument, answers from ``timeout_cache``
    when possible, otherwise queries up to 100 cycles
    ``artist <- artist2 -> artist3 -> artist`` and caches the outcome.

    The body is ``[origin, related]`` where ``origin`` holds the artist's
    properties and each ``related`` entry is
    ``(r.score, artist2 properties, r1.score, artist3 properties)``.
    Cache hits and misses now produce the same kind of response (JSON
    mimetype plus CORS header). A missing argument yields 400 and an unknown
    artist 404 instead of an unhandled exception.
    """
    artist = request.args.get('artist')
    if artist is None:
        return _jsonCorsResponse({"error": "missing 'artist' parameter"}, 400)

    try:
        cached = timeout_cache[artist]
    except KeyError:
        print("no cache")
    else:
        print("cached")
        return _jsonCorsResponse(cached)

    # The value used to be formatted straight into the statement, so callers
    # had to send it already quoted and could inject arbitrary Cypher. It is
    # now bound as a parameter; one pair of surrounding quotes is stripped so
    # existing callers that send a quoted value keep working.
    name = artist
    if len(name) >= 2 and name[0] == name[-1] and name[0] in "\"'":
        name = name[1:-1]

    cypher = """
        START artist=node:node_auto_index(name={name})
        MATCH (artist)<-[r:RELATED]-(artist2)-[r1:RELATED]->(artist3)-[r2:RELATED]->(artist)
        RETURN artist, r.score, artist2, r1.score, artist3, r2.score
        limit 100
        """
    result = list(neo4j.CypherQuery(graph_db, cypher).execute(name=name))
    if not result:
        # result[0][0] used to raise IndexError here
        return _jsonCorsResponse([None, []], 404)

    origin = result[0][0].get_properties()
    related = [(i[1], i[2].get_properties(), i[3], i[4].get_properties())
               for i in result]
    res = [origin, related]
    timeout_cache[artist] = res
    return _jsonCorsResponse(res)
def _cypher(self, format_, query, params=None):
    """Run a Cypher statement and write its records to ``self._out``.

    A ``query`` of "-" means the statement text is read from ``self._in``.

    :param format_: output format handed to ``ResultWriter.write``
    :param query: statement text, or "-" to read it from ``self._in``
    :param params: optional dict of query parameters
    """
    statement = self._in.read() if query == "-" else query
    bound = params or {}
    record_set = neo4j.CypherQuery(self._graph, statement).execute(**bound)
    ResultWriter(self._out).write(format_, record_set)
def get_products_has_a_tag_and_user_uses_a_matching_tag(self, graph_db):
    """Find products that share a tag with users.

    For each product title, collects the usernames of users that ``USES`` a
    node the product ``HAS``, together with the distinct ``wordPhrase``
    values of those shared nodes.

    :returns: the executed query's result
    """
    statement = (" MATCH (p:Product)-[:HAS]->(t)<-[:USES]-(u:User) " +
                 " RETURN p.title as title , collect(u.username) as users, " +
                 " collect(distinct t.wordPhrase) as tags ")
    return neo4j.CypherQuery(graph_db, statement).execute()
def gen_bp_rules_by_ruleset(store, rulesetname):
    '''Load the BP rules belonging to the named ruleset.

    Runs CMAconsts.QUERY_RULESET_RULES with the 'rulesetname' parameter and
    returns what store.load_cypher_nodes produces for it as BPRules objects.
    '''
    from py2neo import neo4j
    ruleset_query = neo4j.CypherQuery(store.db, CMAconsts.QUERY_RULESET_RULES)
    query_params = {'rulesetname': rulesetname}
    return store.load_cypher_nodes(ruleset_query, BPRules, params=query_params)
def queryOneNode(label, key, value):
    """Return the single node with the given label whose ``key`` equals ``value``.

    :param label: node label, interpolated into the statement
    :param key: property name, interpolated into the statement
    :param value: property value; compared in its string form, as before
    :returns: the matching node
    :raises ValueError: if the query does not return exactly one record
    """
    # NOTE(review): label and key cannot be sent as Cypher parameters and
    # are still interpolated - callers must not pass untrusted text there.
    query_str = "match (n:%s) where n.%s={value} return (n)" % (label, key)
    graph_db = neo4j.GraphDatabaseService()
    query = neo4j.CypherQuery(graph_db, query_str)
    # The value used to be interpolated inside quotes, so a quote in it
    # broke (or rewrote) the statement. It is now bound as a parameter,
    # stringified the same way the old '%s' interpolation did.
    a, = query.execute(value="%s" % (value,))
    return a[0]
def members_ring_order(self):
    '''Yield the Drones that are members of this ring, starting at the
    first insert point and following this ring's "next" relationship.

    Yields nothing when there is no insert point, and only the insert point
    itself when Store reports it as abstract.
    '''
    ## FIXME - There's a cypher query that will return these all in one go
    # START Drone=node:Drone(Drone="drone000001")
    # MATCH Drone-[:RingNext_The_One_Ring*]->NextDrone
    # RETURN NextDrone.designation, NextDrone
    first = self._insertpoint1
    if first is None:
        return
    if Store.is_abstract(first):
        yield first
        return

    # We can't pre-compile this, but hopefully we won't use it much...
    q = ('''START Drone=node(%s) '''
         '''MATCH p=Drone-[:%s*0..]->NextDrone '''
         '''WHERE length(p) = 0 or Drone <> NextDrone '''
         '''RETURN NextDrone''') % (Store.id(first), self.ournexttype)
    ring_query = neo4j.CypherQuery(CMAdb.cdb.db, q)
    for member in CMAdb.store.load_cypher_nodes(ring_query, Drone):
        yield member
def add_place_to_db(place, points_idx):
    """Merge one OSM place into the graph and add its node to an index.

    The place type comes from the ``place`` tag, falling back to the
    ``shop`` tag. Places with neither (or whose type is literally
    "undefined") are reported on stdout and skipped.

    :param place: dict with a ``tags`` dict plus the properties the MERGE
        statement reads (``lat``, ``lon``, ``name``, ``osmid``)
    :param points_idx: index object; the merged node is added to it
    :returns: ``None``
    """
    tags = place['tags']
    if 'place' in tags:
        place_type = tags['place']
    elif 'shop' in tags:
        place_type = tags['shop']
    else:
        place_type = 'undefined'

    if place_type == 'undefined':
        # Was `jsom.dumps`, a NameError that crashed exactly this path.
        print("undefined --> %s" % json.dumps(tags))
        return

    # The place type becomes an extra node label, e.g. "Some_type".
    query_string = """
        MERGE ( place:Place:OSM:""" + place_type.replace(' ', '_').capitalize() + """ {
            lat: {p}.lat,
            lon: {p}.lon,
            name: {p}.name,
            osmid: {p}.osmid,
            raw_tags: {p}.raw_tags
        })
        RETURN place
        """

    # 'tags' is stored JSON-encoded under 'raw_tags'; None and empty-string
    # values are JSON-encoded too; everything else is passed through.
    place_props = {}
    for k, v in place.items():
        if k == 'tags':
            place_props['raw_tags'] = json.dumps(v)
        elif v is None or v == '':
            place_props[k] = json.dumps(v)
        else:
            place_props[k] = v

    q = neo4j.CypherQuery(DB, query_string)
    results = q.execute(p=place_props)
    place_node = results.data[0].values[0]
    points_idx.add('k', 'v', place_node)
def get_user_location(self, graph_db, username):
    """Fetch the location(s) linked to a user through a ``HAS`` relationship.

    :returns: the executed query's result with the columns username,
        address, city, state, zip, lat and lon
    """
    statement = (
        " MATCH (u:User { username : {u} } )-[:HAS]-(l:Location) " +
        " RETURN u.username as username, l.address as address, l.city as city, l.state as state, " +
        " l.zip as zip, l.lat as lat, l.lon as lon")
    location_query = neo4j.CypherQuery(graph_db, statement)
    return location_query.execute(u=username)
def deleteAnalysis(self, uuid):
    """Delete the analysis with this ``uuid`` and what hangs off it.

    Removes the ``has_analysis`` relationship from its raw data, any
    ``generates_annotation`` relationships together with their annotation
    nodes, and the analysis node itself. Does nothing when ``uuid`` is None.
    """
    if uuid is not None:
        query_text = "match (a:analysis {uuid: { uuid }})<-[r:has_analysis]-(x:raw_data) optional match (a)-[r1:generates_annotation]->(an:annotation) delete r, r1, an, a;"
        neo4j.CypherQuery(self.gdb, query_text).execute(uuid=uuid)
    return
def returnUniqueAuthors(project, filename): try: gdb = neo4j.GraphDatabaseService(settings.NEO4J_SERVER) except socket.error: print "Unable to connect to Neo4j: ", settings.NEO4J_SERVER return None index_nodes = gdb.get_or_create_index(neo4j.Node, "index_nodes") try: f = index_nodes.get('filename', project + "/" + filename) f = f[0] except: #we don't have data on this return None authors = [] if f: #http://docs.neo4j.org/refcard/2.0/ q = "START f=node(" + str( f._id ) + ") MATCH f<-[r:knowledge]-(a) RETURN DISTINCT a ORDER BY a.author;" result = neo4j.CypherQuery(gdb, q) for r in result.stream(): authors.append(r.a['author']) return authors