def test_side_effects(self):
    """Exercise remote traversal side-effects: key presence, retrieval and typing.

    NOTE(review): assumes a TinkerPop "modern"-style graph served at
    ws://localhost:45940 — confirm against the test-harness setup.
    """
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:45940/gremlin', 'g')
    #
    g = Graph().traversal().withRemote(connection)
    ### a traversal with no side-effect steps exposes no keys
    t = g.V().hasLabel("project").name.iterate()
    assert 0 == len(t.side_effects.keys())
    try:
        m = t.side_effects["m"]
        raise Exception(
            "Accessing a non-existent key should throw an error")
    except KeyError:
        # expected: unknown side-effect keys raise KeyError
        pass
    ### groupCount("m") registers exactly one retrievable side-effect key
    t = g.V().out("created").groupCount("m").by("name")
    results = t.toSet()
    assert 2 == len(results)
    assert Vertex(3) in results
    assert Vertex(5) in results
    assert 1 == len(t.side_effects.keys())
    assert "m" in t.side_effects.keys()
    m = t.side_effects["m"]
    assert isinstance(m, dict)
    assert 2 == len(m)
    assert 3 == m["lop"]
    assert 1 == m["ripple"]
    # `long` is supplied by statics.load_statics (aliases int on Python 3)
    assert isinstance(m["lop"], long)
    assert isinstance(m["ripple"], long)
    ### two keys when groupCount and aggregate are combined
    t = g.V().out("created").groupCount("m").by("name").name.aggregate("n")
    results = t.toSet()
    assert 2 == len(results)
    assert "lop" in results
    assert "ripple" in results
    assert 2 == len(t.side_effects.keys())
    assert "m" in t.side_effects.keys()
    assert "n" in t.side_effects.keys()
    n = t.side_effects.get("n")
    assert isinstance(n, dict)
    assert 2 == len(n)
    assert "lop" in n.keys()
    assert "ripple" in n.keys()
    assert 3 == n["lop"]
    assert 1 == n["ripple"]
    # a side-effect injected via withSideEffect is visible to the remote lambda
    t = g.withSideEffect('m', 32).V().map(lambda: "x: x.sideEffects('m')")
    results = t.toSet()
    assert 1 == len(results)
    assert 32 == list(results)[0]
    assert 32 == t.side_effects['m']
    assert 1 == len(t.side_effects.keys())
    try:
        x = t.side_effects["x"]
        raise Exception(
            "Accessing a non-existent key should throw an error")
    except KeyError:
        pass
    connection.close()
def test_traversals(self):
    """Smoke-test basic remote traversals: counts, slicing and profiling."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    assert "remoteconnection[ws://localhost:8182/gremlin,g]" == str(connection)
    g = Graph().traversal().withRemote(connection)
    # `long` is supplied by statics.load_statics (aliases int on Python 3)
    assert long(6) == g.V().count().toList()[0]
    # assert Vertex(1) == g.V(1).next()
    assert 1 == g.V(1).id().next()
    assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
    assert 1 == len(g.V(1).toList())
    assert isinstance(g.V(1).toList(), list)
    results = g.V().repeat(out()).times(2).name
    results = results.toList()
    assert 2 == len(results)
    assert "lop" in results
    assert "ripple" in results
    # range/limit steps expressed through Python slice syntax
    # assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
    assert 1 == g.V().repeat(both()).times(5)[0].count().next()
    assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
    assert 4 == g.V()[2:].count().next()
    assert 2 == g.V()[:2].count().next()
    # todo: need a traversal metrics deserializer
    g.V().out().profile().next()
    connection.close()
def test_side_effect_close(self):
    """Verify side-effect cache semantics across a side_effects.close().

    After close() only locally-cached keys remain readable; fetching an
    uncached key from the server must fail.
    """
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    t = g.V().aggregate('a').aggregate('b')
    t.toList()
    # The 'a' key should return some side effects
    results = t.side_effects.get('a')
    assert results
    # Close result is None
    results = t.side_effects.close()
    assert not results
    # Shouldn't get any new info from server
    # 'b' isn't in local cache
    results = t.side_effects.get('b')
    assert not results
    # But 'a' should still be cached locally
    results = t.side_effects.get('a')
    assert results
    # 'a' should have been added to local keys cache, but not 'b'
    results = t.side_effects.keys()
    assert len(results) == 1
    a, = results
    assert a == 'a'
    # Try to get 'b' directly from server, should throw error
    with pytest.raises(Exception):
        t.side_effects.value_lambda('b')
    connection.close()
def test_traversals(self):
    """Smoke-test basic remote traversals against localhost:8182."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    assert "remoteconnection[ws://localhost:8182/gremlin,g]" == str(
        connection)
    #
    g = Graph().traversal().withRemote(connection)
    # assert 6L == g.V().count().toList()[0]
    # assert Vertex(1) == g.V(1).next()
    assert 1 == g.V(1).id().next()
    assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
    assert 1 == len(g.V(1).toList())
    assert isinstance(g.V(1).toList(), list)
    #
    results = g.V().repeat(out()).times(2).name.toList()
    assert 2 == len(results)
    assert "lop" in results
    assert "ripple" in results
    # range/limit steps expressed through Python slice syntax
    # assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
    assert 1 == g.V().repeat(both()).times(5)[0].count().next()
    assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
    assert 4 == g.V()[2:].count().next()
    assert 2 == g.V()[:2].count().next()
    # todo: need a traversal metrics deserializer
    g.V().out().profile().next()
    connection.close()
def test_strategies(self):
    """Verify withStrategies / withoutStrategies / withComputer remotely."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    # generic TraversalStrategy form of SubgraphStrategy
    g = Graph().traversal().withRemote(connection). \
        withStrategies(TraversalStrategy("SubgraphStrategy",
                                         {"vertices": __.hasLabel("person"),
                                          "edges": __.hasLabel("created")}))
    assert 4 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert 1 == g.V().label().dedup().count().next()
    assert "person" == g.V().label().dedup().next()
    # the typed SubgraphStrategy helper should behave identically
    g = Graph().traversal().withRemote(connection). \
        withStrategies(SubgraphStrategy(vertices=__.hasLabel("person"),
                                        edges=__.hasLabel("created")))
    assert 4 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert 1 == g.V().label().dedup().count().next()
    assert "person" == g.V().label().dedup().next()
    # strategy removal plus graph-computer configuration
    g = g.withoutStrategies(SubgraphStrategy). \
        withComputer(workers=4, vertices=__.has("name", "marko"),
                     edges=__.limit(0))
    assert 1 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert "person" == g.V().label().next()
    assert "marko" == g.V().name.next()
    #
    g = Graph().traversal().withRemote(connection).withComputer()
    assert 6 == g.V().count().next()
    assert 6 == g.E().count().next()
    connection.close()
def create_vertices(term: str, entities: list):
    """Create (or reuse) a ``term`` vertex and link it to entity vertices.

    :param term: search term to upsert as a ``term`` vertex
    :param entities: dicts carrying ``entity``, ``score`` and ``type`` keys
    """
    graph = Graph()
    connection = DriverRemoteConnection(
        f'wss://{os.environ["NEPTUNE_ENDPOINT"]}:8182/gremlin', 'g')
    g = graph.traversal().withRemote(connection)

    # Check if a vertex has been created for the term
    term_vertex = g.V().has("term", "value", term)
    term_vertex = term_vertex.next() if term_vertex.hasNext() else g.addV(
        "term").property("value", term).next()

    # Create an entity vertex for each and link to term
    for e in entities:
        # BUG FIX: the lookup previously compared the "value" property to the
        # whole dict `e`; it must compare against the entity name e["entity"],
        # otherwise no existing entity vertex is ever found and duplicates
        # are created on every call.
        entity_vertex = g.V().has("entity", "value", e["entity"])
        entity_vertex = entity_vertex.next() if entity_vertex.hasNext() else \
            g.addV("entity") \
            .property("value", e["entity"]) \
            .property("score", e["score"]) \
            .property("type", e["type"]).next()
        g.V(term_vertex).addE("has_entity").to(entity_vertex).iterate()
    # Close so the websocket does not leak between invocations
    connection.close()
def go():
    """Coroutine: open a pooled remote connection and verify the modern
    graph contains exactly six vertices."""
    remote = DriverRemoteConnection('ws://localhost:45940/gremlin',
                                    'gmodern', pool_size=4)
    source = Graph().traversal().withRemote(remote)
    yield gen.sleep(0)
    vertices = source.V().toList()
    assert len(vertices) == 6
    remote.close()
def test_side_effect_close(self):
    """Check which side-effect keys stay readable after close().

    Keys already fetched ('a') remain in the local cache; unfetched keys
    ('b') are gone, and asking the server for them must raise.
    """
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    t = g.V().aggregate('a').aggregate('b')
    t.toList()
    # The 'a' key should return some side effects
    results = t.side_effects.get('a')
    assert results
    # Close result is None
    results = t.side_effects.close()
    assert not results
    # Shouldn't get any new info from server
    # 'b' isn't in local cache
    results = t.side_effects.get('b')
    assert not results
    # But 'a' should still be cached locally
    results = t.side_effects.get('a')
    assert results
    # 'a' should have been added to local keys cache, but not 'b'
    results = t.side_effects.keys()
    assert len(results) == 1
    a, = results
    assert a == 'a'
    # Try to get 'b' directly from server, should throw error
    with pytest.raises(Exception):
        t.side_effects.value_lambda('b')
    connection.close()
def setup_graph(conn_string=DEFAULT_LOCAL_CONNECTION_STRING):
    """
    Establish the connection to a property graph service using the connection
    string and return the gremlin graph traversal source.
    :param conn_string: connection parameter
    :return: gremlin graph traversal source
    :raises ConnectionError: when the Gremlin server cannot be reached
    """
    try:
        logging.debug('Trying To Connect')
        # Probe the endpoint first: open and immediately close a throwaway
        # connection so a bad endpoint fails fast, then open the real one.
        connection = DriverRemoteConnection(conn_string, 'g')
        connection.close()
        connection = DriverRemoteConnection(conn_string, 'g')
        logging.debug('Connected')
        # The connection should be closed on shut down to close open
        # connections with connection.close()
        g = traversal().withRemote(connection)
        logging.info('Successfully connected to the graph server')
    except Exception as e:  # broad on purpose: surface any driver failure
        logging.error("Could not connect to the Gremlin server. Run for example:" \
                      "\n'docker run --rm --name janusgraph-default janusgraph/janusgraph:latest' OR" \
                      "\n'docker run --name gremlin-server -p 8182:8182 tinkerpop/gremlin-server'")
        # BUG FIX: chain the original exception so the root cause is not lost
        raise ConnectionError("Could not connect to the Gremlin server.") from e
    return g
def go():
    """Coroutine that checks the modern graph exposes exactly six vertices."""
    remote = DriverRemoteConnection('ws://localhost:45940/gremlin',
                                    'gmodern', pool_size=4)
    source = traversal().withRemote(remote)
    yield gen.sleep(0)
    count = len(source.V().toList())
    assert count == 6
    remote.close()
def lambda_handler(event, context):
    """Create a FRIEND edge between two users unless already connected.

    :param event: must carry ``userId1`` and ``userId2``
    :return: ``{'statusCode': 400}`` when already friends, else 200
    """
    graph = Graph()
    uid1 = event["userId1"]
    uid2 = event["userId2"]
    remoteConn = DriverRemoteConnection('ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin', 'g')
    g = graph.traversal().withRemote(remoteConn)
    friends = g.V().hasLabel('User').has('uid', uid1).\
        both('FRIEND').aggregate('friends'). \
        valueMap().toList()
    # valueMap() wraps each property value in a list; unwrap the single uid
    existing_uids = [item["uid"][0] for item in friends]
    if uid2 in existing_uids:
        # BUG FIX: close the connection on this early-return path too —
        # previously the websocket leaked whenever the users were already
        # friends.
        remoteConn.close()
        return {
            'statusCode': 400
        }
    a = g.V().hasLabel('User').has('uid', uid1).next()
    b = g.V().hasLabel('User').has('uid', uid2).next()
    g.V(a).addE('FRIEND').to(b).iterate()
    remoteConn.close()
    return {
        'statusCode': 200
    }
def test_strategies(self):
    """Validate traversal strategies (subgraph filtering, graph computer)."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    # SubgraphStrategy via its generic TraversalStrategy spelling
    g = Graph().traversal().withRemote(connection). \
        withStrategies(TraversalStrategy("SubgraphStrategy",
                                         {"vertices": __.hasLabel("person"),
                                          "edges": __.hasLabel("created")}))
    assert 4 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert 1 == g.V().label().dedup().count().next()
    assert "person" == g.V().label().dedup().next()
    # same filtering through the dedicated SubgraphStrategy class
    g = Graph().traversal().withRemote(connection). \
        withStrategies(SubgraphStrategy(vertices=__.hasLabel("person"),
                                        edges=__.hasLabel("created")))
    assert 4 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert 1 == g.V().label().dedup().count().next()
    assert "person" == g.V().label().dedup().next()
    # drop the strategy, then configure a filtered graph computer
    g = g.withoutStrategies(SubgraphStrategy). \
        withComputer(workers=4, vertices=__.has("name", "marko"),
                     edges=__.limit(0))
    assert 1 == g.V().count().next()
    assert 0 == g.E().count().next()
    assert "person" == g.V().label().next()
    assert "marko" == g.V().name.next()
    #
    g = Graph().traversal().withRemote(connection).withComputer()
    assert 6 == g.V().count().next()
    assert 6 == g.E().count().next()
    connection.close()
def main():
    """Connect to the graph endpoint named by DB_ENDPOINT and seed users."""
    endpoint = 'ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin'
    connection = DriverRemoteConnection(endpoint, 'g')
    g = Graph().traversal().withRemote(connection)
    seed_users(g)
    connection.close()
def handle_request():
    """Probe the Gremlin server; True when a trivial traversal succeeds."""
    try:
        conn = DriverRemoteConnection("ws://localhost:45940/gremlin", "g")
        source = traversal().withRemote(conn)
        source.V().limit(1).toList()
        conn.close()
    except RuntimeError:
        # connection-level failures surface as RuntimeError from the driver
        return False
    return True
def test_side_effects(self):
    """Exercise remote side-effects: missing keys, groupCount, aggregate,
    and withSideEffect visibility from a remote lambda."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    #
    g = Graph().traversal().withRemote(connection)
    ### no side-effect steps -> no keys
    t = g.V().hasLabel("project").name.iterate()
    assert 0 == len(t.side_effects.keys())
    try:
        m = t.side_effects["m"]
        raise Exception("Accessing a non-existent key should throw an error")
    except KeyError:
        # expected: unknown side-effect keys raise KeyError
        pass
    ### groupCount("m") registers exactly one key
    t = g.V().out("created").groupCount("m").by("name")
    results = t.toSet()
    assert 2 == len(results)
    assert Vertex(3) in results
    assert Vertex(5) in results
    assert 1 == len(t.side_effects.keys())
    assert "m" in t.side_effects.keys()
    m = t.side_effects["m"]
    assert isinstance(m, dict)
    assert 2 == len(m)
    assert 3 == m["lop"]
    assert 1 == m["ripple"]
    # `long` is supplied by statics.load_statics (aliases int on Python 3)
    assert isinstance(m["lop"], long)
    assert isinstance(m["ripple"], long)
    ### groupCount + aggregate -> two keys
    t = g.V().out("created").groupCount("m").by("name").name.aggregate("n")
    results = t.toSet()
    assert 2 == len(results)
    assert "lop" in results
    assert "ripple" in results
    assert 2 == len(t.side_effects.keys())
    assert "m" in t.side_effects.keys()
    assert "n" in t.side_effects.keys()
    n = t.side_effects.get("n")
    assert isinstance(n, dict)
    assert 2 == len(n)
    assert "lop" in n.keys()
    assert "ripple" in n.keys()
    assert 3 == n["lop"]
    assert 1 == n["ripple"]
    # side-effect injected client-side is visible to the remote lambda
    t = g.withSideEffect('m', 32).V().map(lambda: "x: x.sideEffects('m')")
    results = t.toSet()
    assert 1 == len(results)
    assert 32 == list(results)[0]
    assert 32 == t.side_effects['m']
    assert 1 == len(t.side_effects.keys())
    try:
        x = t.side_effects["x"]
        raise Exception("Accessing a non-existent key should throw an error")
    except KeyError:
        pass
    connection.close()
def actors(request):
    """Django view: list up to 40 actors connected to the posted movie."""
    movie_name = request.POST['movie_name']
    connection = DriverRemoteConnection('ws://<neptune endpoint>:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    actor_values = (g.V().has('title', movie_name)
                    .in_().limit(40).values().toList())
    connection.close()
    return render(request, 'polls/movie-results.html',
                  {'movie': movie_name, 'actors': actor_values})
def lambda_handler(event, context):
    """Return the friend list (uid, firstName, lastName, pic_url) for
    ``event["userId"]``.

    User display data comes from the DynamoDB ``user`` table; the FRIEND
    edges come from the Neptune graph.
    """
    print(event)
    graph = Graph()
    table = dynamodb_client.Table('user')
    tmp = table.scan()
    # uid -> [(firstName, lastName, pic_url)]
    dict1 = {}
    for item in tmp['Items']:
        dict1[item['UID']] = []
        pair = (item['firstName'], item['lastName'], item['pic_url'])
        dict1[item['UID']].append(pair)
    print(dict1)
    remoteConn = DriverRemoteConnection('ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin', 'g')
    g = graph.traversal().withRemote(remoteConn)
    key = event["userId"]
    friends = g.V().hasLabel('User').has('uid', key).\
        both('FRIEND').aggregate('friends'). \
        valueMap().toList()
    list2 = []
    for item in friends:
        tmplist = []
        uid = item["uid"]  # valueMap() wraps property values in a list
        tmplist.append(uid[0])
        for tmp in dict1[uid[0]][0]:
            tmplist.append(tmp)
        list2.append(tmplist)
    # BUG FIX: close the connection before returning — the close() call
    # previously sat in unreachable code after the return, so the websocket
    # leaked on every invocation. The dead recommendation block (which also
    # referenced an undefined `recommend` variable) has been removed.
    remoteConn.close()
    return {'statusCode': 200, 'body': list2}
def movies(request):
    """Django view: list up to 40 movies linked to the posted actor."""
    actor_name = request.POST['actor_name']
    connection = DriverRemoteConnection('ws://<path to neptune>:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    #print(g.V().limit(2).toList())
    movie_values = (g.V().has('name', actor_name)
                    .out().limit(40).values().toList())
    connection.close()
    return render(request, 'polls/movie-results.html',
                  {'actor': actor_name, 'movies': movie_values})
def main():
    """Connect to the graph at DB_ENDPOINT and drop all existing vertices."""
    endpoint = 'ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin'
    connection = DriverRemoteConnection(endpoint, 'g')
    g = Graph().traversal().withRemote(connection)
    print('Flushing existing vertices in local db...')
    flush(g)
    print('Done.')
    connection.close()
def _executor(self, q, loop):
    """Worker: run a traversal on `loop` and report the outcome via `q`.

    Puts 'success!' on success, otherwise the exception type.
    """
    connection = None
    try:
        connection = DriverRemoteConnection('ws://localhost:45940/gremlin', 'g',
                                            loop=loop)
        g = Graph().traversal().withRemote(connection)
        assert len(g.V().toList()) == 6
    except Exception:  # narrowed from bare `except:`; outcome still queued
        q.put(sys.exc_info()[0])
    else:
        q.put('success!')
    finally:
        # BUG FIX: close() previously ran unconditionally and raised a
        # NameError whenever the connection constructor itself failed.
        if connection is not None:
            connection.close()
def separation(request):
    """Django view: up to 40 name/title paths linking two posted actors."""
    statics.load_statics(globals())
    raw_names = request.POST['actor_names']
    names = [part.strip() for part in raw_names.split(',')]
    connection = DriverRemoteConnection(
        'ws://<neptune endpoint>.com:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    paths = (g.V().has('name', names[0])
             .repeat(out().in_().simplePath())
             .until(has('name', names[1]))
             .path().by('name').by('title')
             .limit(40).toList())
    connection.close()
    return render(request, 'polls/movie-results.html',
                  {'actors': raw_names, 'separation': paths})
def kg_testing(inst=1, M=10, N=5, testing=False):
    """Build a sample knowledge graph, write GloVe and OCR data into it,
    then run the thought-prediction test.

    :param inst: configuration instance index into `cfg`
    :param M: number of data points (and GloVe rows retained)
    :param N: requested number of properties (capped by const.MAX_FEATURES)
    :param testing: passed through to the project `kg`/`cognitive` helpers
    """
    # number of data points and properties
    m = M
    p = N
    if p > const.MAX_FEATURES:
        p = const.MAX_FEATURES
    # define the number of splits of each property
    s = p if p <= const.MAX_SPLITS else const.MAX_SPLITS
    # uniformly sample values between 0 and 1 as the data set
    dat = np.random.sample(size=(m, p))
    # create column names (normally obtained by var.dtype.names)
    #
    # use an explicit dict to make sure that the order is preserved
    coln = [("col" + str(i), (i - 1)) for i in range(1, p + 1)]
    # create the data for the sample knowledge graph (only one brain)
    kgdat = create_kg(inst, dat, s, [[int(i) for i in np.asarray(coln)[:, 1]]])
    # populate the knowledge graph into the remote DB
    #
    # instantiate a JanusGraph object
    graph = Graph()
    # connection to the remote server
    conn = DriverRemoteConnection(url_kg(inst), 'g')
    # get the remote graph traversal
    g = graph.traversal().withRemote(conn)
    # we only want to process the right brain
    print(kg(const.V, inst, coln, kgdat, g, False, testing))
    # after building the knowledge graph, use the output of ocr to test the GloVe write
    #
    # call cognitive to produce the ocr output
    oret = ocr_testing()
    # get the location of the glove file
    src = cfg["instances"][inst]["src"]["index"]
    typ = cfg["instances"][inst]["src"]["types"]["glove"]
    gfl = cfg["instances"][inst]["sources"][src][typ]["connection"]["file"]
    # call extendglove to produce the GloVe output and transform it to an array
    # with the first term in each row being the key and all other terms are values
    rdat = extendglove(oret[0][0], gfl[0])
    # keep only the first M (key, value) pairs
    rdat = [(k, v) for k, v in list(rdat.items())[0:M]]
    # write the glove output to the knowledge graph
    print(kg(const.ENTS, inst, coln, rdat, g, False, testing))
    # get the ocr data ... using the real way to get the ocr data here
    typ = cfg["instances"][inst]["src"]["types"]["ocrf"]
    pdfs = cfg["instances"][inst]["sources"][src][typ]["connection"]["files"]
    cdat = cognitive(const.OCR, pdfs, inst, False, testing)
    # write the ocr data to the graph
    print(kg(const.CONS, inst, coln, cdat[1:], g, True, testing))
    # close the connection
    conn.close()
    # test the thought function with the default number of predictions 3
    print(thought(inst, coln))
def main():
    """Dump the graph's node schema (labels + property value classes) as YAML
    and verify it round-trips through safe_load."""
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin',
                                        '{0}_traversal'.format('bandi'))
    # g = traversal().withRemote(connection)
    c = client.Client('ws://localhost:8182/gremlin',
                      '{0}_traversal'.format('bandi'))
    # Groovy query: group property keys by label with their Java value classes
    results = c.submit(
        'g.V().group().by(label).by(properties().group().by(key).by(value().map{it.get().getClass()}))'
    ).all().result()
    nodes_info = NodeInfo.from_result(results[0])
    # BUG FIX: on Python 3 `map(...)` returns a lazy iterator which
    # yaml.safe_dump cannot represent; materialize the dicts as a list first.
    encoded = yaml.safe_dump({'nodes': [e.to_dict() for e in nodes_info]})
    print(encoded)
    print(NodeInfo.from_dict(yaml.safe_load(encoded)))
    connection.close()
def joint_movies(request):
    """Django view: movies shared by two (or three) comma-separated actors."""
    statics.load_statics(globals())
    raw_names = request.POST['actor_names']
    names = [part.strip() for part in raw_names.split(',')]
    connection = DriverRemoteConnection('ws://<neptune endpoint>:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    if len(names) == 2:
        shared = g.V().has('name', names[0]).repeat(
            out().where(__.in_().has('name', names[1]))
        ).emit().values().toList()
    else:
        shared = g.V().has('name', names[0]).repeat(
            out().where(__.in_().has('name', names[1]))
                 .where(__.in_().has('name', names[2]))
        ).emit().values().toList()
    connection.close()
    return render(request, 'polls/movie-results.html',
                  {'actor': raw_names, 'movies': shared})
def lambda_handler(event, context):
    """Recommend up to 10 friends-of-friends for ``event["userId"]``.

    NOTE(review): display data comes from the DynamoDB ``user`` table;
    assumes `dynamodb` and `prediction` are defined at module level —
    confirm against the surrounding imports.
    """
    graph = Graph()
    remoteConn = DriverRemoteConnection('ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin', 'g')
    g = graph.traversal().withRemote(remoteConn)
    # a=g.V().hasLabel('User').has('uid', '1834389').next()
    # b=g.V().hasLabel('User').has('uid', '594112').next()
    # g.V(a).addE('Friend').to(b).iterate()
    key = event["userId"]
    # friends-of-friends who are not already direct friends, counted by uid
    recommend = g.V().hasLabel('User').has('uid', key).\
        both('FRIEND').aggregate('friends'). \
        both('FRIEND'). \
        where(P.without('friends')). \
        groupCount().by('uid'). \
        next()
    friends = g.V().hasLabel('User').has('uid', key).\
        both('FRIEND').aggregate('friends'). \
        valueMap().toList()
    print(friends)
    count = 0
    # order candidate uids by descending co-occurrence count
    recommend_list = {
        k: v
        for k, v in sorted(recommend.items(), key=lambda item: -item[1])
    }
    list1 = []
    for item in recommend_list:
        if item != key:
            # enrich each recommended uid with display data from DynamoDB
            data = dynamodb.get_item(TableName='user',
                                     Key={'UID': {
                                         'S': str(item)
                                     }})
            pair = (str(item), data['Item']['firstName']['S'],
                    data['Item']['lastName']['S'],
                    data['Item']['pic_url']['S'])
            list1.append(pair)
            count += 1
            # cap the recommendation list at 10 entries
            if (count == 10):
                break
    prediction(key)
    print(list1)
    remoteConn.close()
    return {'statusCode': 200, 'body': list1}
def _executor(q, conn):
    """Worker: run an async traversal and report the outcome through `q`.

    Puts 'success!' on success, otherwise the exception type. When `conn`
    is falsy a connection is created (and closed) locally.
    """
    close = False
    if not conn:
        # This isn't a fixture so close manually
        close = True
        conn = DriverRemoteConnection(
            'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
    try:
        g = traversal().withRemote(conn)
        future = g.V().promise()
        t = future.result()
        assert len(t.toList()) == 6
    except Exception:  # narrowed from bare `except:`; outcome still queued
        q.put(sys.exc_info()[0])
    else:
        q.put('success!')
    # Close conn only when this function created it
    if close:
        conn.close()
def _executor(q, conn):
    """Worker: run an async traversal and report the outcome through `q`.

    Puts 'success!' on success, otherwise the exception type. When `conn`
    is falsy a connection is created (and closed) locally.
    """
    close = False
    if not conn:
        # This isn't a fixture so close manually
        close = True
        conn = DriverRemoteConnection(
            'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
    try:
        g = Graph().traversal().withRemote(conn)
        future = g.V().promise()
        t = future.result()
        assert len(t.toList()) == 6
    except Exception:  # narrowed from bare `except:`; outcome still queued
        q.put(sys.exc_info()[0])
    else:
        q.put('success!')
    # Close conn only when this function created it
    if close:
        conn.close()
def get_graph(term: str):
    """Fetch all entities linked to *term* in Neptune, deduplicated by name.

    Returns a list of dicts: T.id / T.label keys are flattened to plain
    'id' / 'label' keys, and single-element property lists are unwrapped.
    """
    connection = DriverRemoteConnection(
        f'wss://{os.environ["NEPTUNE_ENDPOINT"]}:8182/gremlin', 'g')
    g = Graph().traversal().withRemote(connection)
    entities = g.V().has(
        "term", "value", term.upper()).out("has_entity").valueMap(True).toList()
    connection.close()

    # Dedup by entity name client-side — server-side dedup() is unreliable here
    seen_names = set()
    flattened = []
    for entity in entities:
        name = entity["value"][0]
        if name in seen_names:
            continue
        seen_names.add(name)
        record = {}
        for key, value in entity.items():
            key_text = str(key)
            if "T." in key_text:
                # T.id / T.label tokens: keep the bare attribute name and
                # the raw (unwrapped) value
                record[key_text.split(".")[1]] = value
            else:
                record[key] = value[0]
        flattened.append(record)
    return flattened
def test_traversals(self):
    """Smoke-test remote traversals including slices and withSideEffect."""
    statics.load_statics(globals())
    connection = DriverRemoteConnection('ws://localhost:45940/gremlin', 'g')
    assert "remoteconnection[ws://localhost:45940/gremlin,g]" == str(
        connection)
    g = Graph().traversal().withRemote(connection)
    # `long` is supplied by statics.load_statics (aliases int on Python 3)
    assert long(6) == g.V().count().toList()[0]
    # assert Vertex(1) == g.V(1).next()
    assert 1 == g.V(1).id().next()
    assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
    assert 1 == len(g.V(1).toList())
    assert isinstance(g.V(1).toList(), list)
    results = g.V().repeat(out()).times(2).name
    results = results.toList()
    assert 2 == len(results)
    assert "lop" in results
    assert "ripple" in results
    # range/limit steps expressed through Python slice syntax
    # assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
    assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
    assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
    assert 4 == g.V()[2:].count().next()
    assert 2 == g.V()[:2].count().next()
    # a list-valued side effect participates in where(within('a'))
    results = g.withSideEffect(
        'a', ['josh', 'peter'
              ]).V(1).out('created').in_('created').values('name').where(
                  within('a')).toList()
    assert 2 == len(results)
    assert 'josh' in results
    assert 'peter' in results
    # todo: need a traversal metrics deserializer
    g.V().out().profile().next()
    connection.close()
def run_queries(cluster): cluster_url = 'ws://localhost:%s/gremlin' % cluster['Port'] # test Client API print('Connecting to Neptune Graph DB cluster URL: %s' % cluster_url) graph_client = gremlin_client.Client(cluster_url, 'g') values = '[1,2,3,4]' print('Submitting values: %s' % values) result_set = graph_client.submit(values) future_results = result_set.all() results = future_results.result() print('Received values from cluster: %s' % results) assert results == [1, 2, 3, 4] future_result_set = graph_client.submitAsync('[1,2,3,4]') result_set = future_result_set.result() result = result_set.one() assert result == [1, 2, 3, 4] assert result_set.done.done() graph_client.close() # test DriverRemoteConnection API graph = Graph() conn = DriverRemoteConnection(cluster_url, 'g') g = graph.traversal().withRemote(conn) vertices_before = g.V().toList() print('Existing vertices in the graph: %s' % vertices_before) print('Adding new vertices "v1" and "v2" to the graph') g.addV().property('id', 'v1').property('name', 'Vertex 1').next() g.addV().property('id', 'v2').property('name', 'Vertex 2').next() vertices_after = g.V().toList() print('New list of vertices in the graph: %s' % vertices_after) result = set(vertices_after) - set(vertices_before) assert len(result) == 2 conn.close()
# col3, col2 is col1 id_0 = name_to_id.get(vals[0], None) id_1 = name_to_id.get(vals[1], None) id_2 = name_to_id.get(vals[2], None) if id_0 is not None: if id_1 is not None: g.addE('is').from_(id_1).to(id_0).next() if id_2 is not None: g.addE('is').from_(id_2).to(id_0).next() # col2 knows col3 if id_1 is not None and id_2 is not None: g.addE('knows').from_(id_1).to(id_2).next() return if __name__ == "__main__": # connect gremlin server connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g') g = traversal().withRemote(connection) # remove all vertices and edges in the graph g.V().drop().iterate() # g.E().drop().iterate() # populate data into graph database alerts = pd.read_csv('../data/alerts.csv', header = None).values load_data(g, alerts) connection.close() print("Data is loaded successfully.")
class JanusGraphClient(object):
    """
    JanusGraph Client Builder which adds the Serializers for JanusGraph
    specific objects, predicates etc.
    """

    # Holds the DriverRemoteConnection once connect() has been called
    REMOTE_CONNECTION = None

    def __init__(self, version=3.0):
        """ Initializing with GraphSON version 3.0

        Args:
            version (int): the GraphSON protocol version to use
        """
        self.graphsonVersion = version

    def connect(self, host="localhost", port="8182", traversal_source="g", **kwargs):
        """ Connect to JanusGraph's gremlin-server instance. Takes Host, Port
        and traversal source name.

        Args:
            host (str): The HOST of JanusGraph gremlin-server instance. Defaults to localhost
            port (str): The PORT of JanusGraph gremlin-server instance. Defaults to 8182.
            traversal_source (str): The GraphTraversalSource being exposed from
                gremlin-server instance. Defaults to g

        Keyword Args:
            graphson_reader (GraphSONReader): GraphSONReader object with required
                Deserializers registered
            graphson_writer (GraphSONWriter): GraphSONWriter object with required
                Serializers registered

        Raises:
            AttributeError: When invalid Keyword arguments is provided. The expected
                key needs to be `graphson_reader` and `graphson_writer`.

        Returns:
            JanusGraphClient
        """
        URL = "ws://{}:{}/gremlin".format(host, port)
        if not kwargs:
            # no overrides supplied: fall back to the JanusGraph defaults
            graphson_reader = JanusGraphSONReader().build()
            graphson_writer = JanusGraphSONWriter().build()
        else:
            # both custom serializer objects must be provided together
            if "graphson_reader" in kwargs and "graphson_writer" in kwargs:
                graphson_reader = kwargs["graphson_reader"]
                graphson_writer = kwargs["graphson_writer"]
            else:
                raise AttributeError(
                    "Additional parameters if provided needs to be keywords arguments of "
                    "`graphson_reader` and `graphson_writer`")
        self.REMOTE_CONNECTION = DriverRemoteConnection(
            URL,
            traversal_source,
            graphson_reader=graphson_reader,
            graphson_writer=graphson_writer)
        return self

    def get_connection(self):
        """ Get the RemoteConnection object, so that same can be used to create
        GraphTraversalSource.

        Returns:
            DriverRemoteConnection
        """
        return self.REMOTE_CONNECTION

    def close(self):
        """ Close the remote connection.

        Returns:
            bool: True when the connection closed cleanly, False otherwise
                (including when connect() was never called).
        """
        try:
            self.REMOTE_CONNECTION.close()
            return True
        except:
            # deliberately best-effort: any failure is reported as False
            return False
# NOTE(review): fragment of a seeding script — `g` (traversal source) and
# `remoteConn` are created earlier, outside this chunk.
# Family relationships are stored as paired directed edges, one per direction.
edge = g.V().has('first_name', 'Shane').addE('son_of').to(g.V().has('first_name', 'Edith')).property('known_since','1964').next()
edge = g.V().has('first_name', 'Edith').addE('mother_of').to(g.V().has('first_name', 'Shane')).property('known_since','1964').next()
edge = g.V().has('first_name', 'Shane').addE('husband_of').to(g.V().has('first_name', 'Mary')).property('known_since','1989').next()
edge = g.V().has('first_name', 'Mary').addE('wife_of').to(g.V().has('first_name', 'Shane')).property('known_since','1989').next()
edge = g.V().has('first_name', 'Shane').addE('father_of').to(g.V().has('first_name', 'Betty')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Betty').addE('daughter_of').to(g.V().has('first_name', 'Shane')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Mary').addE('mother_of').to(g.V().has('first_name', 'Betty')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Betty').addE('daughter_of').to(g.V().has('first_name', 'Mary')).property('known_since','1991').next()

# print out all the node's first names
print(g.V().first_name.toList())

# print out all the properties of the person whose first name is Shane
print(g.V().has('person','first_name','Shane').valueMap().next())

# traversing the graph starting with Betty to then Shane to then Edith
print(g.V().has('first_name', 'Betty').out('daughter_of').out('son_of').valueMap().toList())

print("\n\n\n")

# Print out all the nodes
people = g.V().valueMap().toList()
print(people)

# Print out all the connections (edges)
connections = g.E().valueMap().toList()
print(connections)

# Closing the connection
remoteConn.close()
class TrajectoryGraph:
    """Gremlin-backed store of vehicle detections linked into trajectories.

    NOTE(review): connects eagerly to a hard-coded gremlin endpoint on
    construction — confirm that host is reachable in every environment.
    """

    # keys used for gremlin Bindings (use for binding)
    LABEL = "label"
    CAMID = "camId"
    TIME = "time"
    IMAGE = "image"
    OUT_V = "outV"
    IN_V = "inV"
    LIMIT = "limit"
    VID = "vid"
    FEA = "feature"
    CONFIDENCE = "confidence"
    INDEX = "index"

    def __init__(self):
        # Build the bindings helper and open the remote traversal source.
        self.b = Bindings()
        self.graph = Graph()
        self.connection = DriverRemoteConnection('ws://130.207.122.57:8182/gremlin','g')
        self.g = self.graph.traversal().withRemote(self.connection)
        logging.info("Connected")

    def addDetection(self, vehId, camId, timestamp, index):
        """Insert one detection vertex and return its vertex id."""
        v = self.g.addV(self.b.of(TrajectoryGraph.LABEL, vehId))\
            .property(TrajectoryGraph.CAMID, self.b.of(TrajectoryGraph.CAMID, camId))\
            .property(TrajectoryGraph.TIME, self.b.of(TrajectoryGraph.TIME, timestamp))\
            .property(TrajectoryGraph.INDEX, self.b.of(TrajectoryGraph.INDEX, index))\
            .id().next()
        logging.info("Trajectory Vertex v[{}] ({}, {}, {}) created.".format(v, vehId, camId, timestamp))
        return v

    def linkDetection(self, src, dest, confidence):
        """Add a 'next' edge from src to dest carrying a confidence score."""
        logging.info("Link vertex v[{}] to v[{}]. Confidence {}".format(src, dest, confidence))
        self.g.V(self.b.of(TrajectoryGraph.OUT_V, src))\
            .as_("a")\
            .V(self.b.of(TrajectoryGraph.IN_V, dest))\
            .addE(self.b.of(TrajectoryGraph.LABEL, "next"))\
            .from_("a")\
            .property(TrajectoryGraph.CONFIDENCE, self.b.of(TrajectoryGraph.CONFIDENCE, confidence))\
            .iterate()

    def getValueMapById(self, id):
        """Return the full valueMap (including id/label) of vertex `id`."""
        value = self.g.V(self.b.of(TrajectoryGraph.VID, id)).valueMap(True).next()
        logging.info("Get detection valuemap {} for V[{}]".format(value.keys(), id))
        return value

    def getLatestDetectionsByCamId(self, camId, limit):
        """Most recent `limit` detection ids for a camera, newest first."""
        # timelimit support can be considered.
        vehIds = self.g.V().has(TrajectoryGraph.CAMID, self.b.of(TrajectoryGraph.CAMID, camId)).order().by(TrajectoryGraph.TIME, Order.decr).limit(self.b.of(TrajectoryGraph.LIMIT, limit)).id().toList()
        logging.info("LatestDetections by camera {}: {}".format(camId, vehIds))
        return vehIds

    def getNextDetectionsById(self, id, limit):
        """Walk forward along outgoing edges up to `limit` hops."""
        # This can be used to return self
        vehIds = self.g.V(self.b.of(TrajectoryGraph.OUT_V, id)).emit().repeat(__.out()).times(self.b.of(TrajectoryGraph.LIMIT, limit)).id().toList()
        logging.info("NextDetections from V[{}]: {}".format(id, vehIds))
        return vehIds

    def getPrevDetectionsById(self, id, limit):
        """Walk backward along incoming edges up to `limit` hops."""
        vehIds = self.g.V(self.b.of(TrajectoryGraph.IN_V, id)).repeat(__.in_()).times(limit).emit().id().toList()
        # reverse so the results run oldest -> newest
        vehIds = vehIds[::-1]
        logging.info("PrevDetections from V[{}]: {}".format(id, vehIds))
        return vehIds

    def clear(self):
        """Drop every vertex (and therefore every edge) from the graph."""
        logging.info("TrajectoryGraph dropped")
        self.g.V().drop().iterate()

    def shutdown(self):
        """Close the remote connection and release graph references."""
        logging.info("TrajectoryGraph closed")
        self.connection.close()
        self.g = None
        self.graph = None
class GremlinStorageConnector(BaseStorageConnector):
    """Storage Connector for Gremlin databases."""

    # Lazily created traversal source; see the ``g`` property.
    _g: Optional[Traversal] = None
    # Open websocket connection to the Gremlin server (None until ``open``).
    connection: Optional[DriverRemoteConnection] = None

    def __init__(self,
                 endpoint_url: str,
                 supports_multiple_labels=False,
                 test_prefix: str = "",
                 **kwargs) -> None:
        """Create a GremlinStorageConnector.

        Arguments:
            endpoint_url: The url of the gremlin endpoint to connect to
                (e.g. ``ws://localhost:8182``)
            supports_multiple_labels: Some GraphDBs (Neptune/Neo4J) support
                multiple labels on a single vertex.
            test_prefix: A prefix that will be prepended to edge and vertex
                ids to allow scenario separation.
            **kwargs: Any unspecified args will be passed to the
                ``DriverRemoteConnection`` object.
        """
        self.endpoint_url = endpoint_url
        self.supports_multiple_labels = supports_multiple_labels
        self.test_prefix = test_prefix
        self.connection_args = kwargs

    def init(self) -> None:
        """No-op; the connection is opened lazily by the ``g`` property."""
        ...

    @property
    def g(self) -> Traversal:
        """Return the traversal source, opening the connection on first use."""
        if not self.connection:
            self.open()
        if not self._g:
            self._g = traversal().withRemote(self.connection)
        return self._g

    def open(self) -> DriverRemoteConnection:
        """Open the remote connection if it is not already open.

        NOTE(review): annotated to return a ``DriverRemoteConnection`` but the
        body returns ``None``; the ``g`` property reads ``self.connection``
        instead — confirm and either return the connection or fix the
        annotation.
        """
        if not self.connection:
            logger.debug("Opening connection to %s", self.endpoint_url)
            self.connection = DriverRemoteConnection(
                f"{self.endpoint_url}/gremlin", "g", **self.connection_args)

    def close(self) -> None:
        """Close the remote connection and drop the cached traversal source."""
        logger.debug("Closing gremlin connection")
        if self.connection:
            self.connection.close()
            self.connection = None
        self._g = None

    def write_resource(self, resource: CloudWandererResource) -> None:
        """Persist a single resource to storage.

        Arguments:
            resource (CloudWandererResource): The CloudWandererResource to write.
        """
        self._write_resource(resource)
        self._write_dependent_resource_edges(resource)

    def _write_resource(self, resource: CloudWandererResource) -> None:
        """Upsert the resource's vertex, its metadata properties and its relationships."""
        primary_label = generate_primary_label(resource.urn)
        traversal = self._write_vertex(vertex_id=self.generate_vertex_id(
            resource.urn), vertex_labels=[primary_label])
        # Write each URN component as a '_'-prefixed metadata property so
        # resources can later be looked up field-by-field.
        traversal = (traversal.property(
            Cardinality.single, "_cloud_name", resource.urn.cloud_name).property(
                Cardinality.single, "_account_id", resource.urn.account_id).property(
                    Cardinality.single, "_region", resource.urn.region).property(
                        Cardinality.single, "_service", resource.urn.service).property(
                            Cardinality.single, "_resource_type", resource.urn.resource_type).property(
                                Cardinality.single, "_discovery_time", resource.discovery_time.isoformat()).property(
                                    Cardinality.single, "_urn", str(resource.urn)))
        for id_part in resource.urn.resource_id_parts:
            # set_ cardinality: a vertex keeps every id part, not just the last.
            traversal.property(Cardinality.set_, "_resource_id_parts", id_part)
        self._write_properties(
            traversal=traversal,
            properties=resource.cloudwanderer_metadata.resource_data)
        self._write_relationships(resource)
        # Any edge owned by this vertex that was not refreshed by this
        # discovery run is stale and gets dropped.
        self._clean_up_relationships(urn=resource.urn,
                                     cutoff=resource.discovery_time)
        if not resource.urn.is_partial:
            self._repoint_vertex_edges(vertex_label=primary_label,
                                       new_resource_urn=resource.urn)

    def _write_dependent_resource_edges(
            self, resource: CloudWandererResource) -> None:
        """Write a 'has' edge from the resource to each of its dependent resources."""
        for dependent_urn in resource.dependent_resource_urns:
            logger.debug("Writing dependent resource edge from %s to %s",
                         resource.urn, dependent_urn)
            self._write_edge(
                edge_id=self.generate_edge_id(resource.urn, dependent_urn),
                edge_label="has",
                source_vertex_id=self.generate_vertex_id(resource.urn),
                destination_vertex_id=self.generate_vertex_id(dependent_urn),
                owner_id=self.generate_vertex_id(resource.urn),
                discovery_time=resource.discovery_time,
            )

    def _clean_up_relationships(self, urn: PartialUrn,
                                cutoff: datetime) -> None:
        """Drop edges owned by this vertex whose discovery time is before ``cutoff``."""
        logger.debug("Cleaning up edges owned by %s discovered before %s",
                     self.generate_vertex_id(urn), cutoff)
        # ISO-8601 strings sort lexicographically, so P.lt on the string
        # compares discovery times correctly.
        (self.g.V(self.generate_vertex_id(urn)).bothE().as_("edge").has(
            "_edge_owner", self.generate_vertex_id(urn)).where(
                __.values("_discovery_time").is_(P.lt(
                    cutoff.isoformat()))).select("edge").drop()).iterate()

    def _write_relationships(self, resource: CloudWandererResource) -> None:
        """Write an edge for each of the resource's relationships.

        If a partner vertex already exists, its real URN is used for the edge
        (and any edge to the inferred partial URN is deleted); otherwise a
        placeholder resource is written for the inferred partner first.
        """
        for relationship in resource.relationships:
            inferred_partner_urn = relationship.partial_urn
            try:
                pre_existing_resource_urn = (self._lookup_resource(
                    relationship.partial_urn).propertyMap().toList()[0]["_urn"]
                                             [0].value)
            except IndexError:
                # No vertex matched the partial URN.
                pre_existing_resource_urn = None
            if pre_existing_resource_urn:
                logger.debug(
                    "Writing relationship with pre_existing_resource_urn %s",
                    pre_existing_resource_urn)
                self._write_relationship_edge(
                    resource_urn=resource.urn,
                    relationship_resource_urn=pre_existing_resource_urn,
                    direction=relationship.direction,
                    discovery_time=resource.discovery_time,
                )
                if pre_existing_resource_urn != inferred_partner_urn:
                    # The real URN supersedes the inferred one; remove the
                    # edge that pointed at the inferred placeholder.
                    self._delete_relationship_edge(
                        resource_urn=resource.urn,
                        relationship_resource_urn=inferred_partner_urn,
                        direction=relationship.direction,
                    )
                continue
            logger.debug("Writing inferred resource %s", inferred_partner_urn)
            self._write_resource(
                CloudWandererResource(urn=cast(URN, relationship.partial_urn),
                                      resource_data={}))
            self._write_relationship_edge(
                resource_urn=resource.urn,
                relationship_resource_urn=inferred_partner_urn,
                direction=relationship.direction,
                discovery_time=resource.discovery_time,
            )

    def _repoint_vertex_edges(
            self, vertex_label: str,
            new_resource_urn: Union[URN, PartialUrn]) -> None:
        """Move edges off placeholder vertices onto the fully-known vertex, then drop them.

        Placeholders are vertices with the same label and id parts but an
        'unknown' account or region.
        """
        # https://tinkerpop.apache.org/docs/current/recipes/#edge-move
        resources_of_the_same_type = self.g.V().as_("old_vertex").hasLabel(
            vertex_label)
        for id_part in new_resource_urn.resource_id_parts:
            # Gremlin traversals mutate in place; no reassignment needed.
            resources_of_the_same_type.has("_resource_id_parts", id_part)
        resources_with_same_id_but_unknown = (resources_of_the_same_type.or_(
            __.has("_account_id", "unknown"), __.has("_region",
                                                     "unknown"))).toList()
        for old_vertex in resources_with_same_id_but_unknown:
            # Outbound: copy each outgoing edge (with its properties) onto the
            # new vertex, then drop the original edge.
            old_vertices_outbound_edges = self.g.V(old_vertex).outE().as_("e1")
            old_outbound_edges_partner_vertex = old_vertices_outbound_edges.inV(
            ).as_("b")
            new_vertex = old_outbound_edges_partner_vertex.V(
                self.generate_vertex_id(new_resource_urn)).as_("new_vertex")
            add_old_outbound_edges_to_new_vertex = (
                new_vertex.addE("has").to("b").as_("e2").sideEffect(
                    __.select("e1").properties().unfold().as_("p").select(
                        "e2").property(
                            __.select("p").key(), __.select("p").value())))
            add_old_outbound_edges_to_new_vertex.select("e1").drop().iterate()
            # Inbound: same copy for incoming edges.
            # NOTE(review): ``inV()`` on an *inbound* edge yields the old
            # vertex itself, not the partner — ``outV()`` looks intended here;
            # also the "new_vertex"/"old_vertex" labels selected below were
            # set on a different traversal object. Confirm against tests.
            old_vertices_inbound_edges = self.g.V(old_vertex).select(
                "old_vertex").inE().as_("old_inbound_edge")
            old_inbound_edges_partner_vertex = old_vertices_inbound_edges.inV(
            ).as_("c")
            new_vertex = old_inbound_edges_partner_vertex.select("new_vertex")
            add_old_inbound_edges_to_new_vertex = (new_vertex.addE(
                "has").from_("c").as_("new_inbound_edge").sideEffect(
                    __.select("old_inbound_edge").properties().unfold().as_(
                        "p").select("new_inbound_edge").property(
                            __.select("p").key(), __.select("p").value())))
            add_old_inbound_edges_to_new_vertex.select(
                "old_inbound_edge").drop().iterate()
            # Delete old vertex now that its edges have been repointed.
            self.g.V(old_vertex).drop().iterate()

    def _delete_relationship_edge(self, resource_urn: PartialUrn,
                                  relationship_resource_urn: PartialUrn,
                                  direction: RelationshipDirection) -> None:
        """Delete the edge between two resources, honouring edge direction."""
        if direction == RelationshipDirection.INBOUND:
            self._delete_edge(
                self.generate_edge_id(relationship_resource_urn, resource_urn))
        else:
            self._delete_edge(
                self.generate_edge_id(resource_urn, relationship_resource_urn))

    def _write_relationship_edge(
        self,
        resource_urn: PartialUrn,
        relationship_resource_urn: PartialUrn,
        direction: RelationshipDirection,
        discovery_time: datetime,
    ) -> None:
        """Write a 'has' edge between the two resources in the stated direction.

        The edge is always owned by ``resource_urn`` regardless of direction.
        """
        logger.debug("Writing edge relationship between %s and %s",
                     resource_urn, relationship_resource_urn)
        if direction == RelationshipDirection.INBOUND:
            self._write_edge(
                edge_id=self.generate_edge_id(relationship_resource_urn,
                                              resource_urn),
                edge_label="has",
                source_vertex_id=self.generate_vertex_id(
                    relationship_resource_urn),
                destination_vertex_id=self.generate_vertex_id(resource_urn),
                owner_id=self.generate_vertex_id(resource_urn),
                discovery_time=discovery_time,
            )
        else:
            self._write_edge(
                edge_id=self.generate_edge_id(resource_urn,
                                              relationship_resource_urn),
                edge_label="has",
                source_vertex_id=self.generate_vertex_id(resource_urn),
                destination_vertex_id=self.generate_vertex_id(
                    relationship_resource_urn),
                owner_id=self.generate_vertex_id(resource_urn),
                discovery_time=discovery_time,
            )

    def _lookup_resource(self, partial_urn: PartialUrn) -> Traversal:
        """Build (but do not execute) a traversal matching the partial URN.

        'unknown' account/region act as wildcards and are not filtered on.
        """
        vertex_label = generate_primary_label(partial_urn)
        logger.debug("looking up resource with label %s", vertex_label)
        traversal = (self.g.V().hasLabel(vertex_label).has(
            "_cloud_name",
            partial_urn.cloud_name).has("_service", partial_urn.service).has(
                "_resource_type", partial_urn.resource_type))
        for id_part in partial_urn.resource_id_parts:
            traversal.has("_resource_id_parts", id_part)
        if partial_urn.account_id != "unknown":
            traversal.has("_account_id", partial_urn.account_id)
        if partial_urn.region != "unknown":
            traversal.has("_region", partial_urn.region)
        return traversal

    def _write_vertex(self, vertex_id: str,
                      vertex_labels: List[str]) -> Traversal:
        """Return an upsert traversal for the vertex (created only if absent)."""
        logger.debug("Writing vertex %s", vertex_id)
        if self.supports_multiple_labels:
            # Neptune/Neo4J convention: '::'-joined multi-label string.
            vertex_label = "::".join(vertex_labels)
        else:
            vertex_label = vertex_labels[0]
        return self.g.V(vertex_id).fold().coalesce(
            __.unfold(), __.addV(vertex_label).property(T.id, vertex_id))

    def _write_properties(self, traversal: Traversal,
                          properties: Dict[str, Any]) -> Traversal:
        """Append the resource's data properties to the traversal and execute it.

        Raises:
            RuntimeError: Re-raised with the serialised traversal size so the
                user can compare it against the server's maxContentLength.
        """
        logger.debug("Writing properties: %s", properties)
        for property_name, property_value in properties.items():
            traversal = traversal.property(Cardinality.single,
                                           str(property_name),
                                           str(property_value))
        traversal_size = len(str(traversal).encode())
        try:
            traversal.next()
        except RuntimeError as ex:
            raise RuntimeError(
                "GremlinStorageConnector got a runtime error while saving a property of "
                f"{traversal_size} bytes, check your Gremlin server's maxContentLength is larger than this."
            ) from ex

    def _write_edge(
        self,
        edge_id: str,
        edge_label: str,
        source_vertex_id: str,
        destination_vertex_id: str,
        owner_id: str,
        discovery_time: datetime,
    ) -> Traversal:
        """Create the edge if absent; otherwise refresh its discovery time.

        The existence check doubles as the refresh: ``property(...)`` on the
        matched edge updates ``_discovery_time`` as a side effect.
        """
        logger.debug("Looking for edge %s", edge_id)
        edge = self.g.E(edge_id).property(
            "_discovery_time", discovery_time.isoformat()).toList()
        if not edge:
            logger.debug("Writing edge between %s and %s", source_vertex_id,
                         destination_vertex_id)
            (self.g.V(source_vertex_id).as_("source").V(destination_vertex_id).
             addE(edge_label).from_("source").property(T.id, edge_id).property(
                 "_edge_owner", owner_id).property("_discovery_time",
                                                   discovery_time.isoformat())).next()

    def _delete_edge(self, edge_id: str) -> Traversal:
        """Delete the edge with the given id (no-op if it does not exist)."""
        logger.debug("Deleting edge %s", edge_id)
        self.g.E(edge_id).drop().iterate()

    def read_all(self) -> Iterator[dict]:
        """Return all records from storage."""
        # Only vertices carrying a '_urn' property are real resources.
        for vertex in self.g.V().has("_urn").valueMap().toList():
            yield vertex

    def read_resource(self, urn: URN) -> Optional[CloudWandererResource]:
        """Return a resource matching the supplied urn from storage.

        Arguments:
            urn (URN): The AWS URN of the resource to return

        NOTE(review): raises ``StopIteration`` (not ``None``) when nothing
        matches, despite the ``Optional`` annotation — confirm callers.
        """
        return next(self.read_resources(urn=urn))

    def read_resources(
        self,
        cloud_name: Optional[str] = None,
        account_id: Optional[str] = None,
        region: Optional[str] = None,
        service: Optional[str] = None,
        resource_type: Optional[str] = None,
        urn: Optional[Union[URN, PartialUrn]] = None,
    ) -> Iterator["CloudWandererResource"]:
        """Yield a resource matching the supplied urn from storage.

        All arguments are optional.

        Arguments:
            cloud_name: The name of the cloud in question (e.g. ``aws``)
            urn: The AWS URN of the resource to return
            account_id: Cloud Account ID (e.g. ``111111111111``)
            region: AWS region (e.g. ``'eu-west-2'``)
            service: Service name (e.g. ``'ec2'``)
            resource_type: Resource Type (e.g. ``'instance'``)
        """
        if not urn:
            # Missing filters default to the 'unknown' wildcard understood by
            # _lookup_resource.
            urn = PartialUrn(
                cloud_name=cloud_name or "unknown",
                service=service or "unknown",
                account_id=account_id or "unknown",
                region=region or "unknown",
                resource_type=resource_type or "unknown",
            )
        for vertex in self._lookup_resource(
                partial_urn=urn).propertyMap().toList():
            yield CloudWandererResource(
                urn=URN.from_string(vertex["_urn"][0].value),
                resource_data=_normalise_gremlin_attrs(vertex),
                discovery_time=datetime.strptime(
                    vertex["_discovery_time"][0].value, ISO_DATE_FORMAT),
            )

    def delete_resource(self, urn: URN) -> None:
        """Delete this resource and all its resource attributes.

        Arguments:
            urn (URN): The URN of the resource to delete
        """
        logger.debug("Deleting resource %s", urn)
        self.g.V(self.generate_vertex_id(urn)).drop().iterate()

    def delete_resource_of_type_in_account_region(
        self,
        cloud_name: str,
        service: str,
        resource_type: str,
        account_id: str,
        region: str,
        cutoff: Optional[datetime],
    ) -> None:
        """Delete resources of this type in the account/region discovered before ``cutoff``.

        This is used primarily to clean up old resources.

        Arguments:
            cloud_name: The name of the cloud in question (e.g. ``aws``)
            account_id: Cloud Account ID (e.g. ``111111111111``)
            region: Cloud region (e.g. ``'eu-west-2'``)
            service: Service name (e.g. ``'ec2'``)
            resource_type: Resource Type (e.g. ``'instance'``)
            cutoff: Delete any resource discovered before this time; ``None``
                deletes all matching resources regardless of age.
        """
        partial_urn = PartialUrn(
            cloud_name=cloud_name,
            service=service,
            account_id=account_id,
            region=region,
            resource_type=resource_type,
        )
        logger.debug(
            "Deleting resources that match %s that were discovered before %s",
            partial_urn, cutoff)
        traversal = self._lookup_resource(partial_urn=partial_urn)
        if cutoff:
            traversal.where(
                __.values("_discovery_time").is_(P.lt(cutoff.isoformat())))
        traversal.drop().iterate()

    def generate_vertex_id(self, urn: PartialUrn) -> str:
        """Generate a vertex id.

        Arguments:
            urn: The URN of the vertex to create.
        """
        return f"{self.test_prefix}{urn}"

    def generate_edge_id(self, source_urn: PartialUrn,
                         destination_urn: PartialUrn) -> str:
        """Generate a primary edge id.

        Arguments:
            source_urn: The URN of the resource we're generating an edge from.
            destination_urn: The URN of the resource we're generating an edge to.
        """
        logger.debug("Generating edge id: %s", source_urn)
        return f"{self.test_prefix}{source_urn}#{destination_urn}"
class GremlinWrapper(object):
    """Thin convenience wrapper around a remote Gremlin traversal source."""

    def __init__(self, remote_gremlin_server):
        """Connect to ``remote_gremlin_server`` and load gremlin statics into globals."""
        self.remote_server = remote_gremlin_server
        self.remote_connection = DriverRemoteConnection(
            remote_gremlin_server, 'g')
        self.g = traversal().withRemote(self.remote_connection)
        statics.load_statics(globals())

    def add_indigitous_user(self, login, email, name, uid):
        """Upsert an 'indigitous_user' vertex keyed on ``uid``."""
        self.g.V() \
            .has('indigitous_user', 'uid', uid) \
            .fold() \
            .coalesce(
                unfold(),
                addV('indigitous_user')
                .property('name', name)
                .property('uid', uid)
                .property('email', email)
                .property('login', login)) \
            .toList()

    def add_github_user(self, login, email, name, uid):
        """Upsert a 'github_user' vertex keyed on ``uid``."""
        return self.g.V().has('github_user', 'uid', uid) \
            .fold() \
            .coalesce(
                unfold(),
                addV('github_user')
                .property('name', name)
                .property('uid', uid)
                .property('email', email)
                .property('login', login)) \
            .toList()

    def add_repository(self, name):
        """Upsert a 'repository' vertex keyed on ``name``."""
        # Fix: this previously created the vertex with addV('github_user')
        # (copy-paste bug), so the has('repository', ...) existence check
        # could never match and every call created a duplicate, mislabelled
        # vertex. The created label must match the label being checked.
        return self.g.V() \
            .has('repository', 'name', name) \
            .fold() \
            .coalesce(
                unfold(),
                addV('repository')
                .property('name', name)) \
            .iterate()

    def get_list_of_indigitous_users(self):
        """Return a vertex-id -> email grouping of all indigitous users."""
        return self.g.V() \
            .hasLabel('indigitous_user') \
            .group() \
            .by(__.id()) \
            .by('email') \
            .toList()

    def edge_vertices(self, label, from_v, to_v):
        """Create an edge ``label`` from ``from_v`` to ``to_v`` if one does not exist."""
        return self.g.E() \
            .hasLabel(label) \
            .where(outV().hasId(from_v)) \
            .where(inV().hasId(to_v)) \
            .fold() \
            .coalesce(
                unfold(),
                addE(label)
                .from_(V(from_v))
                .to(V(to_v))) \
            .iterate()

    def close(self):
        """Close the underlying remote connection."""
        self.remote_connection.close()
results = t.side_effects.get('b') assert not results # But 'a' should still be cached locally results = t.side_effects.get('a') assert results # 'a' should have been added to local keys cache, but not 'b' results = t.side_effects.keys() assert len(results) == 1 a, = results assert a == 'a' # Try to get 'b' directly from server, should throw error with pytest.raises(Exception): t.side_effects.value_lambda('b') connection.close() if __name__ == '__main__': test = False try: connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g') test = True connection.close() except: print("GremlinServer is not running and this test case will not execute: " + __file__) if test: unittest.main()