def getPaths(v1, v2):
    """Time a weighted path search from vertex ``v1`` to vertex ``v2``.

    The Gremlin query starts at ``v1``, repeats an out-edge hop three times
    while summing each edge's ``weight`` into the sack, keeps traversers that
    are at ``v2``, caps the result at 300, orders those by sack value
    (ascending) and materialises them with ``toList()``. The rows themselves
    are discarded; only the elapsed wall-clock time is reported.

    Note that ``limit(300)`` is applied before ``order()``, so the ordering
    only covers the first 300 paths the server produces.

    :param v1: id of the start vertex
    :param v2: id of the target vertex
    :returns: ``(v1, v2, seconds)`` on success, or ``("error", message)`` if
        anything in the body raised
    """
    try:
        start = time.time()
        # Result intentionally not bound: this function only measures latency.
        # Order.asc replaces the deprecated Order.incr, which newer
        # gremlin_python releases no longer define.
        g.withSideEffect("Neptune#repeatMode", "CHUNKED_DFS").withSack(0).V().hasId(v1). \
            repeat(__.outE().sack(Operator.sum).by('weight').inV().simplePath()).times(3). \
            emit(__.hasId(v2)).hasId(v2).limit(300).order().by(__.sack(), Order.asc). \
            local(__.union(__.path().by(T.id).by('weight'), __.sack()).fold()). \
            toList()
        end = time.time()
        timeDelta = end - start
        return v1, v2, timeDelta
    except Exception as e:
        # Best-effort contract kept from the original: report instead of raise.
        return "error", str(e)
def _column_entities(cls, *, _g: GraphTraversalSource, tables_ids: Iterable[str], existing: EXISTING) -> None:
    """
    Fetch the Description edges of the columns belonging to the given tables
    and pass the result to ``cls._into_existing``.

    :param _g: traversal source the query is issued against
    :param tables_ids: ids of the table vertices to start from
    :param existing: accumulator handed unchanged to ``cls._into_existing``
    """
    # table -> column: follow Column edges out of the table vertices
    traversal = (
        _g.V(tuple(tables_ids))
        .outE(EdgeTypes.Column.value.label)
        .inV()
        .hasLabel(VertexTypes.Column.value.label)
        .as_('columns')
    )

    # column -> links (no Stat); each edge list is folded and labelled by type name
    for edge_type in (EdgeTypes.Description,):
        folded_edges = __.select('columns').outE(edge_type.value.label).fold()
        traversal = traversal.coalesce(folded_edges).as_(edge_type.name)

    traversal = traversal.select(EdgeTypes.Description.name).unfold()

    # per edge, fold: out-vertex id, valueMap(True) of the edge, in-vertex id
    traversal = traversal.local(
        __.union(__.outV().id(), __.valueMap(True), __.inV().id()).fold())

    fetched = traversal.toList()
    cls._into_existing(fetched, existing)
def expire_connections_for_other(cls, *, _g: GraphTraversalSource, vertex_type: VertexType, keys: FrozenSet[str], existing: EXISTING) -> None:
    """
    Load into ``existing`` the edges of every ``vertex_type`` vertex that has
    at least one edge and whose key is NOT in ``keys``.

    :param _g: traversal source the queries are issued against
    :param vertex_type: vertex type whose label is scanned and whose ``id()``
        builds vertex ids from keys
    :param keys: keys to exclude from the scan result
    :param existing: accumulator handed unchanged to ``cls._into_existing``
    """
    # V().has(label, 'key', P.without(keys)) is more intuitive but doesn't scale,
    # so pull every connected vertex's key and subtract client-side instead.
    key_property = WellKnownProperties.Key.value.name
    connected_keys = (
        _g.V()
        .hasLabel(vertex_type.label)
        .where(__.bothE())
        .values(key_property)
        .toList()
    )
    all_to_expire_keys = set(connected_keys).difference(keys)

    # TODO: when any vertex ids that need something besides key
    all_to_expire = {vertex_type.id(key=expired_key) for expired_key in all_to_expire_keys}

    for id_batch in chunk(all_to_expire, 1000):
        # per edge, fold: out-vertex id, valueMap(True) of the edge, in-vertex id
        edge_rows = (
            _g.V(tuple(id_batch))
            .bothE()
            .local(__.union(__.outV().id(), __.valueMap(True), __.inV().id()).fold())
            .toList()
        )
        cls._into_existing(edge_rows, existing)
# Airports with five or more runways, highest runway count first.
# Each row is the folded pair of 'code' and 'runways' values.
most_runways = (
    g.V()
    .has('runways', P.gte(5))
    .order()
    .by('runways', Order.desc)
    .local(__.values('code', 'runways').fold())
    .toList()
)

heading("Airports with the most runways")
for rows in most_runways:
    print(rows[0], rows[1])

# Routes from AUS to WLG with the summed 'dist' carried in the sack.
# Note the use of the Operator enum.
# limit(10) runs before order(), so only the first ten paths found are sorted.
hop = __.outE().sack(Operator.sum).by('dist').inV().simplePath()
routes = (
    g.withSack(0)
    .V()
    .has('code', 'AUS')
    .repeat(hop)
    .until(__.has('code', 'WLG'))
    .limit(10)
    .order()
    .by(__.sack())
    .local(__.union(__.path().by('code').by('dist'), __.sack()).fold())
    .toList()
)

heading("Sack step tests")
for route in routes:
    print(route)

# All done so close the connection
connection.close()
# Connect to a Gremlin Server using a remote connection and issue some basic queries.

# Import some classes we will need to talk to our graph
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.structure.graph import Graph
from gremlin_python import statics
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.process.traversal import *

# Path to our graph. Note how the path is a Web Socket (ws) connection.
# The host below is a Neptune cluster endpoint (per its hostname), not a local server.
endpoint = 'ws://neptunedbcluster-70fbv2otqb11.cluster-c814mxxksbjw.us-east-1.neptune.amazonaws.com:8182/gremlin'

# Obtain a graph traversal source using a remote connection
graph = Graph()
g = graph.traversal().withRemote(DriverRemoteConnection(endpoint, 'g'))

# Ids of the start (v1) and target (v2) vertices for the sample query.
v1 = '5031468'
v2 = '3140001'

# Weighted path search from v1 to v2: three out-edge hops, summing each edge's
# 'weight' into the sack, keeping traversers at v2. limit(300) is applied before
# order(), so only the first 300 paths found are sorted by sack value.
# Each row is the folded pair of (path by id/weight, sack value).
# Order.asc replaces the deprecated Order.incr, which newer gremlin_python
# releases no longer define.
res = g.withSideEffect("Neptune#repeatMode", "CHUNKED_DFS").withSack(0).V().hasId(v1). \
    repeat(__.outE().sack(Operator.sum).by('weight').inV().simplePath()).times(3). \
    emit(__.hasId(v2)).hasId(v2).limit(300).order().by(__.sack(), Order.asc). \
    local(__.union(__.path().by(T.id).by('weight'), __.sack()).fold()). \
    toList()

# print() call form: the statement form `print res` is a SyntaxError on Python 3.
print(res)
def test_translations(self):
    """Check that ``Translator`` renders each traversal's bytecode as the expected string.

    ``tests`` is a list of ``[traversal, expected_text]`` pairs. The ``# N``
    comment above each entry is its index in that list, which is also the
    index reported in the assertion message when a translation differs.
    """
    g = traversal().withGraph(Graph())

    tests = []
    # 0
    tests.append([g.V(),
                  "g.V()"])
    # 1
    tests.append([g.V('1', '2', '3', '4'),
                  "g.V('1','2','3','4')"])
    # 2
    tests.append([g.V('3').valueMap(True),
                  "g.V('3').valueMap(True)"])
    # 3
    tests.append([g.V().constant(5),
                  "g.V().constant(5)"])
    # 4
    tests.append([g.V().constant(1.5),
                  "g.V().constant(1.5)"])
    # 5
    tests.append([g.V().constant('Hello'),
                  "g.V().constant('Hello')"])
    # 6
    tests.append([g.V().hasLabel('airport').limit(5),
                  "g.V().hasLabel('airport').limit(5)"])
    # 7
    tests.append([g.V().hasLabel(within('a', 'b', 'c')),
                  "g.V().hasLabel(within(['a','b','c']))"])
    # 8
    tests.append([g.V().hasLabel('airport', 'continent').out().limit(5),
                  "g.V().hasLabel('airport','continent').out().limit(5)"])
    # 9
    tests.append([g.V().hasLabel('airport').out().values('code').limit(5),
                  "g.V().hasLabel('airport').out().values('code').limit(5)"])
    # 10
    tests.append([g.V('3').as_('a').out('route').limit(10).where(eq('a')).by('region'),
                  "g.V('3').as('a').out('route').limit(10).where(eq('a')).by('region')"])
    # 11
    tests.append([g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code'),
                  "g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code')"])
    # 12
    tests.append([g.V().hasLabel('airport').out().has('region', 'US-TX').values('code').limit(5),
                  "g.V().hasLabel('airport').out().has('region','US-TX').values('code').limit(5)"])
    # 13
    tests.append([g.V().hasLabel('airport').union(__.values('city'), __.values('region')).limit(5),
                  "g.V().hasLabel('airport').union(__.values('city'),__.values('region')).limit(5)"])
    # 14
    tests.append([g.V('3').as_('a').out('route', 'routes'),
                  "g.V('3').as('a').out('route','routes')"])
    # 15
    tests.append([g.V().where(__.values('runways').is_(5)),
                  "g.V().where(__.values('runways').is(5))"])
    # 16
    tests.append([g.V('3').repeat(__.out().simplePath()).until(__.has('code', 'AGR')).path().by('code').limit(5),
                  "g.V('3').repeat(__.out().simplePath()).until(__.has('code','AGR')).path().by('code').limit(5)"])
    # 17
    tests.append([g.V().hasLabel('airport').order().by(__.id()),
                  "g.V().hasLabel('airport').order().by(__.id())"])
    # 18
    tests.append([g.V().hasLabel('airport').order().by(T.id),
                  "g.V().hasLabel('airport').order().by(T.id)"])
    # 19
    tests.append([g.V().hasLabel('airport').order().by(__.id(),Order.desc),
                  "g.V().hasLabel('airport').order().by(__.id(),Order.desc)"])
    # 20
    tests.append([g.V().hasLabel('airport').order().by('code',Order.desc),
                  "g.V().hasLabel('airport').order().by('code',Order.desc)"])
    # 21
    tests.append([g.V('1', '2', '3').local(__.out().out().dedup().fold()),
                  "g.V('1','2','3').local(__.out().out().dedup().fold())"])
    # 22
    tests.append([g.V('3').out().path().count(Scope.local),
                  "g.V('3').out().path().count(Scope.local)"])
    # 23
    tests.append([g.E().count(),
                  "g.E().count()"])
    # 24
    tests.append([g.V('5').outE('route').inV().path().limit(10),
                  "g.V('5').outE('route').inV().path().limit(10)"])
    # 25
    tests.append([g.V('5').propertyMap().select(Column.keys),
                  "g.V('5').propertyMap().select(Column.keys)"])
    # 26
    tests.append([g.V('5').propertyMap().select(Column.values),
                  "g.V('5').propertyMap().select(Column.values)"])
    # 27
    tests.append([g.V('3').values('runways').math('_ + 1'),
                  "g.V('3').values('runways').math('_ + 1')"])
    # 28
    tests.append([g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path(),
                  "g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path()"])
    # 29
    tests.append([g.V().match(__.as_('a').has('code', 'LHR').as_('b')).select('b').by('code'),
                  "g.V().match(__.as('a').has('code','LHR').as('b')).select('b').by('code')"])
    # 30
    tests.append([g.V().has('test-using-keyword-as-property','repeat'),
                  "g.V().has('test-using-keyword-as-property','repeat')"])
    # 31
    tests.append([g.V('1').addE('test').to(__.V('4')),
                  "g.V('1').addE('test').to(__.V('4'))"])
    # 32
    tests.append([g.V().values('runways').max(),
                  "g.V().values('runways').max()"])
    # 33
    tests.append([g.V().values('runways').min(),
                  "g.V().values('runways').min()"])
    # 34
    tests.append([g.V().values('runways').sum(),
                  "g.V().values('runways').sum()"])
    # 35
    tests.append([g.V().values('runways').mean(),
                  "g.V().values('runways').mean()"])
    # 36
    tests.append([g.withSack(0).V('3', '5').sack(Operator.sum).by('runways').sack(),
                  "g.withSack(0).V('3','5').sack(Operator.sum).by('runways').sack()"])
    # 37
    tests.append([g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum(),
                  "g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum()"])
    # 38
    tests.append([g.inject(3, 4, 5),
                  "g.inject(3,4,5)"])
    # 39
    tests.append([g.inject([3, 4, 5]),
                  "g.inject([3, 4, 5])"])
    # 40
    tests.append([g.inject(3, 4, 5).count(),
                  "g.inject(3,4,5).count()"])
    # 41
    tests.append([g.V().has('runways', gt(5)).count(),
                  "g.V().has('runways',gt(5)).count()"])
    # 42
    tests.append([g.V().has('runways', lte(5.3)).count(),
                  "g.V().has('runways',lte(5.3)).count()"])
    # 43
    tests.append([g.V().has('code', within(123,124)),
                  "g.V().has('code',within([123,124]))"])
    # 44
    tests.append([g.V().has('code', within(123, 'abc')),
                  "g.V().has('code',within([123,'abc']))"])
    # 45
    tests.append([g.V().has('code', within('abc', 123)),
                  "g.V().has('code',within(['abc',123]))"])
    # 46
    tests.append([g.V().has('code', within('abc', 'xyz')),
                  "g.V().has('code',within(['abc','xyz']))"])
    # 47
    tests.append([g.V('1', '2').has('region', P.within('US-TX','US-GA')),
                  "g.V('1','2').has('region',within(['US-TX','US-GA']))"])
    # 48
    tests.append([g.V().and_(__.has('runways', P.gt(5)), __.has('region','US-TX')),
                  "g.V().and(__.has('runways',gt(5)),__.has('region','US-TX'))"])
    # 49
    tests.append([g.V().union(__.has('runways', gt(5)), __.has('region','US-TX')),
                  "g.V().union(__.has('runways',gt(5)),__.has('region','US-TX'))"])
    # 50
    tests.append([g.V('3').choose(__.values('runways').is_(3),__.constant('three'),__.constant('not three')),
                  "g.V('3').choose(__.values('runways').is(3),__.constant('three'),__.constant('not three'))"])
    # 51
    tests.append([g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three')),
                  "g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three'))"])
    # 52
    tests.append([g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three')),
                  "g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three'))"])
    # 53
    tests.append([g.V('3').repeat(__.out().simplePath()).until(__.loops().is_(1)).count(),
                  "g.V('3').repeat(__.out().simplePath()).until(__.loops().is(1)).count()"])
    # 54
    tests.append([g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys),
                  "g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys)"])
    # 55
    tests.append([g.V('1').as_('a').V('2').as_('a').select(Pop.all_, 'a'),
                  "g.V('1').as('a').V('2').as('a').select(Pop.all,'a')"])
    # 56
    tests.append([g.addV('test').property(Cardinality.set_, 'p1', 10),
                  "g.addV('test').property(Cardinality.set,'p1',10)"])
    # 57
    tests.append([g.addV('test').property(Cardinality.list_, 'p1', 10),
                  "g.addV('test').property(Cardinality.list,'p1',10)"])
    # 58
    tests.append([g.addV('test').property(Cardinality.single, 'p1', 10),
                  "g.addV('test').property(Cardinality.single,'p1',10)"])
    # 59
    tests.append([g.V().limit(5).order().by(T.label),
                  "g.V().limit(5).order().by(T.label)"])
    # 60
    tests.append([g.V().range(1, 5),
                  "g.V().range(1,5)"])
    # 61
    tests.append([g.addV('test').property('p1', 123),
                  "g.addV('test').property('p1',123)"])
    # 62
    tests.append([g.addV('test').property('date',datetime(2021, 2, 1, 9, 30)),
                  "g.addV('test').property('date',new Date(121,2,1,9,30,0))"])
    # 63
    tests.append([g.addV('test').property('date',datetime(2021, 2, 1)),
                  "g.addV('test').property('date',new Date(121,2,1,0,0,0))"])
    # 64
    tests.append([g.addE('route').from_(__.V('1')).to(__.V('2')),
                  "g.addE('route').from(__.V('1')).to(__.V('2'))"])
    # 65
    tests.append([g.withSideEffect('a', [1, 2]).V('3').select('a'),
                  "g.withSideEffect('a',[1, 2]).V('3').select('a')"])
    # 66
    tests.append([g.withSideEffect('a', 1).V('3').select('a'),
                  "g.withSideEffect('a',1).V('3').select('a')"])
    # 67
    tests.append([g.withSideEffect('a', 'abc').V('3').select('a'),
                  "g.withSideEffect('a','abc').V('3').select('a')"])
    # 68
    tests.append([g.V().has('airport', 'region', 'US-NM').limit(3).values('elev').fold().index(),
                  "g.V().has('airport','region','US-NM').limit(3).values('elev').fold().index()"])
    # 69
    tests.append([g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code', 'AGR')).path(),
                  "g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code','AGR')).path()"])
    # 70
    tests.append([g.V().hasLabel('airport').where(__.values('elev').is_(gt(14000))),
                  "g.V().hasLabel('airport').where(__.values('elev').is(gt(14000)))"])
    # 71
    tests.append([g.V().hasLabel('airport').where(__.out().count().is_(gt(250))).values('code'),
                  "g.V().hasLabel('airport').where(__.out().count().is(gt(250))).values('code')"])
    # 72
    tests.append([g.V().hasLabel('airport').filter(__.out().count().is_(gt(250))).values('code'),
                  "g.V().hasLabel('airport').filter(__.out().count().is(gt(250))).values('code')"])
    # 73
    tests.append([g.withSack(0).
                  V('3').
                  repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).
                  until(__.has('code', 'AGR').or_().loops().is_(4)).
                  has('code', 'AGR').
                  local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).
                  limit(10),
                  "g.withSack(0).V('3').repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).until(__.has('code','AGR').or().loops().is(4)).has('code','AGR').local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).limit(10)"])
    # 74
    tests.append([g.addV().as_('a').addV().as_('b').addE('knows').from_('a').to('b'),
                  "g.addV().as('a').addV().as('b').addE('knows').from('a').to('b')"])
    # 75
    tests.append([g.addV('Person').as_('a').addV('Person').as_('b').addE('knows').from_('a').to('b'),
                  "g.addV('Person').as('a').addV('Person').as('b').addE('knows').from('a').to('b')"])
    # 76
    tests.append([g.V('3').project('Out','In').by(__.out().count()).by(__.in_().count()),
                  "g.V('3').project('Out','In').by(__.out().count()).by(__.in().count())"])
    # 77
    tests.append([g.V('44').out().aggregate('a').out().where(within('a')).path(),
                  "g.V('44').out().aggregate('a').out().where(within(['a'])).path()"])
    # 78
    tests.append([g.V().has('date', datetime(2021, 2, 22)),
                  "g.V().has('date',new Date(121,2,22,0,0,0))"])
    # 79
    tests.append([g.V().has('date', within(datetime(2021, 2, 22), datetime(2021, 1, 1))),
                  "g.V().has('date',within([new Date(121,2,22,0,0,0),new Date(121,1,1,0,0,0)]))"])
    # 80
    tests.append([g.V().has('date', between(datetime(2021, 1, 1), datetime(2021, 2, 22))),
                  "g.V().has('date',between(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
    # 81
    tests.append([g.V().has('date', inside(datetime(2021, 1, 1),datetime(2021, 2, 22))),
                  "g.V().has('date',inside(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
    # 82
    tests.append([g.V().has('date', P.gt(datetime(2021, 1, 1, 9, 30))),
                  "g.V().has('date',gt(new Date(121,1,1,9,30,0)))"])
    # 83
    tests.append([g.V().has('runways', between(3,5)),
                  "g.V().has('runways',between(3,5))"])
    # 84
    tests.append([g.V().has('runways', inside(3,5)),
                  "g.V().has('runways',inside(3,5))"])
    # 85
    tests.append([g.V('44').outE().elementMap(),
                  "g.V('44').outE().elementMap()"])
    # 86
    tests.append([g.V('44').valueMap().by(__.unfold()),
                  "g.V('44').valueMap().by(__.unfold())"])
    # 87
    tests.append([g.V('44').valueMap().with_(WithOptions.tokens,WithOptions.labels),
                  "g.V('44').valueMap().with(WithOptions.tokens,WithOptions.labels)"])
    # 88
    tests.append([g.V('44').valueMap().with_(WithOptions.tokens),
                  "g.V('44').valueMap().with(WithOptions.tokens)"])
    # 89
    tests.append([g.withStrategies(ReadOnlyStrategy()).addV('test'),
                  "g.withStrategies(new ReadOnlyStrategy()).addV('test')"])
    # 90
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
    # 91
    strategy = SubgraphStrategy(vertex_properties=__.hasNot('runways'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertexProperties:__.hasNot('runways'))).V().count()"])
    # 92
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'),vertex_properties=__.hasNot('runways'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),vertexProperties:__.hasNot('runways'))).V().count()"])
    # 93
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
    tests.append([g.withStrategies(ReadOnlyStrategy(),strategy).V().count(),
                  "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
    # 94
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'))
    tests.append([g.withStrategies(ReadOnlyStrategy(), strategy).V().count(),
                  "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'))).V().count()"])
    # 95
    tests.append([g.with_('evaluationTimeout', 500).V().count(),
                  "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
    # 96
    tests.append([g.withStrategies(OptionsStrategy({'evaluationTimeout': 500})).V().count(),
                  "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
    # 97
    tests.append([g.withStrategies(PartitionStrategy(partition_key="partition", write_partition="a", read_partitions=["a"])).addV('test'),
                  "g.withStrategies(new PartitionStrategy(partitionKey:'partition',writePartition:'a',readPartitions:['a'])).addV('test')"])
    # 98
    tests.append([g.withComputer().V().shortestPath().with_(ShortestPath.target, __.has('name','peter')),
                  "g.withStrategies(new VertexProgramStrategy()).V().shortestPath().with('~tinkerpop.shortestPath.target',__.has('name','peter'))"])

    tlr = Translator().of('g')

    # Walk the pairs directly and report the failing index, so a mismatch can
    # be matched to its "# N" entry above without a debugger.
    for index, (query, expected) in enumerate(tests):
        actual = tlr.translate(query.bytecode)
        assert actual == expected, \
            "translation #{} mismatch: {!r} != {!r}".format(index, actual, expected)
def table_entities(cls, *, _g: GraphTraversalSource, table_data: List[Table], existing: EXISTING) -> None:
    """
    Load into ``existing`` the edges and vertices already in the graph that
    relate to the given tables.

    For each chunk of table vertex ids this fetches the edges into the table
    and up through schema and cluster, the other in/out edges of the table,
    and (via ``cls._column_entities``) the column-level edges. Afterwards it
    fetches the vertices derived from each table's ``table_writer`` id.

    :param _g: traversal source the queries are issued against
    :param table_data: tables whose related entities should be fetched
    :param existing: accumulator handed unchanged to ``cls._into_existing``
    """
    table_id_set = {
        VertexTypes.Table.value.id(
            key=TableUris.get(database=t.database, cluster=t.cluster, schema=t.schema, table=t.name).table)
        for t in table_data
    }
    all_tables_ids = list(table_id_set)

    # ids of the writers, for tables that have one; feeds both owner and application ids
    writer_ids = [t.table_writer.id for t in table_data if t.table_writer is not None]
    all_owner_ids = list({VertexTypes.User.value.id(key=key) for key in writer_ids})
    all_application_ids = list(set(possible_vertex_ids_for_application_key(*writer_ids)))

    # chunk these since 100,000s seems to choke
    for tables_ids in chunk(all_tables_ids, 1000):
        LOGGER.info(f'fetching for tables: {tables_ids}')

        # edge types grouped by how they are reached from the table vertex
        hierarchy_types = (EdgeTypes.Table, EdgeTypes.Schema, EdgeTypes.Cluster)
        inbound_types = (EdgeTypes.BelongToTable, EdgeTypes.Generates, EdgeTypes.Tag)
        outbound_types = (EdgeTypes.Column, EdgeTypes.Description, EdgeTypes.LastUpdatedAt,
                          EdgeTypes.Source, EdgeTypes.Stat)

        # edges into the table, then (from those edges' out-vertices) into the
        # schema, then into the cluster; each step works on the previous folded list
        table_edges = __.inE(EdgeTypes.Table.value.label).dedup().fold()
        schema_edges = (
            __.unfold().outV()
            .hasLabel(VertexTypes.Schema.value.label)
            .inE(EdgeTypes.Schema.value.label)
            .dedup().fold()
        )
        cluster_edges = (
            __.unfold().outV()
            .hasLabel(VertexTypes.Cluster.value.label)
            .inE(EdgeTypes.Cluster.value.label)
            .dedup().fold()
        )
        traversal = _g.V(tuple(tables_ids)).as_('tables')
        traversal = traversal.coalesce(table_edges).as_(EdgeTypes.Table.name)
        traversal = traversal.coalesce(schema_edges).as_(EdgeTypes.Schema.name)
        traversal = traversal.coalesce(cluster_edges).as_(EdgeTypes.Cluster.name)

        # fetch table <- links
        for edge_type in inbound_types:
            traversal = traversal.coalesce(
                __.select('tables').inE(edge_type.value.label).fold()).as_(edge_type.name)

        # fetch table -> column et al links
        for edge_type in outbound_types:
            traversal = traversal.coalesce(
                __.select('tables').outE(edge_type.value.label).fold()).as_(edge_type.name)

        # TODO: add owners, watermarks, last timestamp existing, source
        aliases = {
            edge_type.name
            for edge_type in hierarchy_types + inbound_types + outbound_types
        }
        traversal = traversal.select(*aliases).unfold().select(MapColumn.values).unfold()

        # per edge, fold: out-vertex id, valueMap(True) of the edge, in-vertex id
        traversal = traversal.local(
            __.union(__.outV().id(), __.valueMap(True), __.inV().id()).fold())
        cls._into_existing(traversal.toList(), existing)

        cls._column_entities(_g=_g, tables_ids=tables_ids, existing=existing)

    # fetch Application, User
    vertex_ids = list(set(all_application_ids + all_owner_ids))
    for ids in chunk(vertex_ids, 5000):
        LOGGER.info(f'fetching for application/owners: {ids}')
        vertices = _g.V(ids).valueMap(True).toList()
        cls._into_existing(vertices, existing)