Example no. 1
    def get_node_predecessors(
        self, u: Hashable, include_metadata: bool = False
    ) -> Collection:
        """
        Get all predecessor (upstream) nodes of this node.

        Arguments:
            u (Hashable): The target node ID
            include_metadata (bool: False): Whether to include edge metadata

        Returns:
            Collection: Predecessor node IDs, or a dict keyed by source node ID

        """
        if include_metadata:
            return {
                e["source"]: e
                for e in (
                    self._g.V()
                    .has(ID, u)
                    .inE()
                    .project("target", "source", "properties")
                    .by(__.inV().values(ID))
                    .by(__.outV().values(ID))
                    .by(__.valueMap(True))
                    .toList()
                )
            }
        # Walk in-edges from u back to their source vertices (the predecessors).
        return self._g.V().has(ID, u).in_().values(ID).toList()
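
A note on context: ID and __ are not defined in this excerpt. A minimal sketch of the likely surrounding setup, assuming the standard gremlinpython anonymous traversal and an ID property whose name ("__id") is my assumption:

from typing import Collection, Hashable
from gremlin_python.process.graph_traversal import __

ID = "__id"  # assumed name of the property used as the node ID

# With that in place, each projected in-edge in the metadata branch looks like:
# {"target": <u's ID>, "source": <predecessor's ID>, "properties": {...valueMap(True)...}}
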
Example no. 2
def run_filters(num_edges):
    g, conn = gremlin_helper.connect_to_neptune()

    logger.info('Querying neptune')
    res = g.V().inE().limit(num_edges).outV().path().by(__.valueMap().with_(
        WithOptions.tokens)).toList()

    nodes_df, edges_df = path_to_df(res)
    url = plot_url(nodes_df, edges_df)

    logger.info("Finished compute phase")

    try:
        conn.close()

    except RuntimeError as e:
        if str(e) == "There is no current event loop in thread 'ScriptRunner.scriptThread'.":
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            conn.close()
        else:
            raise e

    except Exception as e:
        logger.error('oops in gremlin', exc_info=True)
        raise e

    return {'nodes_df': nodes_df, 'edges_df': edges_df, 'url': url, 'res': res}
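
The RuntimeError branch above works around closing an async connection from a Streamlit script thread that has no event loop. A hedged sketch of the same logic factored into a reusable helper (the helper name is mine, and matching a message substring rather than the exact text is an assumption made for robustness):

import asyncio

def close_connection_safely(conn):
    # Close a DriverRemoteConnection even from a thread without an event loop
    # (e.g. Streamlit's ScriptRunner thread).
    try:
        conn.close()
    except RuntimeError as e:
        if "no current event loop" in str(e):
            asyncio.set_event_loop(asyncio.new_event_loop())
            conn.close()
        else:
            raise
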
Example no. 3
    def test_traversals(self, remote_connection):
        statics.load_statics(globals())
        g = traversal().withRemote(remote_connection)

        assert long(6) == g.V().count().toList()[0]
        # #
        assert Vertex(1) == g.V(1).next()
        assert 1 == g.V(1).id().next()
        assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
        assert 1 == len(g.V(1).toList())
        assert isinstance(g.V(1).toList(), list)
        results = g.V().repeat(out()).times(2).name
        results = results.toList()
        assert 2 == len(results)
        assert "lop" in results
        assert "ripple" in results
        # #
        assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
        assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
        assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
        assert 4 == g.V()[2:].count().next()
        assert 2 == g.V()[:2].count().next()
        # #
        results = g.withSideEffect('a', ['josh', 'peter']).V(1).out('created').in_('created').values('name').where(P.within('a')).toList()
        assert 2 == len(results)
        assert 'josh' in results
        assert 'peter' in results
        # #
        results = g.V().out().profile().toList()
        assert 1 == len(results)
        assert 'metrics' in results[0]
        assert 'dur' in results[0]
        # #
        results = g.V().has('name', 'peter').as_('a').out('created').as_('b').select('a', 'b').by(
            __.valueMap()).toList()
        assert 1 == len(results)
        assert 'peter' == results[0]['a']['name'][0]
        assert 35 == results[0]['a']['age'][0]
        assert 'lop' == results[0]['b']['name'][0]
        assert 'java' == results[0]['b']['lang'][0]
        assert 2 == len(results[0]['a'])
        assert 2 == len(results[0]['b'])
        # #
        results = g.V(1).inject(g.V(2).next()).values('name').toList()
        assert 2 == len(results)
        assert 'marko' in results
        assert 'vadas' in results
        # #
        results = g.V().has('person', 'name', 'marko').map(lambda: ("it.get().value('name')", "gremlin-groovy")).toList()
        assert 1 == len(results)
        assert 'marko' in results
        # #
        # this test just validates that the underscored versions of steps conflicting with Gremlin work
        # properly and can be removed when the old steps are removed - TINKERPOP-2272
        results = g.V().filter_(__.values('age').sum_().and_(
            __.max_().is_(gt(0)), __.min_().is_(gt(0)))).range_(0, 1).id_().next()
        assert 1 == results
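
The bare out(), both(), gt(), within() and long() in this test are injected into globals() by statics.load_statics; a roughly equivalent set of explicit imports, for readers who prefer them:

from gremlin_python import statics
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.process.graph_traversal import __, out, both
from gremlin_python.process.traversal import P, Traverser
from gremlin_python.statics import long
from gremlin_python.structure.graph import Vertex

# With explicit imports, gt(0) and within('a') are spelled P.gt(0) and P.within('a').
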
Example no. 4
    def _depth_search(self,
                      start_vertex,
                      traverser,
                      search_depth=DEFAULT_SEARCH_DEPTH):
        depth = int(search_depth) if search_depth is not None else DEFAULT_SEARCH_DEPTH

        return (
            self.g.V(start_vertex)
            .repeat(traverser.as_('e').otherV())
            .times(depth)
            .emit()
            .project('e', 'v')
            .by(__.select('e').valueMap(True).fold())
            .by(__.valueMap(True).fold())
            .toList()
        )
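
A hedged usage sketch for _depth_search: the traverser argument is expected to be an anonymous edge step, so a caller presumably looks like the following (the edge label is an assumption):

# Inside the same class: expand two hops along 'has_identity' edges.
results = self._depth_search(
    start_vertex='some-vertex-id',          # placeholder vertex id
    traverser=__.bothE('has_identity'),     # assumed edge label
    search_depth=2)
# Each result is a dict like {'e': [<edge valueMap>], 'v': [<vertex valueMap>]}.
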
def run_filters(num_edges, state, city):
    global metrics
    g, conn = gremlin_helper.connect_to_neptune()

    logger.info('Querying neptune')
    tic = time.perf_counter()
    t = g.V().inE()
    # Conditionally add the state filtering in here
    if state != "All States":
        t = t.has('visited', 'state', state)
    # Conditionally add the city filtering in here
    if city != "":
        t = t.has('visited', 'city', city)
    res = t.limit(num_edges).outV().path().by(__.valueMap().with_(
        WithOptions.tokens)).toList()
    toc = time.perf_counter()
    logger.info(f'Query Execution: {toc-tic:0.02f} seconds')
    logger.debug('Query Result Count: %s', len(res))
    metrics['neptune_time'] = toc - tic

    nodes_df, edges_df = path_to_df(res)

    # Calculate the metrics
    metrics['node_cnt'] = nodes_df.size
    metrics['edge_cnt'] = edges_df.size
    metrics['prop_cnt'] = (nodes_df.size * nodes_df.columns.size) + \
        (edges_df.size * edges_df.columns.size)

    if nodes_df.size > 0:
        url = plot_url(nodes_df, edges_df)
    else:
        url = ""

    logger.info("Finished compute phase")

    try:
        conn.close()

    except RuntimeError as e:
        if str(e) == "There is no current event loop in thread 'ScriptRunner.scriptThread'.":
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            conn.close()
        else:
            raise e

    except Exception as e:
        logger.error('oops in gremlin', exc_info=True)
        raise e

    return {'nodes_df': nodes_df, 'edges_df': edges_df, 'url': url, 'res': res}
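
The incremental t = t.has(...) pattern above is the usual way to assemble a traversal conditionally before execution: each step returns the extended traversal, and nothing runs until a terminal step like toList(). A distilled sketch, assuming the same imports as the surrounding module (__ and WithOptions):

def filtered_edge_paths(g, num_edges, state=None, city=None):
    t = g.V().inE()
    if state and state != "All States":
        t = t.has('visited', 'state', state)  # property names taken from the snippet
    if city:
        t = t.has('visited', 'city', city)
    return t.limit(num_edges).outV().path().by(
        __.valueMap().with_(WithOptions.tokens)).toList()
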
Example no. 6
def test_unknown_vertices_later_discovered_get_repointed(gremlin_connector, cloudwanderer_boto3_session):
    """If we discover a resource (A) which has a relationship with a resource (B) but resource B
    has not been discovered yet, ensure that resource B is created as UNKNOWN and then
    is overwritten and cleaned up once it is discovered.
    """
    create_ec2_instances()
    ec2_instance = get_inferred_ec2_instances(cloudwanderer_boto3_session)[0]
    vpc = inferred_ec2_vpcs(cloudwanderer_boto3_session)[0]

    # Step 1 write ec2 without vpc, vpc gets written as unknown
    gremlin_connector.write_resource(ec2_instance)

    result_1, result_2 = (
        gremlin_connector.g.V(gremlin_connector.generate_vertex_id(ec2_instance.urn))
        .both()
        .path()
        .by(__.valueMap(True))
        .toList()[0]
    )
    unknown_vpc_urn = result_2["_urn"][0]
    assert "urn:aws:111111111111:eu-west-2:ec2:instance:i-" in result_1["_urn"][0]
    assert "urn:aws:unknown:eu-west-2:ec2:vpc:vpc-" in unknown_vpc_urn

    # Step 2 write vpc, vpc unknown gets deleted and its edges repointed
    gremlin_connector.write_resource(vpc)

    result_1, result_2 = (
        gremlin_connector.g.V(gremlin_connector.generate_vertex_id(ec2_instance.urn))
        .both()
        .path()
        .by(__.valueMap(True))
        .toList()[0]
    )
    assert result_1["_urn"][0].startswith("urn:aws:111111111111:eu-west-2:ec2:instance:i-")
    assert result_2["_urn"][0].startswith("urn:aws:111111111111:eu-west-2:ec2:vpc:vpc-")

    # Ensure the deleted vpc no longer exists
    unknown_vpc = gremlin_connector.g.V(unknown_vpc_urn).toList()

    assert unknown_vpc == []
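
Worth noting: path().by(__.valueMap(True)) yields Path objects that support indexing, which is what the tuple unpacking above relies on. A hedged mini-example:

# toList()[0] is a Path; its elements unpack like a sequence.
first_path = (gremlin_connector.g.V(some_vertex_id)  # some_vertex_id is a placeholder
              .both().path().by(__.valueMap(True)).toList()[0])
start_map, neighbour_map = first_path
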
def run_filters(num_edges, num_matches, transient_id):
    global metrics
    g, conn = gremlin_helper.connect_to_neptune()

    logger.info('Querying neptune')
    tic = time.perf_counter()
    t = g.V().hasLabel('transientId')
    if transient_id != "":
        # If using Neptune full text search this will perform much faster than the built in Gremlin text search
        t = t.has('uid', TextP.containing(transient_id))
    res = t.limit(num_matches).bothE().otherV().limit(num_edges).path().by(
        __.valueMap().with_(WithOptions.tokens)).toList()

    toc = time.perf_counter()
    logger.info(f'Query Execution: {toc-tic:0.02f} seconds')
    logger.debug('Query Result Count: %s', len(res))
    metrics['neptune_time'] = toc - tic

    nodes_df, edges_df = path_to_df(res)

    # Calculate the metrics
    metrics['node_cnt'] = nodes_df.size
    metrics['edge_cnt'] = edges_df.size
    metrics['prop_cnt'] = (nodes_df.size * nodes_df.columns.size) + \
        (edges_df.size * edges_df.columns.size)

    if nodes_df.size > 0:
        url = plot_url(nodes_df, edges_df)
    else:
        url = ""

    logger.info("Finished compute phase")

    try:
        conn.close()

    except RuntimeError as e:
        if str(e) == "There is no current event loop in thread 'ScriptRunner.scriptThread'.":
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            conn.close()
        else:
            raise e

    except Exception as e:
        logger.error('oops in gremlin', exc_info=True)
        raise e

    return {'nodes_df': nodes_df, 'edges_df': edges_df, 'url': url, 'res': res}
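
TextP.containing above is a full predicate scan unless the provider backs it with an index, which is what the comment about Neptune full text search alludes to. A hedged mini-example of the text predicates, which ship with gremlinpython:

from gremlin_python.process.traversal import TextP

g.V().hasLabel('transientId').has('uid', TextP.containing('abc')).toList()
g.V().hasLabel('transientId').has('uid', TextP.startingWith('ab')).toList()
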
Example no. 8
    def test_traversals(self, remote_connection):
        statics.load_statics(globals())
        assert "remoteconnection[ws://localhost:45940/gremlin,g]" == str(
            remote_connection)
        g = Graph().traversal().withRemote(remote_connection)

        assert long(6) == g.V().count().toList()[0]
        # #
        assert Vertex(1) == g.V(1).next()
        assert 1 == g.V(1).id().next()
        assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
        assert 1 == len(g.V(1).toList())
        assert isinstance(g.V(1).toList(), list)
        results = g.V().repeat(out()).times(2).name
        results = results.toList()
        assert 2 == len(results)
        assert "lop" in results
        assert "ripple" in results
        # #
        assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
        assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
        assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
        assert 4 == g.V()[2:].count().next()
        assert 2 == g.V()[:2].count().next()
        # #
        results = g.withSideEffect('a', ['josh', 'peter']).V(1).out('created').in_('created').values('name').where(
            within('a')).toList()
        assert 2 == len(results)
        assert 'josh' in results
        assert 'peter' in results
        # # todo: need a traversal metrics deserializer
        g.V().out().profile().next()
        # #
        results = g.V().has('name', 'peter').as_('a').out('created').as_('b').select('a', 'b').by(
            __.valueMap()).toList()
        assert 1 == len(results)
        assert 'peter' == results[0]['a']['name'][0]
        assert 35 == results[0]['a']['age'][0]
        assert 'lop' == results[0]['b']['name'][0]
        assert 'java' == results[0]['b']['lang'][0]
        assert 2 == len(results[0]['a'])
        assert 2 == len(results[0]['b'])
        # #
        results = g.V(1).inject(g.V(2).next()).values('name').toList()
        assert 2 == len(results)
        assert 'marko' in results
        assert 'vadas' in results
    def test_traversals(self, remote_connection):
        statics.load_statics(globals())
        assert "remoteconnection[ws://localhost:45940/gremlin,gmodern]" == str(remote_connection)
        g = traversal().withRemote(remote_connection)

        assert long(6) == g.V().count().toList()[0]
        # #
        assert Vertex(1) == g.V(1).next()
        assert 1 == g.V(1).id().next()
        assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
        assert 1 == len(g.V(1).toList())
        assert isinstance(g.V(1).toList(), list)
        results = g.V().repeat(out()).times(2).name
        results = results.toList()
        assert 2 == len(results)
        assert "lop" in results
        assert "ripple" in results
        # #
        assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
        assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
        assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
        assert 4 == g.V()[2:].count().next()
        assert 2 == g.V()[:2].count().next()
        # #
        results = g.withSideEffect('a', ['josh', 'peter']).V(1).out('created').in_('created').values('name').where(
            within('a')).toList()
        assert 2 == len(results)
        assert 'josh' in results
        assert 'peter' in results
        # #
        results = g.V().out().profile().toList()
        assert 1 == len(results)
        assert 'metrics' in results[0]
        assert 'dur' in results[0]
        # #
        results = g.V().has('name', 'peter').as_('a').out('created').as_('b').select('a', 'b').by(
            __.valueMap()).toList()
        assert 1 == len(results)
        assert 'peter' == results[0]['a']['name'][0]
        assert 35 == results[0]['a']['age'][0]
        assert 'lop' == results[0]['b']['name'][0]
        assert 'java' == results[0]['b']['lang'][0]
        assert 2 == len(results[0]['a'])
        assert 2 == len(results[0]['b'])
        # #
        results = g.V(1).inject(g.V(2).next()).values('name').toList()
        assert 2 == len(results)
        assert 'marko' in results
        assert 'vadas' in results
def posts_by_topic(g, topic):
    paths = (g.V().has('tag', 'tag', topic).inE('tagged').outV().path().by(
        __.valueMap(True)).toList())

    vertices = []
    edges = []
    for path in paths:
        start = vertex_to_dict(path[0])
        end = vertex_to_dict(path[2])
        vertices.append(start)
        vertices.append(end)
        edges.append(edge_to_dict(path[1], end['id'], start['id']))

    vertices = list({frozenset(item.items()): item
                     for item in vertices}.values())
    edges = list({frozenset(item.items()): item for item in edges}.values())
    return {'vertices': vertices, 'edges': edges}
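
posts_by_topic leans on vertex_to_dict and edge_to_dict helpers that are not shown. A hedged sketch of what they might look like, given that the path is modulated by __.valueMap(True) (so T.id / T.label tokens appear) and that the frozenset dedup requires hashable values:

from gremlin_python.process.traversal import T

def vertex_to_dict(vmap):
    # Assumed shape: unwrap single-valued property lists so values stay hashable.
    d = {'id': vmap[T.id], 'label': vmap[T.label]}
    d.update({k: v[0] for k, v in vmap.items() if k not in (T.id, T.label)})
    return d

def edge_to_dict(emap, source, target):
    # Assumed shape, matching call sites like edge_to_dict(path[1], source_id, target_id).
    return {'id': emap[T.id], 'label': emap[T.label], 'source': source, 'target': target}
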
Example no. 11
def get_onestop_flights_from_janus(from_, to, context):
    try:
        res = (
            g.V()
            .hasLabel("airport")
            .has("id", "9600276f-608f-4325-a037-f185848f2e28")
            .bothE("departing")
            .otherV()
            .as_("flight1")
            .hasLabel("flight")
            .values("flight_duration")
            .as_("fd")
            .select("flight1")
            .values("flight_time")
            .math("_ + fd + 60")
            .as_("flight1_time")
            .select("flight1")
            .bothE("arriving")
            .otherV()
            .hasLabel("airport")
            .bothE("departing")
            .otherV()
            .as_("flight2")
            .hasLabel("flight")
            .values("flight_time")
            .math("_ - flight1_time")
            .is_(P.gte(0))
            .where("flight1", P.eq("flight2"))
            .by("airlines")
            .select("flight2")
            .bothE("arriving")
            .otherV()
            .hasLabel("airport")
            .has("id", "ebc645cd-ea42-40dc-b940-69456b64d2dd")
            .select("flight1", "flight2")
            .by(__.valueMap())
            .limit(20)
            .toList()
        )

        for i in range(0, len(res)):
            res[i] = merge_flight_data(get_values(res[i]))
        return res
    except Exception as e:
        print(e)
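
The math() steps carry the layover arithmetic: "_" is the incoming numeric traverser, while fd and flight1_time refer to values labeled earlier with as_(). A hedged mini-example of the mechanism:

# Add a 60-minute buffer to every flight's departure time.
g.V().hasLabel("flight").values("flight_time").math("_ + 60").toList()
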
Example no. 12
    def _column_entities(cls, *, _g: GraphTraversalSource,
                         tables_ids: Iterable[str],
                         existing: EXISTING) -> None:
        # fetch database -> cluster -> schema -> table links
        g = _g.V(tuple(tables_ids))
        g = g.outE(EdgeTypes.Column.value.label)
        g = g.inV().hasLabel(VertexTypes.Column.value.label).as_('columns')

        # fetch column -> links (no Stat)
        for t in [EdgeTypes.Description]:
            g = g.coalesce(__.select('columns').outE(
                t.value.label).fold()).as_(t.name)

        g = g.select(EdgeTypes.Description.name).unfold()
        g = g.local(
            __.union(__.outV().id(), __.valueMap(True),
                     __.inV().id()).fold())
        cls._into_existing(g.toList(), existing)
Example no. 13
    def expire_connections_for_other(cls, *, _g: GraphTraversalSource,
                                     vertex_type: VertexType,
                                     keys: FrozenSet[str],
                                     existing: EXISTING) -> None:
        # V().has(label, 'key', P.without(keys)) is more intuitive but doesn't scale, so instead just find all those
        g = _g.V().hasLabel(vertex_type.label).where(__.bothE())
        g = g.values(WellKnownProperties.Key.value.name)
        all_to_expire_keys = set(g.toList()).difference(keys)

        # TODO: when any vertex ids that need something besides key
        all_to_expire = set(
            vertex_type.id(key=key) for key in all_to_expire_keys)

        for to_expire in chunk(all_to_expire, 1000):
            g = _g.V(tuple(to_expire)).bothE()
            g = g.local(
                __.union(__.outV().id(), __.valueMap(True),
                         __.inV().id()).fold())
            cls._into_existing(g.toList(), existing)
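
chunk is not shown in this excerpt; a hedged sketch of a batching helper with the shape these call sites expect (name and semantics inferred from usage):

from itertools import islice

def chunk(iterable, n):
    # Yield successive tuples of at most n items (assumed implementation).
    it = iter(iterable)
    while True:
        batch = tuple(islice(it, n))
        if not batch:
            return
        yield batch
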
Example no. 14
    async def _get_vertex_properties(self, vid, label):
        projection = self._g.V(vid).properties() \
                            .project('id', 'key', 'value', 'meta') \
                            .by(__.id()).by(__.key()).by(__.value()) \
                            .by(__.valueMap())
        props = await projection.toList()
        new_props = {'label': label, 'id': vid}
        for prop in props:
            key = prop['key']
            val = prop['value']
            # print('val_type', type(val))
            meta = prop['meta']
            new_props.setdefault(key, [])
            if meta:
                meta['key'] = key
                meta['value'] = val
                meta['id'] = prop['id']
                val = meta

            new_props[key].append(val)
        return new_props
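
The projection works because valueMap() applied to a property traverser returns that property's meta-properties, an empty dict when there are none, which is why plain properties fall through the "if meta:" branch. A hedged standalone version, assuming the same async client as the snippet:

from gremlin_python.process.graph_traversal import __

async def show_meta(g, vid):
    # Same projection as above, kept separate purely for illustration.
    props = await (g.V(vid).properties()
                   .project('key', 'value', 'meta')
                   .by(__.key()).by(__.value()).by(__.valueMap())
                   .toList())
    for p in props:
        print(p['key'], p['value'], p['meta'] or '(no meta-properties)')
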
    def test_upsert_rt(self) -> None:
        # test that we will insert
        db_name = Fixtures.next_database()
        database_uri = f'database://{db_name}'
        exists = self._get(label=VertexTypes.Database,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 0)
        self._upsert(label=VertexTypes.Database, key=database_uri, name='test')
        exists = self._get(label=VertexTypes.Database,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 1)

        # test that we will not insert (_get will explode if more than one vertex matches)
        self._upsert(label=VertexTypes.Database, key=database_uri, name='test')
        vertexValueMap = self._get(label=VertexTypes.Database,
                                   key=database_uri,
                                   extra_traversal=__.valueMap(),
                                   get=FromResultSet.toList)
        self.assertIsNotNone(vertexValueMap)
Example no. 16
def test_stale_edges_get_removed(gremlin_connector, iam_instance_profile):
    gremlin_connector.write_resource(resource=iam_instance_profile)
    result = get_vertex_and_edges(gremlin_connector, iam_instance_profile.urn)[0]

    assert result["v1"]["_urn"][0] == "urn:aws:111111111111:us-east-1:iam:instance_profile:my-test-profile"
    assert result["v2"]["_urn"][0] == "urn:aws:unknown:us-east-1:iam:role:test-role"

    # Remove the relationship with the role and ensure that we don't get a result
    # when we search for an edge on the instance profile
    iam_instance_profile.relationships = []
    iam_instance_profile.discovery_time = datetime.now()
    gremlin_connector.write_resource(resource=iam_instance_profile)

    assert (
        gremlin_connector.g.V(gremlin_connector.generate_vertex_id(iam_instance_profile.urn))
        .both()
        .path()
        .by(__.valueMap(True))
        .toList()
        == []
    )
def coauthored_posts(g, author1, author2):
    paths = (g.V().has('author', 'name', author1).as_('a')
             .inE('written_by').outV().hasLabel('post')
             .outE('written_by').inV().where(neq('a'))
             .has('author', 'name', author2)
             .path().by(__.valueMap(True)))

    vertices = []
    edges = []
    for path in paths:
        author1 = vertex_to_dict(path[0])
        post = vertex_to_dict(path[2])
        author2 = vertex_to_dict(path[4])
        vertices.append(author1)
        vertices.append(post)
        vertices.append(author2)
        edges.append(edge_to_dict(path[1], post['id'], author1['id']))
        edges.append(edge_to_dict(path[3], post['id'], author2['id']))

    vertices = list({frozenset(item.items()): item
                     for item in vertices}.values())
    edges = list({frozenset(item.items()): item for item in edges}.values())
    return {'vertices': vertices, 'edges': edges}
def posts_by_topic_and_author(g, topic, author):
    paths = (g.V().has('tag', 'tag', topic)
             .inE('tagged').outV().dedup()
             .outE('written_by').inV()
             .has('author', 'name', author)
             .path().by(__.valueMap(True)))

    vertices = []
    edges = []
    for path in paths:
        tag = vertex_to_dict(path[0])
        post = vertex_to_dict(path[2])
        author = vertex_to_dict(path[4])
        vertices.append(tag)
        vertices.append(post)
        vertices.append(author)
        edges.append(edge_to_dict(path[1], post['id'], tag['id']))
        edges.append(edge_to_dict(path[3], post['id'], author['id']))

    vertices = list({frozenset(item.items()): item
                     for item in vertices}.values())
    edges = list({frozenset(item.items()): item for item in edges}.values())
    return {'vertices': vertices, 'edges': edges}
Example no. 19
def test_write_resource_and_relationship(gremlin_connector, iam_role, iam_role_policies):
    gremlin_connector.write_resource(iam_role_policies[0])
    gremlin_connector.write_resource(resource=iam_role)
    result_1, result_2 = (
        gremlin_connector.g.V(gremlin_connector.generate_vertex_id(iam_role.urn))
        .both()
        .path()
        .by(__.valueMap(True))
        .toList()[0]
    )

    assert list(result_1.values()) == [
        ANY,
        "aws_iam_role",
        ["111111111111"],
        ["[{'PolicyNames': ['test-role']}]"],
        ["test-role"],
        ["test-role"],
        ["iam"],
        ["aws"],
        ["us-east-1"],
        [ANY],
        ["role"],
        ["urn:aws:111111111111:us-east-1:iam:role:test-role"],
    ]

    assert list(result_2.values()) == [
        ANY,
        "aws_iam_role_policy",
        ["111111111111"],
        ["test-role", "test-role-policy-1"],
        ["iam"],
        ["aws"],
        ["us-east-1"],
        [ANY],
        ["role_policy"],
        ["urn:aws:111111111111:us-east-1:iam:role_policy:test-role/test-role-policy-1"],
    ]
Example no. 20
    def all_edges_as_iterable(self, include_metadata: bool = False) -> Collection:
        """
        Get an iterator over all edges in this graph, in arbitrary order.

        Arguments:
            include_metadata (bool: False): Whether to include edge metadata

        Returns:
            Iterator: Edges as (source, target) or (source, target, metadata) tuples

        """
        if include_metadata:
            return iter(
                [
                    (e["source"], e["target"], _node_to_metadata(e["properties"]))
                    for e in (
                        self._g.V()
                        .outE()
                        .project("target", "source", "properties")
                        .by(__.inV().values(ID))
                        .by(__.outV().values(ID))
                        .by(__.valueMap(True))
                        .toList()
                    )
                ]
            )
        return iter(
            [
                (e["source"], e["target"])
                for e in self._g.V()
                .outE()
                .project("target", "source")
                .by(__.inV().values(ID))
                .by(__.outV().values(ID))
                .toList()
            ]
        )
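
_node_to_metadata is defined elsewhere in this module; a plausible hedged sketch, given that valueMap(True) wraps property values in lists and adds T.id / T.label tokens:

from gremlin_python.process.traversal import T

def _node_to_metadata(m):
    # Assumed behavior: drop the tokens and unwrap single-valued lists.
    return {
        str(k): v[0] if isinstance(v, list) and len(v) == 1 else v
        for k, v in m.items()
        if k not in (T.id, T.label)
    }
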
Example no. 21
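A hedged observation on the helper below: bothE().outV() returns each edge's out-vertex, which for an outgoing edge is the start vertex itself; when "v2" should always be the far endpoint, otherV() is the usual step. An alternative sketch (not the author's code):

def get_vertex_and_other_ends(gremlin_connector, urn):
    return (gremlin_connector.g.V(gremlin_connector.generate_vertex_id(urn))
            .as_("v1").bothE().as_("e").otherV().as_("v2")
            .select("v1", "e", "v2").by(__.valueMap(True)).toList())
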
def get_vertex_and_edges(gremlin_connector, urn):
    return (gremlin_connector.g.V(gremlin_connector.generate_vertex_id(urn))
            .as_("v1").bothE().as_("e").outV().as_("v2")
            .select("v1", "e", "v2").by(__.valueMap(True)).toList())
Example no. 22
    def table_entities(cls, *, _g: GraphTraversalSource,
                       table_data: List[Table], existing: EXISTING) -> None:

        all_tables_ids = list(
            set([
                VertexTypes.Table.value.id(
                    key=TableUris.get(database=t.database,
                                      cluster=t.cluster,
                                      schema=t.schema,
                                      table=t.name).table) for t in table_data
            ]))

        all_owner_ids = list(
            set([
                VertexTypes.User.value.id(key=key) for key in [
                    t.table_writer.id for t in table_data
                    if t.table_writer is not None
                ]
            ]))
        all_application_ids = list(
            set(
                list(
                    possible_vertex_ids_for_application_key(*[
                        t.table_writer.id for t in table_data
                        if t.table_writer is not None
                    ]))))

        # chunk these since 100,000s of ids at once seem to choke the server
        for tables_ids in chunk(all_tables_ids, 1000):
            LOGGER.info(f'fetching for tables: {tables_ids}')
            # fetch database -> cluster -> schema -> table links
            g = _g.V(tuple(tables_ids)).as_('tables')
            g = g.coalesce(__.inE(
                EdgeTypes.Table.value.label).dedup().fold()).as_(
                    EdgeTypes.Table.name)
            g = g.coalesce(__.unfold().outV().hasLabel(
                VertexTypes.Schema.value.label).inE(
                    EdgeTypes.Schema.value.label).dedup().fold()).as_(
                        EdgeTypes.Schema.name)
            g = g.coalesce(__.unfold().outV().hasLabel(
                VertexTypes.Cluster.value.label).inE(
                    EdgeTypes.Cluster.value.label).dedup().fold()).as_(
                        EdgeTypes.Cluster.name)

            # fetch table <- links
            for t in (EdgeTypes.BelongToTable, EdgeTypes.Generates,
                      EdgeTypes.Tag):
                g = g.coalesce(__.select('tables').inE(
                    t.value.label).fold()).as_(t.name)

            # fetch table -> column et al links
            for t in (EdgeTypes.Column, EdgeTypes.Description,
                      EdgeTypes.LastUpdatedAt, EdgeTypes.Source,
                      EdgeTypes.Stat):
                g = g.coalesce(__.select('tables').outE(
                    t.value.label).fold()).as_(t.name)

            # TODO: add owners, watermarks, last timestamp existing, source
            aliases = set([
                t.name
                for t in (EdgeTypes.Table, EdgeTypes.Schema, EdgeTypes.Cluster,
                          EdgeTypes.BelongToTable, EdgeTypes.Generates,
                          EdgeTypes.Tag, EdgeTypes.Column,
                          EdgeTypes.Description, EdgeTypes.LastUpdatedAt,
                          EdgeTypes.Source, EdgeTypes.Stat)
            ])
            g = g.select(*aliases).unfold().select(MapColumn.values).unfold()
            g = g.local(
                __.union(__.outV().id(), __.valueMap(True),
                         __.inV().id()).fold())
            cls._into_existing(g.toList(), existing)

            cls._column_entities(_g=_g,
                                 tables_ids=tables_ids,
                                 existing=existing)

        # fetch Application, User
        for ids in chunk(list(set(all_application_ids + all_owner_ids)), 5000):
            LOGGER.info(f'fetching for application/owners: {ids}')
            g = _g.V(ids).valueMap(True)
            cls._into_existing(g.toList(), existing)
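
The recurring coalesce(__...fold()) idiom in table_entities deserves a note: fold() always emits exactly one traverser, a possibly empty list, so every as_() alias gets bound even for tables missing that edge type, and the final select(*aliases) never drops rows. A hedged minimal form (the 'COLUMN' label is illustrative):

g = _g.V(tuple(tables_ids)).as_('tables')
g = g.coalesce(__.outE('COLUMN').fold()).as_('columns')  # 'columns' always holds a list
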
    def test_traversals(self, remote_connection):
        statics.load_statics(globals())
        g = traversal().withRemote(remote_connection)

        assert long(6) == g.V().count().toList()[0]
        # #
        assert Vertex(1) == g.V(1).next()
        assert 1 == g.V(1).id().next()
        assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
        assert 1 == len(g.V(1).toList())
        assert isinstance(g.V(1).toList(), list)
        results = g.V().repeat(out()).times(2).name
        results = results.toList()
        assert 2 == len(results)
        assert "lop" in results
        assert "ripple" in results
        # #
        assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
        assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
        assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
        assert 4 == g.V()[2:].count().next()
        assert 2 == g.V()[:2].count().next()
        # #
        results = g.withSideEffect('a', ['josh', 'peter']).V(1).out('created').in_('created').values('name').where(
            P.within('a')).toList()
        assert 2 == len(results)
        assert 'josh' in results
        assert 'peter' in results
        # #
        results = g.V().out().profile().toList()
        assert 1 == len(results)
        assert 'metrics' in results[0]
        assert 'dur' in results[0]
        # #
        results = g.V().has('name', 'peter').as_('a').out('created').as_('b').select('a', 'b').by(
            __.valueMap()).toList()
        assert 1 == len(results)
        assert 'peter' == results[0]['a']['name'][0]
        assert 35 == results[0]['a']['age'][0]
        assert 'lop' == results[0]['b']['name'][0]
        assert 'java' == results[0]['b']['lang'][0]
        assert 2 == len(results[0]['a'])
        assert 2 == len(results[0]['b'])
        # #
        results = g.V(1).inject(g.V(2).next()).values('name').toList()
        assert 2 == len(results)
        assert 'marko' in results
        assert 'vadas' in results
        # #
        results = g.V().has('person', 'name', 'marko').map(
            lambda: ("it.get().value('name')", "gremlin-groovy")).toList()
        assert 1 == len(results)
        assert 'marko' in results
        # #
        # this test just validates that the underscored versions of steps conflicting with Gremlin work
        # properly and can be removed when the old steps are removed - TINKERPOP-2272
        results = g.V().filter_(__.values('age').sum_().and_(
            __.max_().is_(gt(0)), __.min_().is_(gt(0)))).range_(0, 1).id_().next()
        assert 1 == results
        # #
        # test binding in P
        results = g.V().has('person', 'age',
                            Bindings.of('x', lt(30))).count().next()
        assert 2 == results
        # #
        # test dict keys which can only work on GraphBinary and GraphSON3 which include specific serialization
        # types for dict
        if not isinstance(remote_connection._client._message_serializer,
                          GraphSONSerializersV2d0):
            results = g.V().has(
                'person', 'name',
                'marko').elementMap("name").groupCount().next()
            assert {
                HashableDict.of({
                    T.id: 1,
                    T.label: 'person',
                    'name': 'marko'
                }): 1
            } == results
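
Bindings.of('x', lt(30)) sends the predicate value as a named binding instead of inlining it, which lets a server cache the compiled traversal across different bound values. A hedged inline equivalent without a binding:

results = g.V().has('person', 'age', P.lt(30)).count().next()
assert 2 == results
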