Example #1
0
    def test_side_effects(self):
        """Check side-effect retrieval through a remote traversal.

        Covers a traversal without side effects, ``groupCount("m")``,
        ``groupCount("m")`` combined with ``aggregate("n")``, and a value
        injected with ``withSideEffect``. The connection is closed in a
        ``finally`` clause so a failing assertion does not leak it.
        """
        statics.load_statics(globals())
        connection = DriverRemoteConnection('ws://localhost:45940/gremlin',
                                            'g')
        try:
            g = Graph().traversal().withRemote(connection)

            # No side-effect step: no keys, and key lookup must fail.
            t = g.V().hasLabel("project").name.iterate()
            assert 0 == len(t.side_effects.keys())
            try:
                t.side_effects["m"]
            except KeyError:
                pass
            else:
                raise Exception(
                    "Accessing a non-existent key should throw an error")

            # A single groupCount side effect stored under "m".
            t = g.V().out("created").groupCount("m").by("name")
            results = t.toSet()
            assert 2 == len(results)
            assert Vertex(3) in results
            assert Vertex(5) in results
            assert 1 == len(t.side_effects.keys())
            assert "m" in t.side_effects.keys()
            m = t.side_effects["m"]
            assert isinstance(m, dict)
            assert 2 == len(m)
            assert 3 == m["lop"]
            assert 1 == m["ripple"]
            assert isinstance(m["lop"], long)
            assert isinstance(m["ripple"], long)

            # Two side effects on the same traversal: "m" and "n".
            t = g.V().out("created").groupCount("m").by("name") \
                .name.aggregate("n")
            results = t.toSet()
            assert 2 == len(results)
            assert "lop" in results
            assert "ripple" in results
            assert 2 == len(t.side_effects.keys())
            assert "m" in t.side_effects.keys()
            assert "n" in t.side_effects.keys()
            n = t.side_effects.get("n")
            assert isinstance(n, dict)
            assert 2 == len(n)
            assert "lop" in n.keys()
            assert "ripple" in n.keys()
            assert 3 == n["lop"]
            assert 1 == n["ripple"]

            # A side effect supplied up front via withSideEffect.
            t = g.withSideEffect('m', 32).V().map(
                lambda: "x: x.sideEffects('m')")
            results = t.toSet()
            assert 1 == len(results)
            assert 32 == list(results)[0]
            assert 32 == t.side_effects['m']
            assert 1 == len(t.side_effects.keys())
            try:
                t.side_effects["x"]
            except KeyError:
                pass
            else:
                raise Exception(
                    "Accessing a non-existent key should throw an error")
        finally:
            connection.close()
    def test_traversals(self):
        """Check basic remote traversal results and range indexing.

        The connection is closed in a ``finally`` clause so a failing
        assertion does not leak it.
        """
        statics.load_statics(globals())
        connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
        try:
            assert "remoteconnection[ws://localhost:8182/gremlin,g]" == str(
                connection)
            g = Graph().traversal().withRemote(connection)

            assert long(6) == g.V().count().toList()[0]

            # Terminal steps on a single vertex.
            assert Vertex(1) == g.V(1).next()
            assert 1 == g.V(1).id().next()
            assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
            assert 1 == len(g.V(1).toList())
            assert isinstance(g.V(1).toList(), list)
            results = g.V().repeat(out()).times(2).name.toList()
            assert 2 == len(results)
            assert "lop" in results
            assert "ripple" in results

            # Index and slice syntax on traversals.
            assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
            assert 1 == g.V().repeat(both()).times(5)[0].count().next()
            assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
            assert 4 == g.V()[2:].count().next()
            assert 2 == g.V()[:2].count().next()
            # todo: need a traversal metrics deserializer
            g.V().out().profile().next()
        finally:
            connection.close()
    def test_side_effect_close(self):
        """Check which side effects stay readable after ``side_effects.close()``.

        The connection is closed in a ``finally`` clause so a failing
        assertion does not leak it.
        """
        connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
        try:
            g = Graph().traversal().withRemote(connection)
            t = g.V().aggregate('a').aggregate('b')
            t.toList()

            # The 'a' key should return some side effects
            results = t.side_effects.get('a')
            assert results

            # Close result is None
            results = t.side_effects.close()
            assert not results

            # Shouldn't get any new info from server
            # 'b' isn't in local cache
            results = t.side_effects.get('b')
            assert not results

            # But 'a' should still be cached locally
            results = t.side_effects.get('a')
            assert results

            # 'a' should have been added to local keys cache, but not 'b'
            results = t.side_effects.keys()
            assert len(results) == 1
            a, = results
            assert a == 'a'

            # Try to get 'b' directly from server, should throw error
            with pytest.raises(Exception):
                t.side_effects.value_lambda('b')
        finally:
            connection.close()
Example #4
0
 def test_traversals(self):
     """Check basic remote traversal results and range indexing.

     The vertex count is compared against a plain ``6`` instead of the
     Python 2-only literal ``6L``, which is a syntax error on Python 3 and
     compares equal on Python 2. The connection is closed in a ``finally``
     clause so a failing assertion does not leak it.
     """
     statics.load_statics(globals())
     connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
     try:
         assert "remoteconnection[ws://localhost:8182/gremlin,g]" == str(
             connection)
         g = Graph().traversal().withRemote(connection)

         assert 6 == g.V().count().toList()[0]

         # Terminal steps on a single vertex.
         assert Vertex(1) == g.V(1).next()
         assert 1 == g.V(1).id().next()
         assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
         assert 1 == len(g.V(1).toList())
         assert isinstance(g.V(1).toList(), list)

         results = g.V().repeat(out()).times(2).name.toList()
         assert 2 == len(results)
         assert "lop" in results
         assert "ripple" in results

         # Index and slice syntax on traversals.
         assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
         assert 1 == g.V().repeat(both()).times(5)[0].count().next()
         assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
         assert 4 == g.V()[2:].count().next()
         assert 2 == g.V()[:2].count().next()
         # todo: need a traversal metrics deserializer
         g.V().out().profile().next()
     finally:
         connection.close()
 def test_strategies(self):
     """Check SubgraphStrategy and withComputer against the remote graph.

     The connection is closed in a ``finally`` clause so a failing
     assertion does not leak it.
     """
     statics.load_statics(globals())
     connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
     try:
         # Strategy given by name plus a configuration dict.
         g = Graph().traversal().withRemote(connection).withStrategies(
             TraversalStrategy("SubgraphStrategy",
                               {"vertices": __.hasLabel("person"),
                                "edges": __.hasLabel("created")}))
         assert 4 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert 1 == g.V().label().dedup().count().next()
         assert "person" == g.V().label().dedup().next()

         # Same strategy through the SubgraphStrategy class.
         g = Graph().traversal().withRemote(connection).withStrategies(
             SubgraphStrategy(vertices=__.hasLabel("person"),
                              edges=__.hasLabel("created")))
         assert 4 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert 1 == g.V().label().dedup().count().next()
         assert "person" == g.V().label().dedup().next()

         # Drop the strategy and restrict the graph via withComputer.
         g = g.withoutStrategies(SubgraphStrategy).withComputer(
             workers=4, vertices=__.has("name", "marko"), edges=__.limit(0))
         assert 1 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert "person" == g.V().label().next()
         assert "marko" == g.V().name.next()

         # Default withComputer sees the full graph.
         g = Graph().traversal().withRemote(connection).withComputer()
         assert 6 == g.V().count().next()
         assert 6 == g.E().count().next()
     finally:
         connection.close()
Example #6
0
def create_vertices(term: str, entities: list):
    """
    Link a term vertex to one entity vertex per item in ``entities``.

    The term vertex (label "term", property "value") and each entity vertex
    (label "entity", properties "value", "score", "type") are reused when a
    matching vertex exists and created otherwise. A "has_entity" edge is then
    added from the term vertex to each entity vertex.

    :param term: value stored on / looked up for the term vertex.
    :param entities: items indexed with the keys "entity", "score" and "type".
    """
    graph = Graph()
    connection = DriverRemoteConnection(
        f'wss://{os.environ["NEPTUNE_ENDPOINT"]}:8182/gremlin', 'g')

    try:
        g = graph.traversal().withRemote(connection)

        # Reuse the term vertex when it already exists.
        term_lookup = g.V().has("term", "value", term)
        if term_lookup.hasNext():
            term_vertex = term_lookup.next()
        else:
            term_vertex = g.addV("term").property("value", term).next()

        for e in entities:
            # Look up by the same value that is stored in "value" below;
            # comparing against the whole item never matches, so a new
            # entity vertex would be created on every call.
            entity_lookup = g.V().has("entity", "value", e["entity"])
            if entity_lookup.hasNext():
                entity_vertex = entity_lookup.next()
            else:
                entity_vertex = g.addV("entity") \
                    .property("value", e["entity"]) \
                    .property("score", e["score"]) \
                    .property("type", e["type"]).next()

            g.V(term_vertex).addE("has_entity").to(entity_vertex).iterate()
    finally:
        # Release the connection even when a query fails part-way.
        connection.close()
 def go():
     """Open a pooled connection, yield once, and check the vertex count.

     Written as a generator (it yields ``gen.sleep(0)``); presumably wrapped
     as a coroutine by the enclosing test, which is not visible here. The
     connection is closed in ``finally`` so a failed assertion does not
     leak it.
     """
     conn = DriverRemoteConnection(
         'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
     try:
         g = Graph().traversal().withRemote(conn)
         yield gen.sleep(0)
         assert len(g.V().toList()) == 6
     finally:
         conn.close()
    def test_side_effect_close(self):
        """Check which side effects stay readable after ``side_effects.close()``.

        The connection is closed in a ``finally`` clause so a failing
        assertion does not leak it.
        """
        connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
        try:
            g = Graph().traversal().withRemote(connection)
            t = g.V().aggregate('a').aggregate('b')
            t.toList()

            # The 'a' key should return some side effects
            results = t.side_effects.get('a')
            assert results

            # Close result is None
            results = t.side_effects.close()
            assert not results

            # Shouldn't get any new info from server
            # 'b' isn't in local cache
            results = t.side_effects.get('b')
            assert not results

            # But 'a' should still be cached locally
            results = t.side_effects.get('a')
            assert results

            # 'a' should have been added to local keys cache, but not 'b'
            results = t.side_effects.keys()
            assert len(results) == 1
            a, = results
            assert a == 'a'

            # Try to get 'b' directly from server, should throw error
            with pytest.raises(Exception):
                t.side_effects.value_lambda('b')
        finally:
            connection.close()
Example #9
0
def setup_graph(conn_string=DEFAULT_LOCAL_CONNECTION_STRING):
    """
    Connect to a property graph service and return a Gremlin traversal source.

    :param conn_string: connection string passed to DriverRemoteConnection
    :return: traversal source bound to the remote connection
    :raises ConnectionError: if any step of connecting fails; the original
        exception is chained as the cause
    """
    try:
        logging.debug('Trying To Connect')
        # Open and immediately close one connection first, then open the
        # connection that backs the returned traversal source.
        probe = DriverRemoteConnection(conn_string, 'g')
        probe.close()
        connection = DriverRemoteConnection(conn_string, 'g')
        logging.debug('Connected')
        # The connection should be closed on shut down to close open
        # connections with connection.close()
        g = traversal().withRemote(connection)

        logging.info('Successfully connected to the graph server')
    except Exception as e:  # Shouldn't really be so broad
        logging.error("Could not connect to the Gremlin server. Run for example:"
                      "\n'docker run --rm --name janusgraph-default janusgraph/janusgraph:latest' OR"
                      "\n'docker run --name gremlin-server -p 8182:8182 tinkerpop/gremlin-server'")
        # Chain the cause so the real failure is visible in the traceback.
        raise ConnectionError("Could not connect to the Gremlin server.") from e
    return g
 def go():
     """Open a pooled connection, yield once, and check the vertex count.

     Written as a generator (it yields ``gen.sleep(0)``); presumably wrapped
     as a coroutine by the enclosing test, which is not visible here. The
     connection is closed in ``finally`` so a failed assertion does not
     leak it.
     """
     conn = DriverRemoteConnection(
         'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
     try:
         g = traversal().withRemote(conn)
         yield gen.sleep(0)
         assert len(g.V().toList()) == 6
     finally:
         conn.close()
Example #11
0
def lambda_handler(event, context):
    """Add a FRIEND edge between two users unless they are already friends.

    Reads ``userId1`` and ``userId2`` from ``event``. Returns status 400 when
    ``userId2`` is already among the FRIEND neighbours of ``userId1``, and
    status 200 after the edge has been added. The connection is closed on
    every path, including the early 400 return.
    """
    graph = Graph()
    uid1 = event["userId1"]
    uid2 = event["userId2"]

    remoteConn = DriverRemoteConnection('ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin','g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        friends = g.V().hasLabel('User').has('uid', uid1) \
            .both('FRIEND').aggregate('friends') \
            .valueMap().toList()
        # valueMap() yields list-valued properties; take the first uid.
        friend_uids = [item["uid"][0] for item in friends]
        if uid2 in friend_uids:
            return {
                'statusCode': 400
            }

        a = g.V().hasLabel('User').has('uid', uid1).next()
        b = g.V().hasLabel('User').has('uid', uid2).next()
        g.V(a).addE('FRIEND').to(b).iterate()
    finally:
        remoteConn.close()

    return {
        'statusCode': 200
    }
 def test_strategies(self):
     """Check SubgraphStrategy and withComputer against the remote graph.

     The connection is closed in a ``finally`` clause so a failing
     assertion does not leak it.
     """
     statics.load_statics(globals())
     connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
     try:
         # Strategy given by name plus a configuration dict.
         g = Graph().traversal().withRemote(connection).withStrategies(
             TraversalStrategy("SubgraphStrategy",
                               {"vertices": __.hasLabel("person"),
                                "edges": __.hasLabel("created")}))
         assert 4 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert 1 == g.V().label().dedup().count().next()
         assert "person" == g.V().label().dedup().next()

         # Same strategy through the SubgraphStrategy class.
         g = Graph().traversal().withRemote(connection).withStrategies(
             SubgraphStrategy(vertices=__.hasLabel("person"),
                              edges=__.hasLabel("created")))
         assert 4 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert 1 == g.V().label().dedup().count().next()
         assert "person" == g.V().label().dedup().next()

         # Drop the strategy and restrict the graph via withComputer.
         g = g.withoutStrategies(SubgraphStrategy).withComputer(
             workers=4, vertices=__.has("name", "marko"), edges=__.limit(0))
         assert 1 == g.V().count().next()
         assert 0 == g.E().count().next()
         assert "person" == g.V().label().next()
         assert "marko" == g.V().name.next()

         # Default withComputer sees the full graph.
         g = Graph().traversal().withRemote(connection).withComputer()
         assert 6 == g.V().count().next()
         assert 6 == g.E().count().next()
     finally:
         connection.close()
Example #13
0
def main():
    """Connect to the server named by ``DB_ENDPOINT`` and run ``seed_users``.

    The connection is closed even if seeding raises.
    """
    graph = Graph()
    remote = DriverRemoteConnection('ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = graph.traversal().withRemote(remote)
        seed_users(g)
    finally:
        remote.close()
Example #14
0
def handle_request():
    """Run a one-vertex query against the local server as a health check.

    Returns:
        True when the query completes, False when a ``RuntimeError`` is
        raised while connecting, querying or closing.
    """
    try:
        remote_connection = DriverRemoteConnection("ws://localhost:45940/gremlin", "g")
        try:
            g = traversal().withRemote(remote_connection)
            g.V().limit(1).toList()
        finally:
            # Close on the failure path too, so a failed probe does not
            # leave a connection open.
            remote_connection.close()
        return True
    except RuntimeError:
        return False
    def test_side_effects(self):
        """Check side-effect retrieval through a remote traversal.

        Covers a traversal without side effects, ``groupCount("m")``,
        ``groupCount("m")`` combined with ``aggregate("n")``, and a value
        injected with ``withSideEffect``. The connection is closed in a
        ``finally`` clause so a failing assertion does not leak it.
        """
        statics.load_statics(globals())
        connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
        try:
            g = Graph().traversal().withRemote(connection)

            # No side-effect step: no keys, and key lookup must fail.
            t = g.V().hasLabel("project").name.iterate()
            assert 0 == len(t.side_effects.keys())
            try:
                t.side_effects["m"]
            except KeyError:
                pass
            else:
                raise Exception("Accessing a non-existent key should throw an error")

            # A single groupCount side effect stored under "m".
            t = g.V().out("created").groupCount("m").by("name")
            results = t.toSet()
            assert 2 == len(results)
            assert Vertex(3) in results
            assert Vertex(5) in results
            assert 1 == len(t.side_effects.keys())
            assert "m" in t.side_effects.keys()
            m = t.side_effects["m"]
            assert isinstance(m, dict)
            assert 2 == len(m)
            assert 3 == m["lop"]
            assert 1 == m["ripple"]
            assert isinstance(m["lop"], long)
            assert isinstance(m["ripple"], long)

            # Two side effects on the same traversal: "m" and "n".
            t = g.V().out("created").groupCount("m").by("name") \
                .name.aggregate("n")
            results = t.toSet()
            assert 2 == len(results)
            assert "lop" in results
            assert "ripple" in results
            assert 2 == len(t.side_effects.keys())
            assert "m" in t.side_effects.keys()
            assert "n" in t.side_effects.keys()
            n = t.side_effects.get("n")
            assert isinstance(n, dict)
            assert 2 == len(n)
            assert "lop" in n.keys()
            assert "ripple" in n.keys()
            assert 3 == n["lop"]
            assert 1 == n["ripple"]

            # A side effect supplied up front via withSideEffect.
            t = g.withSideEffect('m', 32).V().map(
                lambda: "x: x.sideEffects('m')")
            results = t.toSet()
            assert 1 == len(results)
            assert 32 == list(results)[0]
            assert 32 == t.side_effects['m']
            assert 1 == len(t.side_effects.keys())
            try:
                t.side_effects["x"]
            except KeyError:
                pass
            else:
                raise Exception("Accessing a non-existent key should throw an error")
        finally:
            connection.close()
Example #16
0
File: views.py Project: vivtan11/SF
def actors(request):
    """Render up to 40 property values of vertices pointing at a movie.

    The movie is matched on its ``title`` property using the POSTed
    ``movie_name``. The POST value is read before connecting, and the
    connection is closed even when the query fails.
    """
    movie_name = request.POST['movie_name']
    graph = Graph()
    remoteConn = DriverRemoteConnection('ws://<neptune endpoint>:8182/gremlin',
                                        'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        myList = g.V().has(
            'title', movie_name).in_().limit(40).values().toList()
    finally:
        remoteConn.close()
    context = {'movie': movie_name, 'actors': myList}
    return render(request, 'polls/movie-results.html', context)
Example #17
0
def lambda_handler(event, context):
    """Return the FRIEND neighbours of a user with their profile fields.

    Profile data (first name, last name, picture URL) comes from the
    DynamoDB ``user`` table; friends come from the graph, starting at the
    User vertex whose ``uid`` equals ``event["userId"]``.

    The response body is a list of ``[uid, firstName, lastName, pic_url]``
    lists. The recommendation code that used to follow the ``return`` was
    unreachable (and referenced undefined names), so it has been removed;
    the connection, which that dead code was supposed to close, is now
    closed in ``finally``.
    """
    print(event)
    graph = Graph()
    table = dynamodb_client.Table('user')
    # Only the 'Items' of this single scan() response are read.
    scan_result = table.scan()
    dict1 = {}
    for item in scan_result['Items']:
        dict1[item['UID']] = [
            (item['firstName'], item['lastName'], item['pic_url'])
        ]

    print(dict1)
    remoteConn = DriverRemoteConnection(
        'ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin',
        'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        key = event["userId"]
        friends = g.V().hasLabel('User').has('uid', key) \
            .both('FRIEND').aggregate('friends') \
            .valueMap().toList()
    finally:
        remoteConn.close()

    list2 = []
    for item in friends:
        # valueMap() yields list-valued properties; take the first uid.
        friend_uid = item["uid"][0]
        list2.append([friend_uid, *dict1[friend_uid][0]])
    return {'statusCode': 200, 'body': list2}
Example #18
0
def movies(request):
    """Render up to 40 property values of vertices an actor points at.

    The actor is matched on its ``name`` property using the POSTed
    ``actor_name``. The POST value is read before connecting, and the
    connection is closed even when the query fails.
    """
    actor_name = request.POST['actor_name']
    graph = Graph()
    remoteConn = DriverRemoteConnection('ws://<path to neptune>:8182/gremlin',
                                        'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        myList = g.V().has(
            'name', actor_name).out().limit(40).values().toList()
    finally:
        remoteConn.close()
    context = {'actor': actor_name, 'movies': myList}
    return render(request, 'polls/movie-results.html', context)
Example #19
0
def main():
    """Connect to the server named by ``DB_ENDPOINT`` and run ``flush``.

    The connection is closed even if flushing raises.
    """
    graph = Graph()
    remote = DriverRemoteConnection(
        'ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = graph.traversal().withRemote(remote)

        print('Flushing existing vertices in local db...')
        flush(g)
        print('Done.')
    finally:
        remote.close()
Example #20
0
 def _executor(self, q, loop):
     """Run a vertex-count check on ``loop`` and report the outcome on ``q``.

     Puts ``'success!'`` on the queue when the check passes, otherwise the
     type of the raised exception. The connection, if it was created, is
     closed on both paths.
     """
     connection = None
     try:
         connection = DriverRemoteConnection('ws://localhost:45940/gremlin',
                                             'g',
                                             loop=loop)
         g = Graph().traversal().withRemote(connection)
         assert len(g.V().toList()) == 6
     except BaseException:
         # Deliberately as broad as the original bare ``except``: every
         # failure is reported through the queue (presumably so the reader
         # of ``q`` never waits forever — confirm against the caller).
         q.put(sys.exc_info()[0])
     else:
         q.put('success!')
     finally:
         if connection is not None:
             connection.close()
Example #21
0
File: views.py Project: vivtan11/SF
def separation(request):
    """Render up to 40 paths linking the first two POSTed actor names.

    ``actor_names`` is split on commas. Paths start at the vertex whose
    ``name`` is the first entry and repeat ``out().in_()`` over simple paths
    until a vertex named like the second entry is reached. The connection is
    closed even when the query fails.
    """
    statics.load_statics(globals())
    inputs = [x.strip() for x in request.POST['actor_names'].split(',')]
    graph = Graph()
    remoteConn = DriverRemoteConnection(
        'ws://<neptune endpoint>.com:8182/gremlin', 'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        myList = g.V().has(
            'name', inputs[0]).repeat(out().in_().simplePath()).until(
                has('name',
                    inputs[1])).path().by('name').by('title').limit(40).toList()
    finally:
        remoteConn.close()
    context = {'actors': request.POST['actor_names'], 'separation': myList}
    return render(request, 'polls/movie-results.html', context)
Example #22
0
def kg_testing(inst=1, M=10, N=5, testing=False):
    """Build a sample knowledge graph and write GloVe and OCR data to it.

    Results of each ``kg`` call and of ``thought`` are printed; nothing is
    returned. The remote connection is closed even if one of the steps
    raises.

    Args:
        inst: instance index used for ``cfg["instances"]`` and passed to the
            project helpers.
        M: number of random data points; also caps the GloVe rows written.
        N: number of properties, capped at ``const.MAX_FEATURES``.
        testing: flag forwarded to ``kg`` and ``cognitive``.
    """
    # number of data points and properties
    m = M
    p = min(N, const.MAX_FEATURES)
    # define the number of splits of each property
    s = min(p, const.MAX_SPLITS)
    # uniformly sample values between 0 and 1 as the data set
    dat = np.random.sample(size=(m, p))
    # create column names (normally obtained by var.dtype.names)
    #
    # use an explicit list of (name, index) pairs so the order is preserved
    coln = [("col" + str(i), (i - 1)) for i in range(1, p + 1)]
    # create the data for the sample knowledge graph (only one brain)
    kgdat = create_kg(inst, dat, s, [[int(i) for i in np.asarray(coln)[:, 1]]])
    # populate the knowledge graph into the remote DB
    #
    # instantiate a JanusGraph object
    graph = Graph()
    # connection to the remote server
    conn = DriverRemoteConnection(url_kg(inst), 'g')
    try:
        # get the remote graph traversal
        g = graph.traversal().withRemote(conn)
        # we only want to process the right brain
        print(kg(const.V, inst, coln, kgdat, g, False, testing))
        # after building the knowledge graph, use the output of ocr to test
        # the GloVe write
        #
        # call cognitive to produce the ocr output
        oret = ocr_testing()
        # get the location of the glove file
        src = cfg["instances"][inst]["src"]["index"]
        typ = cfg["instances"][inst]["src"]["types"]["glove"]
        gfl = cfg["instances"][inst]["sources"][src][typ]["connection"]["file"]
        # call extendglove to produce the GloVe output and transform it to an
        # array with the first term in each row being the key and all other
        # terms are values
        rdat = extendglove(oret[0][0], gfl[0])
        rdat = [(k, v) for k, v in list(rdat.items())[0:M]]
        # write the glove output to the knowledge graph
        print(kg(const.ENTS, inst, coln, rdat, g, False, testing))
        # get the ocr data ... using the real way to get the ocr data here
        typ = cfg["instances"][inst]["src"]["types"]["ocrf"]
        pdfs = cfg["instances"][inst]["sources"][src][typ]["connection"]["files"]
        cdat = cognitive(const.OCR, pdfs, inst, False, testing)
        # write the ocr data to the graph
        print(kg(const.CONS, inst, coln, cdat[1:], g, True, testing))
    finally:
        # close the connection, also when one of the steps above fails
        conn.close()
    # test the thought function with the default number of predictions 3
    print(thought(inst, coln))
Example #23
0
def main():
    """Query per-label property types and round-trip them through YAML.

    Submits a Gremlin script through a ``client.Client``, builds the node
    info from the first result, prints its YAML encoding and the objects
    decoded back from it. Both the client and the remote connection are
    closed, also on failure.
    """
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin',
                                        '{0}_traversal'.format('bandi'))
    try:
        c = client.Client('ws://localhost:8182/gremlin',
                          '{0}_traversal'.format('bandi'))
        try:
            results = c.submit(
                'g.V().group().by(label).by(properties().group().by(key).by(value().map{it.get().getClass()}))'
            ).all().result()
        finally:
            c.close()
        nodes_info = NodeInfo.from_result(results[0])

        # A real list is needed here: on Python 3 ``map`` returns an
        # iterator, which yaml.safe_dump cannot represent.
        encoded = yaml.safe_dump(
            {'nodes': [e.to_dict() for e in nodes_info]})
        print(encoded)
        print(NodeInfo.from_dict(yaml.safe_load(encoded)))
    finally:
        connection.close()
Example #24
0
File: views.py Project: vivtan11/SF
def joint_movies(request):
    """Render property values of vertices shared by the POSTed actor names.

    ``actor_names`` is split on commas. With exactly two names the traversal
    filters on the second name; otherwise it filters on the second and third
    names (so fewer than two, or exactly two, behave as before — an
    ``IndexError`` is raised when a required name is missing). The connection
    is closed even when that happens.
    """
    statics.load_statics(globals())
    inputs = [x.strip() for x in request.POST['actor_names'].split(',')]
    graph = Graph()
    remoteConn = DriverRemoteConnection('ws://<neptune endpoint>:8182/gremlin',
                                        'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        if len(inputs) == 2:
            myList = g.V().has('name', inputs[0]).repeat(
                out().where(__.in_().has('name', inputs[1]))
            ).emit().values().toList()
        else:
            myList = g.V().has('name', inputs[0]).repeat(
                out().where(__.in_().has('name', inputs[1]))
                     .where(__.in_().has('name', inputs[2]))
            ).emit().values().toList()
    finally:
        remoteConn.close()

    context = {'actor': request.POST['actor_names'], 'movies': myList}
    return render(request, 'polls/movie-results.html', context)
Example #25
0
def lambda_handler(event, context):
    """Return up to 10 friend-of-friend recommendations for a user.

    Candidates are the FRIEND neighbours of the user's FRIEND neighbours
    that are not already friends, counted per ``uid``. They are visited in
    descending count order, the user itself is skipped, and profile fields
    are fetched from the DynamoDB ``user`` table. ``prediction(key)`` is
    called before returning.

    The response body is a list of ``(uid, firstName, lastName, pic_url)``
    tuples. The connection is closed even when a step fails.
    """
    graph = Graph()

    remoteConn = DriverRemoteConnection(
        'ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin',
        'g')
    try:
        g = graph.traversal().withRemote(remoteConn)
        key = event["userId"]
        recommend = g.V().hasLabel('User').has('uid', key) \
            .both('FRIEND').aggregate('friends') \
            .both('FRIEND') \
            .where(P.without('friends')) \
            .groupCount().by('uid') \
            .next()
        friends = g.V().hasLabel('User').has('uid', key) \
            .both('FRIEND').aggregate('friends') \
            .valueMap().toList()
        print(friends)

        # Iterate the sorted pairs directly instead of rebuilding a dict,
        # so the ranking does not depend on dict insertion order.
        ranked = sorted(recommend.items(), key=lambda item: -item[1])
        list1 = []
        for item, _count in ranked:
            if item == key:
                continue
            data = dynamodb.get_item(TableName='user',
                                     Key={'UID': {
                                         'S': str(item)
                                     }})
            list1.append((str(item), data['Item']['firstName']['S'],
                          data['Item']['lastName']['S'],
                          data['Item']['pic_url']['S']))
            if len(list1) == 10:
                break
        prediction(key)
        print(list1)
    finally:
        remoteConn.close()
    return {'statusCode': 200, 'body': list1}
Example #26
0
def _executor(q, conn):
    """Run a promise-based vertex-count check and report the outcome on ``q``.

    When ``conn`` is falsy a connection is created here and closed here,
    on success and on failure. A connection passed in by the caller is
    left open. ``'success!'`` is put on the queue when the check passes,
    otherwise the type of the raised exception.
    """
    close = False
    if not conn:
        # This isn't a fixture so close manually
        close = True
        conn = DriverRemoteConnection(
            'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
    try:
        g = traversal().withRemote(conn)
        future = g.V().promise()
        t = future.result()
        assert len(t.toList()) == 6
    except BaseException:
        # Deliberately as broad as the original bare ``except``: every
        # failure is reported through the queue.
        q.put(sys.exc_info()[0])
    else:
        q.put('success!')
    finally:
        if close:
            conn.close()
def _executor(q, conn):
    """Run a promise-based vertex-count check and report the outcome on ``q``.

    When ``conn`` is falsy a connection is created here and closed here,
    on success and on failure. A connection passed in by the caller is
    left open. ``'success!'`` is put on the queue when the check passes,
    otherwise the type of the raised exception.
    """
    close = False
    if not conn:
        # This isn't a fixture so close manually
        close = True
        conn = DriverRemoteConnection(
            'ws://localhost:45940/gremlin', 'gmodern', pool_size=4)
    try:
        g = Graph().traversal().withRemote(conn)
        future = g.V().promise()
        t = future.result()
        assert len(t.toList()) == 6
    except BaseException:
        # Deliberately as broad as the original bare ``except``: every
        # failure is reported through the queue.
        q.put(sys.exc_info()[0])
    else:
        q.put('success!')
    finally:
        if close:
            conn.close()
Example #28
0
def get_graph(term: str):
    """
    Retrieves all the related entities a term has in Neptune

    The term is upper-cased for the lookup. Each returned dict holds the
    entity's properties (first value of each list-valued property); keys
    whose string form contains "T." are stored under the part after the dot
    with their value unchanged. Entities whose first "value" was already
    seen are skipped.
    """

    graph = Graph()
    connection = DriverRemoteConnection(
        f'wss://{os.environ["NEPTUNE_ENDPOINT"]}:8182/gremlin', 'g')
    try:
        g = graph.traversal().withRemote(connection)

        entities = g.V().has(
            "term", "value",
            term.upper()).out("has_entity").valueMap(True).toList()
    finally:
        # Release the connection even when the query fails.
        connection.close()

    # Used to filter out repeating entity names; `dedup()` does not work
    # 100% of the time.
    seen_values = set()
    result = []
    for e in entities:
        value = e["value"][0]
        if value in seen_values:
            continue
        seen_values.add(value)

        data = {}
        for k, v in e.items():
            k_str = str(k)
            if "T." in k_str:
                # Token keys keep their raw value under the short name.
                data[k_str.split(".")[1]] = v
            else:
                data[k] = v[0]

        result.append(data)

    return result
Example #29
0
    def test_traversals(self):
        """Check basic remote traversal results, range indexing and ``within``.

        The connection is closed in a ``finally`` clause so a failing
        assertion does not leak it.
        """
        statics.load_statics(globals())
        connection = DriverRemoteConnection('ws://localhost:45940/gremlin',
                                            'g')
        try:
            assert "remoteconnection[ws://localhost:45940/gremlin,g]" == str(
                connection)
            g = Graph().traversal().withRemote(connection)

            assert long(6) == g.V().count().toList()[0]

            # Terminal steps on a single vertex.
            assert Vertex(1) == g.V(1).next()
            assert 1 == g.V(1).id().next()
            assert Traverser(Vertex(1)) == g.V(1).nextTraverser()
            assert 1 == len(g.V(1).toList())
            assert isinstance(g.V(1).toList(), list)
            results = g.V().repeat(out()).times(2).name.toList()
            assert 2 == len(results)
            assert "lop" in results
            assert "ripple" in results

            # Slice syntax on traversals.
            assert 10 == g.V().repeat(both()).times(5)[0:10].count().next()
            assert 1 == g.V().repeat(both()).times(5)[0:1].count().next()
            assert 0 == g.V().repeat(both()).times(5)[0:0].count().next()
            assert 4 == g.V()[2:].count().next()
            assert 2 == g.V()[:2].count().next()

            # Filter names against a list supplied as a side effect.
            results = g.withSideEffect('a', ['josh', 'peter']) \
                .V(1).out('created').in_('created').values('name') \
                .where(within('a')).toList()
            assert 2 == len(results)
            assert 'josh' in results
            assert 'peter' in results
            # todo: need a traversal metrics deserializer
            g.V().out().profile().next()
        finally:
            connection.close()
Example #30
0
def run_queries(cluster):
    """Smoke-test the Client and DriverRemoteConnection APIs on a cluster.

    ``cluster['Port']`` selects the local port. The client and the remote
    connection are each closed in a ``finally`` clause so a failing
    assertion does not leak them.
    """
    cluster_url = 'ws://localhost:%s/gremlin' % cluster['Port']

    # test Client API
    print('Connecting to Neptune Graph DB cluster URL: %s' % cluster_url)
    graph_client = gremlin_client.Client(cluster_url, 'g')
    try:
        values = '[1,2,3,4]'
        print('Submitting values: %s' % values)
        result_set = graph_client.submit(values)
        future_results = result_set.all()
        results = future_results.result()
        print('Received values from cluster: %s' % results)
        assert results == [1, 2, 3, 4]

        future_result_set = graph_client.submitAsync('[1,2,3,4]')
        result_set = future_result_set.result()
        result = result_set.one()
        assert result == [1, 2, 3, 4]
        assert result_set.done.done()
    finally:
        graph_client.close()

    # test DriverRemoteConnection API
    graph = Graph()
    conn = DriverRemoteConnection(cluster_url, 'g')
    try:
        g = graph.traversal().withRemote(conn)
        vertices_before = g.V().toList()
        print('Existing vertices in the graph: %s' % vertices_before)
        print('Adding new vertices "v1" and "v2" to the graph')
        g.addV().property('id', 'v1').property('name', 'Vertex 1').next()
        g.addV().property('id', 'v2').property('name', 'Vertex 2').next()
        vertices_after = g.V().toList()
        print('New list of vertices in the graph: %s' % vertices_after)
        result = set(vertices_after) - set(vertices_before)
        assert len(result) == 2
    finally:
        conn.close()
        # col3, col2 is col1
        id_0 = name_to_id.get(vals[0], None)
        id_1 = name_to_id.get(vals[1], None)
        id_2 = name_to_id.get(vals[2], None)
        if id_0 is not None:
            if id_1 is not None:
                g.addE('is').from_(id_1).to(id_0).next()
            if id_2 is not None:
                g.addE('is').from_(id_2).to(id_0).next()
        # col2 knows col3
        if id_1 is not None and id_2 is not None:
            g.addE('knows').from_(id_1).to(id_2).next()
    return


if __name__ == "__main__":
    # connect gremlin server
    connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
    try:
        g = traversal().withRemote(connection)

        # remove all vertices (and with them their edges) from the graph
        g.V().drop().iterate()

        # populate data into graph database
        alerts = pd.read_csv('../data/alerts.csv', header=None).values
        load_data(g, alerts)
    finally:
        # close the connection even if reading or loading the data fails
        connection.close()
    print("Data is loaded successfully.")

Example #32
0
class JanusGraphClient(object):
    """ JanusGraph Client Builder which adds the Serializers for JanusGraph specific objects, predicates etc. """

    # Remote connection created by connect(); stays None until then.
    REMOTE_CONNECTION = None

    def __init__(self, version=3.0):
        """ Store the GraphSON version to use (defaults to 3.0).

        Args:
            version (float): GraphSON version number. It is only recorded on
                the instance as ``graphsonVersion``.
        """

        self.graphsonVersion = version

    def connect(self,
                host="localhost",
                port="8182",
                traversal_source="g",
                **kwargs):
        """ Connect to JanusGraph's gremlin-server instance.

        Args:
            host (str): The HOST of JanusGraph gremlin-server instance. Defaults to localhost
            port (str): The PORT of JanusGraph gremlin-server instance. Defaults to 8182.
            traversal_source (str): The GraphTraversalSource being exposed from gremlin-server instance.
                Defaults to g

        Keyword Args:
            graphson_reader (GraphSONReader): GraphSONReader object with required Deserializers registered
            graphson_writer (GraphSONWriter): GraphSONWriter object with required Serializers registered

        Raises:
            AttributeError: When keyword arguments are provided but do not include both
            `graphson_reader` and `graphson_writer`.

        Returns:
            JanusGraphClient: This instance, so calls can be chained.
        """

        url = "ws://{}:{}/gremlin".format(host, port)

        if not kwargs:
            # No overrides supplied: fall back to the JanusGraph-aware defaults.
            graphson_reader = JanusGraphSONReader().build()
            graphson_writer = JanusGraphSONWriter().build()
        elif "graphson_reader" in kwargs and "graphson_writer" in kwargs:
            graphson_reader = kwargs["graphson_reader"]
            graphson_writer = kwargs["graphson_writer"]
        else:
            # Reader and writer must be overridden together.
            raise AttributeError(
                "Additional parameters if provided needs to be keywords arguments of "
                "`graphson_reader` and `graphson_writer`")

        self.REMOTE_CONNECTION = DriverRemoteConnection(
            url,
            traversal_source,
            graphson_reader=graphson_reader,
            graphson_writer=graphson_writer)

        return self

    def get_connection(self):
        """ Get the RemoteConnection object, so that same can be used to create GraphTraversalSource.

        Returns:
            DriverRemoteConnection: The connection created by ``connect()``, or None if
            ``connect()`` has not been called.
        """

        return self.REMOTE_CONNECTION

    def close(self):
        """ Close the remote connection on a best-effort basis.

        Returns:
            bool: True when the connection was closed, False when closing failed
            (including when no connection was ever opened).
        """
        try:
            self.REMOTE_CONNECTION.close()
        except Exception:
            # Deliberately best-effort, but let KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed by a bare ``except``.
            return False
        return True
# Add family-relationship edges between vertices looked up by 'first_name'.
# Each relationship is stored in both directions (e.g. 'son_of' / 'mother_of')
# and carries a 'known_since' year property.
edge = g.V().has('first_name', 'Shane').addE('son_of').to(g.V().has('first_name', 'Edith')).property('known_since','1964').next()
edge = g.V().has('first_name', 'Edith').addE('mother_of').to(g.V().has('first_name', 'Shane')).property('known_since','1964').next()
edge = g.V().has('first_name', 'Shane').addE('husband_of').to(g.V().has('first_name', 'Mary')).property('known_since','1989').next()
edge = g.V().has('first_name', 'Mary').addE('wife_of').to(g.V().has('first_name', 'Shane')).property('known_since','1989').next()
edge = g.V().has('first_name', 'Shane').addE('father_of').to(g.V().has('first_name', 'Betty')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Betty').addE('daughter_of').to(g.V().has('first_name', 'Shane')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Mary').addE('mother_of').to(g.V().has('first_name', 'Betty')).property('known_since','1991').next()
edge = g.V().has('first_name', 'Betty').addE('daughter_of').to(g.V().has('first_name', 'Mary')).property('known_since','1991').next()


# Print the first names of all vertices
print(g.V().first_name.toList()) 

# Print all properties of the person whose first name is Shane
print(g.V().has('person','first_name','Shane').valueMap().next()) 

# Traverse from Betty along 'daughter_of' and then 'son_of' edges
# (with the edges added above: Betty -> Shane -> Edith)
print(g.V().has('first_name', 'Betty').out('daughter_of').out('son_of').valueMap().toList())
print("\n\n\n")

# Print the property maps of all vertices
people = g.V().valueMap().toList()
print(people)

# Print the property maps of all edges
connections = g.E().valueMap().toList()
print(connections)

# Close the remote connection
remoteConn.close()
Example #34
0
class TrajectoryGraph:
    """Helper around a remote Gremlin traversal source for vehicle detections.

    Detections are stored as vertices and linked by "next" edges. Most values
    are sent through ``Bindings`` using the names declared below.
    """

    # Names used as binding keys and, for some, as property keys.
    LABEL = "label"
    CAMID = "camId"
    TIME = "time"
    IMAGE = "image"
    OUT_V = "outV"
    IN_V = "inV"
    LIMIT = "limit"
    VID = "vid"
    FEA = "feature"
    CONFIDENCE = "confidence"
    INDEX = "index"

    def __init__(self):
        """Open the remote connection and build the traversal source."""
        self.b = Bindings()
        self.graph = Graph()
        self.connection = DriverRemoteConnection(
            'ws://130.207.122.57:8182/gremlin', 'g')
        self.g = self.graph.traversal().withRemote(self.connection)
        logging.info("Connected")

    def addDetection(self, vehId, camId, timestamp, index):
        """Create a detection vertex labelled ``vehId`` and return its id.

        The camera id, timestamp and index are stored as vertex properties.
        """
        cls = TrajectoryGraph
        step = self.g.addV(self.b.of(cls.LABEL, vehId))
        step = step.property(cls.CAMID, self.b.of(cls.CAMID, camId))
        step = step.property(cls.TIME, self.b.of(cls.TIME, timestamp))
        step = step.property(cls.INDEX, self.b.of(cls.INDEX, index))
        vertex_id = step.id().next()

        logging.info("Trajectory Vertex v[{}] ({}, {}, {}) created.".format(
            vertex_id, vehId, camId, timestamp))

        return vertex_id

    def linkDetection(self, src, dest, confidence):
        """Add a "next" edge from vertex ``src`` to vertex ``dest``.

        The edge carries ``confidence`` as a property.
        """
        cls = TrajectoryGraph
        logging.info("Link vertex v[{}] to v[{}]. Confidence {}".format(
            src, dest, confidence))
        # Tag the source vertex as "a", hop to the destination, then draw
        # the edge back from "a".
        step = self.g.V(self.b.of(cls.OUT_V, src)).as_("a")
        step = step.V(self.b.of(cls.IN_V, dest))
        step = step.addE(self.b.of(cls.LABEL, "next")).from_("a")
        step = step.property(cls.CONFIDENCE,
                             self.b.of(cls.CONFIDENCE, confidence))
        step.iterate()

    def getValueMapById(self, id):
        """Return the value map (requested with ``True``) of vertex ``id``."""
        lookup = self.g.V(self.b.of(TrajectoryGraph.VID, id))
        properties = lookup.valueMap(True).next()
        logging.info("Get detection valuemap {} for V[{}]".format(
            properties.keys(), id))
        return properties

    def getLatestDetectionsByCamId(self, camId, limit):
        """Return ids of up to ``limit`` vertices for ``camId``, ordered by time descending."""
        # timelimit support can be considered.
        cls = TrajectoryGraph
        by_camera = self.g.V().has(cls.CAMID, self.b.of(cls.CAMID, camId))
        newest_first = by_camera.order().by(cls.TIME, Order.decr)
        capped = newest_first.limit(self.b.of(cls.LIMIT, limit))
        found = capped.id().toList()
        logging.info("LatestDetections by camera {}: {}".format(camId, found))
        return found

    def getNextDetectionsById(self, id, limit):
        """Return ids reached by following outgoing edges from ``id`` up to ``limit`` times."""
        # emit() is placed before repeat(); per the original author's note
        # this can be used to return the starting vertex itself.
        cls = TrajectoryGraph
        start = self.g.V(self.b.of(cls.OUT_V, id))
        walk = start.emit().repeat(__.out())
        walk = walk.times(self.b.of(cls.LIMIT, limit))
        found = walk.id().toList()
        logging.info("NextDetections from V[{}]: {}".format(id, found))
        return found

    def getPrevDetectionsById(self, id, limit):
        """Return ids reached by following incoming edges from ``id``, in reversed order.

        The traversal result is reversed before it is returned.
        """
        start = self.g.V(self.b.of(TrajectoryGraph.IN_V, id))
        walk = start.repeat(__.in_()).times(limit).emit()
        found = walk.id().toList()[::-1]
        logging.info("PrevDetections from V[{}]: {}".format(id, found))
        return found

    def clear(self):
        """Drop every vertex in the graph."""
        logging.info("TrajectoryGraph dropped")
        self.g.V().drop().iterate()

    def shutdown(self):
        """Close the connection and discard the traversal source and graph."""
        logging.info("TrajectoryGraph closed")
        self.connection.close()
        self.g = None
        self.graph = None
Example #35
0
class GremlinStorageConnector(BaseStorageConnector):
    """Storage Connector for Gremlin databases.

    The remote connection is opened lazily the first time ``g`` is accessed
    and released again by ``close()``.
    """

    # Cached traversal source bound to ``connection``; built on first use of ``g``.
    _g: Optional[Traversal] = None
    # Open remote connection, or None while the connector is closed.
    connection: Optional[DriverRemoteConnection] = None

    def __init__(self,
                 endpoint_url: str,
                 supports_multiple_labels=False,
                 test_prefix: str = "",
                 **kwargs) -> None:
        """Create a GremlinStorageConnector.

        Arguments:
            endpoint_url: The url of the gremlin endpoint to connect to (e.g. ``ws://localhost:8182``)
            supports_multiple_labels: Some GraphDBs (Neptune/Neo4J) support multiple labels on a single vertex.
            test_prefix: A prefix that will be prepended to edge and vertex ids to allow scenario separation.
            **kwargs: Any unspecified args will be passed to the ``DriverRemoteConnection`` object.
        """
        self.endpoint_url = endpoint_url
        self.supports_multiple_labels = supports_multiple_labels
        self.test_prefix = test_prefix
        self.connection_args = kwargs

    def init(self) -> None:
        """Do nothing; this connector has no initialisation step."""
        ...

    @property
    def g(self) -> Traversal:
        """Return the traversal source, opening the connection on first use."""
        if not self.connection:
            self.open()
        if not self._g:
            self._g = traversal().withRemote(self.connection)
        return self._g

    def open(self) -> None:
        """Open the remote connection if it is not already open."""
        if not self.connection:
            logger.debug("Opening connection to %s", self.endpoint_url)
            self.connection = DriverRemoteConnection(
                f"{self.endpoint_url}/gremlin", "g", **self.connection_args)

    def close(self) -> None:
        """Close the remote connection (if any) and forget the cached traversal source."""
        logger.debug("Closing gremlin connection")
        if self.connection:
            self.connection.close()
        self.connection = None
        self._g = None

    def write_resource(self, resource: CloudWandererResource) -> None:
        """Persist a single resource to storage.

        Arguments:
            resource (CloudWandererResource): The CloudWandererResource to write.
        """
        self._write_resource(resource)
        self._write_dependent_resource_edges(resource)

    def _write_resource(self, resource: CloudWandererResource) -> None:
        """Upsert the vertex for ``resource`` and bring its relationship edges up to date."""
        primary_label = generate_primary_label(resource.urn)

        traversal = self._write_vertex(
            vertex_id=self.generate_vertex_id(resource.urn),
            vertex_labels=[primary_label])
        traversal = (
            traversal
            .property(Cardinality.single, "_cloud_name", resource.urn.cloud_name)
            .property(Cardinality.single, "_account_id", resource.urn.account_id)
            .property(Cardinality.single, "_region", resource.urn.region)
            .property(Cardinality.single, "_service", resource.urn.service)
            .property(Cardinality.single, "_resource_type", resource.urn.resource_type)
            .property(Cardinality.single, "_discovery_time", resource.discovery_time.isoformat())
            .property(Cardinality.single, "_urn", str(resource.urn))
        )
        for id_part in resource.urn.resource_id_parts:
            # Keep the returned traversal instead of discarding it.
            traversal = traversal.property(Cardinality.set_,
                                           "_resource_id_parts", id_part)
        # _write_properties executes the traversal built up so far.
        self._write_properties(
            traversal=traversal,
            properties=resource.cloudwanderer_metadata.resource_data)

        self._write_relationships(resource)
        self._clean_up_relationships(urn=resource.urn,
                                     cutoff=resource.discovery_time)
        if not resource.urn.is_partial:
            self._repoint_vertex_edges(vertex_label=primary_label,
                                       new_resource_urn=resource.urn)

    def _write_dependent_resource_edges(
            self, resource: CloudWandererResource) -> None:
        """Write a "has" edge from ``resource`` to each of its dependent resources."""
        for dependent_urn in resource.dependent_resource_urns:
            logger.debug("Writing dependent resource edge from %s to %s",
                         resource.urn, dependent_urn)
            self._write_edge(
                edge_id=self.generate_edge_id(resource.urn, dependent_urn),
                edge_label="has",
                source_vertex_id=self.generate_vertex_id(resource.urn),
                destination_vertex_id=self.generate_vertex_id(dependent_urn),
                owner_id=self.generate_vertex_id(resource.urn),
                discovery_time=resource.discovery_time,
            )

    def _clean_up_relationships(self, urn: PartialUrn,
                                cutoff: datetime) -> None:
        """Drop edges owned by ``urn`` whose ``_discovery_time`` is before ``cutoff``."""
        owner_id = self.generate_vertex_id(urn)
        logger.debug("Cleaning up edges owned by %s discovered before %s",
                     owner_id, cutoff)
        # Discovery times are stored as ISO strings, so P.lt compares strings here.
        (self.g.V(owner_id).bothE().as_("edge")
         .has("_edge_owner", owner_id)
         .where(__.values("_discovery_time").is_(P.lt(cutoff.isoformat())))
         .select("edge").drop()).iterate()

    def _write_relationships(self, resource: CloudWandererResource) -> None:
        """Write an edge for each relationship of ``resource``.

        When a vertex matching the relationship's partial URN already exists
        the edge points at that vertex; otherwise a placeholder resource is
        written for the partial URN and the edge points at the placeholder.
        """
        for relationship in resource.relationships:
            inferred_partner_urn = relationship.partial_urn
            try:
                pre_existing_resource_urn = (
                    self._lookup_resource(relationship.partial_urn)
                    .propertyMap().toList()[0]["_urn"][0].value)
            except IndexError:
                # No matching vertex was returned by the lookup.
                pre_existing_resource_urn = None

            if pre_existing_resource_urn:
                logger.debug(
                    "Writing relationship with pre_existing_resource_urn %s",
                    pre_existing_resource_urn)
                self._write_relationship_edge(
                    resource_urn=resource.urn,
                    relationship_resource_urn=pre_existing_resource_urn,
                    direction=relationship.direction,
                    discovery_time=resource.discovery_time,
                )
                if pre_existing_resource_urn != inferred_partner_urn:
                    # Remove the edge that would point at the inferred URN.
                    self._delete_relationship_edge(
                        resource_urn=resource.urn,
                        relationship_resource_urn=inferred_partner_urn,
                        direction=relationship.direction,
                    )
                continue
            logger.debug("Writing inferred resource %s", inferred_partner_urn)
            self._write_resource(
                CloudWandererResource(urn=cast(URN, relationship.partial_urn),
                                      resource_data={}))
            self._write_relationship_edge(
                resource_urn=resource.urn,
                relationship_resource_urn=inferred_partner_urn,
                direction=relationship.direction,
                discovery_time=resource.discovery_time,
            )

    def _repoint_vertex_edges(
            self, vertex_label: str,
            new_resource_urn: Union[URN, PartialUrn]) -> None:
        """Move edges from placeholder vertices onto the vertex for ``new_resource_urn``.

        Placeholders are vertices with the same label and resource id parts
        whose ``_account_id`` or ``_region`` is ``"unknown"``. Their edges are
        re-created on the new vertex, then the placeholder is dropped.
        """
        # https://tinkerpop.apache.org/docs/current/recipes/#edge-move

        resources_of_the_same_type = self.g.V().as_("old_vertex").hasLabel(
            vertex_label)
        for id_part in new_resource_urn.resource_id_parts:
            resources_of_the_same_type.has("_resource_id_parts", id_part)
        resources_with_same_id_but_unknown = (resources_of_the_same_type.or_(
            __.has("_account_id", "unknown"), __.has("_region",
                                                     "unknown"))).toList()

        for old_vertex in resources_with_same_id_but_unknown:
            # Outbound
            old_vertices_outbound_edges = self.g.V(old_vertex).outE().as_("e1")
            old_outbound_edges_partner_vertex = old_vertices_outbound_edges.inV(
            ).as_("b")

            new_vertex = old_outbound_edges_partner_vertex.V(
                self.generate_vertex_id(new_resource_urn)).as_("new_vertex")
            # Create the replacement edge, copy each property of the old edge
            # onto it, then drop the old edge.
            add_old_outbound_edges_to_new_vertex = (
                new_vertex.addE("has").to("b").as_("e2").sideEffect(
                    __.select("e1").properties().unfold().as_("p").select(
                        "e2").property(
                            __.select("p").key(),
                            __.select("p").value())))
            add_old_outbound_edges_to_new_vertex.select("e1").drop().iterate()
            # Inbound
            # NOTE(review): "old_vertex" and "new_vertex" are labelled with
            # as_() in other traversals above, not in this one -- confirm
            # these select() steps resolve as intended. Also confirm that
            # inE().inV() yields the partner vertex rather than the old
            # vertex itself.
            old_vertices_inbound_edges = self.g.V(old_vertex).select(
                "old_vertex").inE().as_("old_inbound_edge")
            old_inbound_edges_partner_vertex = old_vertices_inbound_edges.inV(
            ).as_("c")

            new_vertex = old_inbound_edges_partner_vertex.select("new_vertex")
            add_old_inbound_edges_to_new_vertex = (new_vertex.addE(
                "has").from_("c").as_("new_inbound_edge").sideEffect(
                    __.select("old_inbound_edge").properties().unfold().as_(
                        "p").select("new_inbound_edge").property(
                            __.select("p").key(),
                            __.select("p").value())))
            add_old_inbound_edges_to_new_vertex.select(
                "old_inbound_edge").drop().iterate()

            # Delete old vertex
            self.g.V(old_vertex).drop().iterate()

    def _delete_relationship_edge(self, resource_urn: PartialUrn,
                                  relationship_resource_urn: PartialUrn,
                                  direction: RelationshipDirection) -> None:
        """Delete the relationship edge between the two URNs, oriented by ``direction``."""
        if direction == RelationshipDirection.INBOUND:
            edge_id = self.generate_edge_id(relationship_resource_urn,
                                            resource_urn)
        else:
            edge_id = self.generate_edge_id(resource_urn,
                                            relationship_resource_urn)
        self._delete_edge(edge_id)

    def _write_relationship_edge(
        self,
        resource_urn: PartialUrn,
        relationship_resource_urn: PartialUrn,
        direction: RelationshipDirection,
        discovery_time: datetime,
    ) -> None:
        """Write a "has" edge between the two URNs, oriented by ``direction``.

        INBOUND means the edge runs from the related resource to
        ``resource_urn``; otherwise it runs the other way. In both cases
        ``resource_urn`` is recorded as the edge owner.
        """
        logger.debug("Writing edge relationship between %s and %s",
                     resource_urn, relationship_resource_urn)
        if direction == RelationshipDirection.INBOUND:
            source_urn, destination_urn = relationship_resource_urn, resource_urn
        else:
            source_urn, destination_urn = resource_urn, relationship_resource_urn
        self._write_edge(
            edge_id=self.generate_edge_id(source_urn, destination_urn),
            edge_label="has",
            source_vertex_id=self.generate_vertex_id(source_urn),
            destination_vertex_id=self.generate_vertex_id(destination_urn),
            owner_id=self.generate_vertex_id(resource_urn),
            discovery_time=discovery_time,
        )

    def _lookup_resource(self, partial_urn: PartialUrn) -> Traversal:
        """Build (without executing) a traversal matching vertices for ``partial_urn``.

        Label, cloud name, service, resource type and every resource id part
        are always matched. Account id and region are only matched when they
        are not ``"unknown"``.
        """
        vertex_label = generate_primary_label(partial_urn)
        logger.debug("looking up resource with label %s", vertex_label)
        traversal = (
            self.g.V().hasLabel(vertex_label)
            .has("_cloud_name", partial_urn.cloud_name)
            .has("_service", partial_urn.service)
            .has("_resource_type", partial_urn.resource_type)
        )
        for id_part in partial_urn.resource_id_parts:
            traversal = traversal.has("_resource_id_parts", id_part)
        if partial_urn.account_id != "unknown":
            traversal = traversal.has("_account_id", partial_urn.account_id)
        if partial_urn.region != "unknown":
            traversal = traversal.has("_region", partial_urn.region)
        return traversal

    def _write_vertex(self, vertex_id: str,
                      vertex_labels: List[str]) -> Traversal:
        """Build a get-or-create traversal for the vertex with id ``vertex_id``.

        When multiple labels are supported they are joined with ``::``;
        otherwise only the first label is used.
        """
        logger.debug("Writing vertex %s", vertex_id)
        if self.supports_multiple_labels:
            vertex_label = "::".join(vertex_labels)
        else:
            vertex_label = vertex_labels[0]
        # fold/coalesce: reuse the vertex if it exists, otherwise add it.
        return self.g.V(vertex_id).fold().coalesce(
            __.unfold(),
            __.addV(vertex_label).property(T.id, vertex_id))

    def _write_properties(self, traversal: Traversal,
                          properties: Dict[str, Any]) -> None:
        """Append ``properties`` (stringified) to ``traversal`` and execute it.

        Raises:
            RuntimeError: If executing the traversal raises ``RuntimeError``;
                the new error reports the traversal size as a debugging hint.
        """
        logger.debug("Writing properties: %s", properties)
        for property_name, property_value in properties.items():
            traversal = traversal.property(Cardinality.single,
                                           str(property_name),
                                           str(property_value))
        # Measured before execution so it is available for the error message.
        traversal_size = len(str(traversal).encode())
        try:
            traversal.next()
        except RuntimeError as ex:
            raise RuntimeError(
                "GremlinStorageConnector got a runtime error while saving a property of "
                f"{traversal_size} bytes, check your Gremlin server's maxContentLength is larger than this."
            ) from ex

    def _write_edge(
        self,
        edge_id: str,
        edge_label: str,
        source_vertex_id: str,
        destination_vertex_id: str,
        owner_id: str,
        discovery_time: datetime,
    ) -> None:
        """Refresh the discovery time of edge ``edge_id``, creating the edge if absent."""
        logger.debug("Looking for edge %s", edge_id)
        # Updating the property doubles as the existence check: an empty
        # result means there was no edge to update.
        edge = self.g.E(edge_id).property("_discovery_time",
                                          discovery_time.isoformat()).toList()

        if not edge:
            logger.debug("Writing edge between %s and %s", source_vertex_id,
                         destination_vertex_id)
            (self.g.V(source_vertex_id).as_("source")
             .V(destination_vertex_id)
             .addE(edge_label).from_("source")
             .property(T.id, edge_id)
             .property("_edge_owner", owner_id)
             .property("_discovery_time", discovery_time.isoformat())).next()

    def _delete_edge(self, edge_id: str) -> None:
        """Drop the edge with id ``edge_id``."""
        logger.debug("Deleting edge %s", edge_id)
        self.g.E(edge_id).drop().iterate()

    def read_all(self) -> Iterator[dict]:
        """Return all records from storage."""
        yield from self.g.V().has("_urn").valueMap().toList()

    def read_resource(self, urn: URN) -> Optional[CloudWandererResource]:
        """Return a resource matching the supplied urn from storage.

        Arguments:
            urn (URN): The AWS URN of the resource to return

        Returns:
            The first matching resource, or None when nothing matches.
        """
        # Supply a default so an empty result yields None instead of
        # leaking StopIteration to the caller.
        return next(self.read_resources(urn=urn), None)

    def read_resources(
        self,
        cloud_name: Optional[str] = None,
        account_id: Optional[str] = None,
        region: Optional[str] = None,
        service: Optional[str] = None,
        resource_type: Optional[str] = None,
        urn: Optional[Union[URN, PartialUrn]] = None,
    ) -> Iterator["CloudWandererResource"]:
        """Yield a resource matching the supplied urn from storage.

        All arguments are optional. When ``urn`` is not supplied, a partial
        URN is built from the other arguments, using ``"unknown"`` for any
        that are missing.

        Arguments:
            cloud_name: The name of the cloud in question (e.g. ``aws``)
            urn: The AWS URN of the resource to return
            account_id: Cloud Account ID (e.g. ``111111111111``)
            region: AWS region (e.g. ``'eu-west-2'``)
            service: Service name (e.g. ``'ec2'``)
            resource_type: Resource Type (e.g. ``'instance'``)
        """
        if not urn:
            urn = PartialUrn(
                cloud_name=cloud_name or "unknown",
                service=service or "unknown",
                account_id=account_id or "unknown",
                region=region or "unknown",
                resource_type=resource_type or "unknown",
            )
        for vertex in self._lookup_resource(
                partial_urn=urn).propertyMap().toList():
            yield CloudWandererResource(
                urn=URN.from_string(vertex["_urn"][0].value),
                resource_data=_normalise_gremlin_attrs(vertex),
                discovery_time=datetime.strptime(
                    vertex["_discovery_time"][0].value, ISO_DATE_FORMAT),
            )

    def delete_resource(self, urn: URN) -> None:
        """Delete this resource and all its resource attributes.

        Arguments:
            urn (URN): The URN of the resource to delete
        """
        logger.debug("Deleting resource %s", urn)
        self.g.V(self.generate_vertex_id(urn)).drop().iterate()

    def delete_resource_of_type_in_account_region(
        self,
        cloud_name: str,
        service: str,
        resource_type: str,
        account_id: str,
        region: str,
        cutoff: Optional[datetime],
    ) -> None:
        """Delete resources of type in account and region discovered before the cutoff.

        This is used primarily to clean up old resources.

        Arguments:
            cloud_name: The name of the cloud in question (e.g. ``aws``)
            account_id: Cloud Account ID (e.g. ``111111111111``)
            region: Cloud region (e.g. ``'eu-west-2'``)
            service: Service name (e.g. ``'ec2'``)
            resource_type: Resource Type (e.g. ``'instance'``)
            cutoff: Delete any resource discovered before this time. When falsy,
                every matching resource is deleted.
        """
        partial_urn = PartialUrn(
            cloud_name=cloud_name,
            service=service,
            account_id=account_id,
            region=region,
            resource_type=resource_type,
        )
        logger.debug(
            "Deleting resources that match %s that were discovered before %s",
            partial_urn, cutoff)
        traversal = self._lookup_resource(partial_urn=partial_urn)
        if cutoff:
            traversal = traversal.where(
                __.values("_discovery_time").is_(P.lt(cutoff.isoformat())))
        traversal.drop().iterate()

    def generate_vertex_id(self, urn: PartialUrn) -> str:
        """Generate a vertex id.

        Arguments:
            urn: The URN of the vertex to create.
        """
        return f"{self.test_prefix}{urn}"

    def generate_edge_id(self, source_urn: PartialUrn,
                         destination_urn: PartialUrn) -> str:
        """Generate a primary edge id.

        Arguments:
            source_urn: The URN of the resource we're generating an edge from.
            destination_urn: The URN of the resource we're generating an edge to.
        """
        logger.debug("Generating edge id: %s", source_urn)
        return f"{self.test_prefix}{source_urn}#{destination_urn}"
Example #36
0
class GremlinWrapper(object):
    """Convenience wrapper around a remote Gremlin graph of users and repositories."""

    def __init__(self, remote_gremlin_server):
        """Connect to ``remote_gremlin_server`` and build a traversal source.

        Arguments:
            remote_gremlin_server: URL handed to ``DriverRemoteConnection``.
        """
        self.remote_server = remote_gremlin_server
        self.remote_connection = DriverRemoteConnection(
            remote_gremlin_server, 'g')
        self.g = traversal().withRemote(self.remote_connection)
        # The methods below call bare step names (unfold, addV, outV, ...);
        # presumably this call is what puts them into module globals.
        statics.load_statics(globals())

    def add_indigitous_user(self, login, email, name, uid):
        """Get or create the 'indigitous_user' vertex with the given ``uid``.

        Returns the traversal result as a list, matching ``add_github_user``.
        """
        return (
            self.g.V()
            .has('indigitous_user', 'uid', uid)
            .fold()
            .coalesce(
                unfold(),
                addV('indigitous_user')
                .property('name', name)
                .property('uid', uid)
                .property('email', email)
                .property('login', login))
            .toList()
        )

    def add_github_user(self, login, email, name, uid):
        """Get or create the 'github_user' vertex with the given ``uid``.

        Returns the traversal result as a list.
        """
        return (
            self.g.V()
            .has('github_user', 'uid', uid)
            .fold()
            .coalesce(
                unfold(),
                addV('github_user')
                .property('name', name)
                .property('uid', uid)
                .property('email', email)
                .property('login', login))
            .toList()
        )

    def add_repository(self, name):
        """Get or create the 'repository' vertex with the given ``name``."""
        # The created label must match the label searched for; otherwise the
        # lookup never finds the vertex and a new one is added on every call.
        return (
            self.g.V()
            .has('repository', 'name', name)
            .fold()
            .coalesce(
                unfold(),
                addV('repository')
                .property('name', name))
            .iterate()
        )

    def get_list_of_indigitous_users(self):
        """Return 'indigitous_user' vertices grouped as vertex id -> 'email'."""
        return (
            self.g.V()
            .hasLabel('indigitous_user')
            .group()
            .by(__.id())
            .by('email')
            .toList()
        )

    def edge_vertices(self, label, from_v, to_v):
        """Get or create an edge labelled ``label`` from ``from_v`` to ``to_v``.

        Arguments:
            label: Edge label to look for or create.
            from_v: Id of the source vertex.
            to_v: Id of the destination vertex.
        """
        return (
            self.g.E()
            .hasLabel(label)
            .where(outV().hasId(from_v))
            .where(inV().hasId(to_v))
            .fold()
            .coalesce(
                unfold(),
                addE(label)
                .from_(V(from_v))
                .to(V(to_v)))
            .iterate()
        )

    def close(self):
        """Close the remote connection."""
        self.remote_connection.close()
        results = t.side_effects.get('b')
        assert not results

        # But 'a' should still be cached locally
        results = t.side_effects.get('a')
        assert results

        # 'a' should have been added to local keys cache, but not 'b'
        results = t.side_effects.keys()
        assert len(results) == 1
        a, = results
        assert a == 'a'

        # Try to get 'b' directly from server, should throw error
        with pytest.raises(Exception):
            t.side_effects.value_lambda('b')
        connection.close()


if __name__ == '__main__':
    # Probe for a local Gremlin Server first; the tests are only run when a
    # connection could be established.
    test = False
    try:
        connection = DriverRemoteConnection('ws://localhost:8182/gremlin', 'g')
        test = True
        connection.close()
    except Exception:
        # Best-effort probe: report and skip. KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        print("GremlinServer is not running and this test case will not execute: " + __file__)

    if test:
        unittest.main()