def main():
    """Export graph elements carrying a given label to ``<label>.csv``.

    Command-line arguments:
        label: vertex/edge type or label to export; also the output file stem.
        -e / --edge: export edges instead of vertices.

    The header row is the key list derived from the first fetched element
    (``get_key_list``) plus a trailing ``label`` column.  Each data row is the
    result of ``get_line`` followed by the label itself.

    Exits with status 1, without creating the output file, when nothing
    matches the label.  The remote connection is always closed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("label", type=str, help="Please specify TYPE or LABEL")
    parser.add_argument("-e", "--edge", action="store_true", help="output edge")
    args = parser.parse_args()

    connection = DriverRemoteConnection(URL, 'g')
    try:
        g = Graph().traversal().withRemote(connection)
        lines = get_lines(g, args.edge, args.label)
        if not lines:
            # The header is derived from the first element, so an empty
            # result cannot be exported; fail clearly instead of IndexError.
            parser.exit(1, "no data found for label '{}'\n".format(args.label))

        key_list = get_key_list(g, args.edge, lines[0])
        key_list.append('label')
        with open(args.label + ".csv", "w", newline="") as outf:
            writer = csv.writer(outf, delimiter=",", quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(key_list)
            for line in lines:
                row = get_line(g, args.edge, line)
                row.append(args.label)
                writer.writerow(row)
    finally:
        connection.close()
class RemoteGremlinConnection(object):
    """
    This class is used to encapsulate the connection to a remote Gremlin Server.

    The underlying ``DriverRemoteConnection`` is opened lazily on the first
    access of :attr:`graph` and reused afterwards, so repeated accesses do not
    open a new connection each time.  Call :meth:`close` to release it.
    """

    def __init__(self, connection_string: str, remote_traversal_source):
        """
        TODO support for authenticated requests

        :param connection_string: first argument handed to
            ``DriverRemoteConnection`` (the server URL)
        :param remote_traversal_source: second argument handed to
            ``DriverRemoteConnection`` (the remote traversal source name)
        """
        self.__connection_str = connection_string
        self.__remote_traversal_source = remote_traversal_source
        self.__graph = Graph()
        # Opened on demand by the ``graph`` property.
        self.__connection = None

    @property
    def graph(self):
        """Return a traversal source bound to the (shared) remote connection."""
        if self.__connection is None:
            self.__connection = DriverRemoteConnection(
                self.__connection_str,
                self.__remote_traversal_source
            )
        return self.__graph.traversal().withRemote(self.__connection)

    def close(self):
        """Close the remote connection if one was opened; safe to call twice."""
        if self.__connection is not None:
            self.__connection.close()
            self.__connection = None
def __init__(self):
    """Bind ``self.g`` to a remote traversal source on a randomly picked host.

    One address is drawn from a fixed pool of three Gremlin servers and the
    traversal source is connected to it on port 8182 with alias ``'g'``.
    """
    traversal = Graph().traversal()
    candidate_hosts = ['192.168.50.5', '192.168.50.6', '192.168.50.7']
    (chosen_host,) = random.sample(candidate_hosts, 1)
    endpoint = 'ws://' + chosen_host + ':8182/gremlin'
    self.g = traversal.withRemote(DriverRemoteConnection(endpoint, 'g'))
def lambda_handler(event, context):
    """Create a FRIEND edge between two users unless they are already friends.

    :param event: mapping providing ``userId1`` and ``userId2``
    :param context: Lambda context object (unused)
    :return: ``{'statusCode': 400}`` when ``userId2`` is already among the
        FRIEND neighbours of ``userId1``; otherwise ``{'statusCode': 200}``
        after the edge has been added
    """
    uid1 = event["userId1"]
    uid2 = event["userId2"]
    remoteConn = DriverRemoteConnection('ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin','g')
    # try/finally so the connection is also released on the early 400 return
    # and when a query raises.
    try:
        g = Graph().traversal().withRemote(remoteConn)
        friends = g.V().hasLabel('User').has('uid', uid1) \
            .both('FRIEND').aggregate('friends') \
            .valueMap().toList()
        # valueMap() yields each property as a list, hence the [0].
        if any(item["uid"][0] == uid2 for item in friends):
            return {
                'statusCode': 400
            }
        a = g.V().hasLabel('User').has('uid', uid1).next()
        b = g.V().hasLabel('User').has('uid', uid2).next()
        g.V(a).addE('FRIEND').to(b).iterate()
    finally:
        remoteConn.close()
    return {
        'statusCode': 200
    }
def lambda_handler(event, context):
    """Load one biosample into the graph and queue it for the SRA data check.

    The biosample record named by ``event['biosample']`` is fetched, its
    organism's taxonomy record is fetched, both are handed to
    ``convert_record`` together with the graph traversal source, and a message
    describing the result is sent to ``sraq``.

    :param event: mapping with ``biosample`` and optionally ``bioproject``
    :param context: Lambda context object (unused)
    :return: empty string
    :raises ValueError: if the biosample record has no ``Organism`` element
    """
    def fetch(database, accession):
        # Download one record and parse the response body as XML.
        response = requests.get(
            req.format(database=database, accession=accession, api_key=api_key))
        return xml.fromstring(response.text)

    connection = DriverRemoteConnection(db, 'g')
    # Release the graph connection even if a fetch or the conversion fails.
    try:
        g = Graph().traversal().withRemote(connection)
        record = fetch("biosample", event['biosample'])
        organism = record.find('.//Organism')
        if organism is None:
            raise ValueError(
                "biosample record {} has no Organism element".format(
                    event['biosample']))
        taxon = fetch("taxonomy", organism.attrib['taxonomy_id'])
        accession = convert_record(record, taxon, g)
    finally:
        connection.close()

    print(accession)
    # loaded a biosample, now put this into the SRA data check queue
    sraq.send_message(MessageBody=json.dumps(dict(
        biosample_id=accession,
        biosample=event['biosample'],
        bioproject=event.get('bioproject', ""))))
    return ''
def data2janus(data: dict, args):
    """Write vertices and edges from ``data`` to a remote Gremlin server.

    :param data: mapping with two keys.  ``data['vertex']`` maps a vertex
        label to ``{'data': [properties, ...]}``; ``data['edge']`` maps an
        edge label to ``{'data': [entry, ...]}`` where each entry has
        ``from`` / ``to`` (each with ``tag`` and ``match``) and ``data``
        (the edge properties).
    :param args: object whose ``output`` attribute is the server address

    Progress and the final vertex/edge totals are logged.  The connection is
    closed when the function finishes, including on error.
    """
    from gremlin_python.structure.graph import Graph
    from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

    connection = DriverRemoteConnection(args.output, 'g')
    try:
        g = Graph().traversal().withRemote(connection)

        for name, vertex in data['vertex'].items():
            vertex_count = len(vertex['data'])
            for i, properties in enumerate(vertex['data']):
                vv = g.addV(name)
                for k, v in properties.items():
                    vv = vv.property(k, v)
                vv.next()
                logger.info("{}: {}/{}".format(name, i, vertex_count))

        for name, edge in data['edge'].items():
            edge_count = len(edge['data'])
            for i, e in enumerate(edge['data']):
                # Endpoints are located by label plus property matches.
                from_ = g.V().hasLabel(e['from']['tag'])
                for k, v in e['from']['match'].items():
                    from_ = from_.has(k, v)
                to_ = g.V().hasLabel(e['to']['tag'])
                for k, v in e['to']['match'].items():
                    to_ = to_.has(k, v)
                ee = g.addE(name).from_(from_).to(to_)
                for k, v in e['data'].items():
                    ee = ee.property(k, v)
                ee.next()
                logger.info("{}: {}/{}".format(name, i, edge_count))

        logger.info("total vertex count: {}, total edge count: {}".format(
            g.V().count().next(), g.E().count().next()))
    finally:
        connection.close()
def create_vertices(term: str, entities: list):
    """
    Creates vertices to graph term to entities

    A ``term`` vertex (property ``value``) is looked up or created, then for
    each entity an ``entity`` vertex is looked up by its ``value`` or created
    with ``value``/``score``/``type``, and linked from the term with a
    ``has_entity`` edge.

    :param term: value of the term vertex
    :param entities: list of dicts with keys ``entity``, ``score``, ``type``
    """
    connection = DriverRemoteConnection(
        f'wss://{os.environ["NEPTUNE_ENDPOINT"]}:8182/gremlin', 'g')
    # Close the connection even when a query fails part-way through.
    try:
        g = Graph().traversal().withRemote(connection)

        # Check if a vertex has been created for the term
        term_vertex = g.V().has("term", "value", term)
        term_vertex = term_vertex.next() if term_vertex.hasNext() else g.addV(
            "term").property("value", term).next()

        # Create an entity vertex for each and link to term
        for e in entities:
            # Match on the stored value (e["entity"]); matching on the whole
            # dict never finds the vertex and creates duplicates every call.
            entity_vertex = g.V().has("entity", "value", e["entity"])
            entity_vertex = entity_vertex.next() if entity_vertex.hasNext() else \
                g.addV("entity") \
                .property("value", e["entity"]) \
                .property("score", e["score"]) \
                .property("type", e["type"]).next()
            g.V(term_vertex).addE("has_entity").to(entity_vertex).iterate()
    finally:
        connection.close()
def traversal_source(session=None, graph_name=None, execution_profile=EXEC_PROFILE_GRAPH_DEFAULT):
    """
    Returns a TinkerPop GraphTraversalSource bound to the session and
    graph_name if provided. Without a session, the plain (unbound) traversal
    source is returned.

    :param session: A DSE session
    :param graph_name: (Optional) DSE Graph name
    :param execution_profile: (Optional) Execution profile for traversal queries.
        Default is set to `EXEC_PROFILE_GRAPH_DEFAULT`.

    .. code-block:: python

        from dse.cluster import Cluster
        from dse_graph import DseGraph

        c = Cluster()
        session = c.connect()

        g = DseGraph.traversal_source(session, 'my_graph')
        print(g.V().valueMap().toList())
    """
    source = Graph().traversal()
    if not session:
        return source
    remote = DSESessionRemoteGraphConnection(session, graph_name, execution_profile)
    return source.withRemote(remote)
def main():
    """Seed users into the graph database named by ``DB_ENDPOINT``.

    Connects to ``ws://$DB_ENDPOINT:8182/gremlin``, runs ``seed_users`` on the
    traversal source, and closes the connection even if seeding raises.
    """
    remote = DriverRemoteConnection('ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remote)
        seed_users(g)
    finally:
        remote.close()
def list_users():
    """Print the first ``user`` vertex's value map and return a JSON response.

    Connects to ``ws://$DB_ENDPOINT:8182/gremlin`` and closes the connection
    before returning, so repeated requests do not accumulate open connections.

    :return: ``(jsonify(user='******'), 200)``
    """
    remote = DriverRemoteConnection('ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remote)
        # by(__.unfold()) flattens the single-element property lists.
        print(g.V().hasLabel('user').valueMap(True).by(__.unfold()).next())
    finally:
        remote.close()
    return jsonify(user='******'), 200
def get_graph():
    """Return a traversal source connected to ``ws://<url>/gremlin``.

    The remote connection is created with ``pool_size=20`` and
    ``max_workers=15`` and uses the traversal alias ``'g'``.
    """
    endpoint = 'ws://' + url + '/gremlin'
    traversal = Graph().traversal()
    remote = DriverRemoteConnection(endpoint, 'g', pool_size=20, max_workers=15)
    return traversal.withRemote(remote)
class Neptune(object):
    """Holds a ``Graph`` and a remote traversal source ``g`` for one server."""

    def __init__(self, uri):
        """
        Connect the traversal source to the given Gremlin endpoint.

        :param uri: example 'ws://54.89.143.194:8182/gremlin'
        """
        self.graph = Graph()
        traversal = self.graph.traversal()
        remote = DriverRemoteConnection(uri, 'g')
        self.g = traversal.withRemote(remote)
def get_janus_traversal():
    """
    Build a traversal source for the host configured as ``JANUS_HOST``.

    :return: traversal source bound to ``ws://<JANUS_HOST>:8182/gremlin``
        with traversal alias ``'g'``
    """
    endpoint = 'ws://' + curr_config['JANUS_HOST'] + ':8182/gremlin'
    remote = DriverRemoteConnection(endpoint, 'g')
    return Graph().traversal().withRemote(remote)
def actors(request):
    """Render up to 40 property values of vertices pointing at a movie.

    The movie is looked up by its ``title`` property, taken from the POSTed
    ``movie_name``.  The connection is closed even when the query raises.

    :param request: HTTP request whose POST data contains ``movie_name``
    :return: rendered ``polls/movie-results.html`` response
    """
    movie_name = request.POST['movie_name']
    remoteConn = DriverRemoteConnection('ws://<neptune endpoint>:8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remoteConn)
        myList = g.V().has('title', movie_name).in_().limit(40).values().toList()
    finally:
        remoteConn.close()
    context = {'movie': movie_name, 'actors': myList}
    return render(request, 'polls/movie-results.html', context)
def graphTraversal(self, neptune_endpoint=None, neptune_port=None, show_endpoint=True, connection=None):
    """Return a traversal source bound to a remote connection.

    When ``connection`` is not supplied, one is obtained from
    ``self.remoteConnection`` using the endpoint, port and ``show_endpoint``
    arguments; otherwise those three arguments are ignored.
    """
    remote = connection
    if remote is None:
        remote = self.remoteConnection(neptune_endpoint, neptune_port, show_endpoint)
    return Graph().traversal().withRemote(remote)
class GraphWriter:
    """Writes stream quote messages into a Neptune graph via Gremlin."""

    def __init__(self, neptune_endpoint: str = None) -> None:
        """Open the remote connection.

        :param neptune_endpoint: host or URL of the Gremlin endpoint; falls
            back to the ``NEPTUNE_ENDPOINT`` environment variable.  A missing
            ``wss://`` prefix and a missing ``:8182/gremlin`` suffix are added.
        :raises ValueError: if no endpoint is given and the variable is unset
        """
        if neptune_endpoint is None:
            neptune_endpoint = environ.get('NEPTUNE_ENDPOINT')
        if not neptune_endpoint:
            # Fail clearly instead of AttributeError on None.startswith.
            raise ValueError(
                'No Neptune endpoint given and NEPTUNE_ENDPOINT is not set')

        if not neptune_endpoint.startswith('wss://'):
            neptune_endpoint = 'wss://' + neptune_endpoint
        if not neptune_endpoint.endswith('/gremlin'):
            neptune_endpoint += ':8182/gremlin'

        self.graph = Graph()
        self.connection = DriverRemoteConnection(neptune_endpoint, 'g')
        self.g = self.graph.traversal().withRemote(self.connection)

    def write_td_stream_message(self, message: dict):
        """Dispatch the first entry of ``message['data']`` by its service name.

        Only the ``QUOTE`` service is handled; anything else prints an error.
        """
        payload: dict = message['data'][0]
        serviceName: str = payload['service']
        contents: list = payload['content']

        if serviceName == 'QUOTE':
            self.write_quote(contents)
            return

        print('Error: Unknown serviceName - {}'.format(serviceName))

    def write_quote(self, contents: list):
        """Upsert instrument, exchange and trading-session vertices and edges.

        :param contents: list of dicts with ``symbol``, ``exchangeName`` and
            ``tradeTimeInLong`` (epoch milliseconds)
        """
        for content in contents:
            symbol = content['symbol']
            exchangeName = content['exchangeName']
            trade_time = datetime.utcfromtimestamp(content['tradeTimeInLong'] / 1000)
            # YYYY-MM-DD of the trade, in UTC.
            trade_session = trade_time.date().isoformat()

            print('Processing {}'.format(symbol))
            symbol_v = self.get_or_create_vertice('instrument', 'symbol', symbol)
            exchange_v = self.get_or_create_vertice('exchange', 'name', exchangeName)
            # get_or_create_edge only builds the traversal; iterate() is the
            # terminal step that actually sends it to the server.
            self.get_or_create_edge(exchange_v, symbol_v, 'transacts').iterate()

            trading_session_v = self.get_or_create_vertice(
                'trading_session', 'session', trade_session)
            self.get_or_create_edge(symbol_v, trading_session_v, 'trades-during').iterate()

    def get_or_create_vertice(self, label: str, name: str, value: str):
        """Return the vertex with ``label`` and ``name == value``, creating it if absent."""
        return self.g.V().has(label, name, value).fold().coalesce(
            __.unfold(),
            __.addV(label).property(name, value)).next()

    def get_or_create_edge(self, v1, v2, label: str):
        """Build a get-or-create traversal for a ``label`` edge from v1 to v2.

        The traversal is returned unexecuted; callers must apply a terminal
        step (``iterate()``, ``next()``, ...) for it to take effect.
        """
        return self.g.V(v1).as_('v1').V(v2).coalesce(
            __.inE(label).where(__.outV().as_('v1')),
            __.addE(label).from_('v1'))
def lambda_handler(event, context):
    """Return the friends of a user together with their profile data.

    All rows of the DynamoDB ``user`` table are read (following pagination),
    then the FRIEND neighbours of the user with ``uid == event['userId']`` are
    fetched from the graph and joined with the table rows.

    :param event: mapping with ``userId``
    :param context: Lambda context object (unused)
    :return: ``{'statusCode': 200, 'body': [[uid, firstName, lastName,
        pic_url], ...]}``
    :raises KeyError: if a friend's uid has no row in the ``user`` table
    """
    print(event)
    table = dynamodb_client.Table('user')

    # uid -> [(firstName, lastName, pic_url)]
    dict1 = {}
    scan_kwargs = {}
    while True:
        page = table.scan(**scan_kwargs)
        for item in page['Items']:
            dict1[item['UID']] = [
                (item['firstName'], item['lastName'], item['pic_url'])]
        # A single scan call returns one page only; keep going until the
        # service stops handing back a continuation key.
        last_key = page.get('LastEvaluatedKey')
        if not last_key:
            break
        scan_kwargs['ExclusiveStartKey'] = last_key
    print(dict1)

    key = event["userId"]
    remoteConn = DriverRemoteConnection(
        'ws://neptunedbinstance-3f8bwqre3vsy.cft44vxyghsh.us-east-1.neptune.amazonaws.com:8182/gremlin',
        'g')
    # The original close() sat after the return and was never reached.
    try:
        g = Graph().traversal().withRemote(remoteConn)
        friends = g.V().hasLabel('User').has('uid', key) \
            .both('FRIEND').aggregate('friends') \
            .valueMap().toList()
    finally:
        remoteConn.close()

    list2 = []
    for item in friends:
        # valueMap() yields each property as a list, hence the [0].
        uid = item["uid"][0]
        list2.append([uid, *dict1[uid][0]])
    return {'statusCode': 200, 'body': list2}
def setup_graph():
    """Return a traversal source for the endpoint in the ``GRAPH_DB`` variable.

    Any exception raised while connecting is logged with its traceback and
    replaced by ``BadRequestError('Could not connect to Neptune')``.
    """
    try:
        traversal = Graph().traversal()
        endpoint = os.environ.get('GRAPH_DB')
        logging.info('Trying To Login')
        remote = DriverRemoteConnection(endpoint, 'g')
        source = traversal.withRemote(remote)
        logging.info('Successfully Logged In')
        return source
    except Exception as err:  # Shouldn't really be so broad
        logging.exception(err)
        raise BadRequestError('Could not connect to Neptune')
class GremlinWebSocketClient(object):
    """Client holding a remote Gremlin traversal source over WebSocket.

    NOTE(review): this class appears truncated in the visible source --
    ``__getattr__`` defines ``wrapper`` but the remainder of its body (and any
    ``return``) is not shown.  Confirm against the full file before relying
    on this description.
    """

    def __init__(self, host='172.16.65.133', port='8182', *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not used here.
        self.graph = Graph()
        # Traversal source bound to ws://<host>:<port>/gremlin, alias 'g'.
        self.g = self.graph.traversal().withRemote(
            DriverRemoteConnection('ws://{}:{}/gremlin'.format(host, port), 'g'))

    def __getattr__(self, name):
        # Python calls this only for attributes missing from normal lookup.
        def wrapper(*args, **kwargs):
            # Timestamp taken at the start of the wrapped call.
            start_time = time.time()
def main():
    """Flush the existing vertices of the graph database at ``DB_ENDPOINT``.

    Connects to ``ws://$DB_ENDPOINT:8182/gremlin``, runs ``flush`` on the
    traversal source, and closes the connection even if flushing raises.
    """
    remote = DriverRemoteConnection(
        'ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remote)
        print('Flushing existing vertices in local db...')
        flush(g)
        print('Done.')
    finally:
        remote.close()
def setup_graph():
    """Return a traversal source for the fixed endpoint, traversal alias ``g2``.

    Any exception raised while connecting is logged with its traceback and
    replaced by ``BadRequestError('Could not connect to Neptune')``.
    """
    try:
        traversal = Graph().traversal()
        endpoint = "ws://10.84.86.123:8182/gremlin"
        logging.info('Trying To Login')
        remote = DriverRemoteConnection(endpoint, 'g2')
        source = traversal.withRemote(remote)
        logging.info('Successfully Logged In')
        return source
    except Exception as err:  # Shouldn't really be so broad
        logging.exception(err)
        raise BadRequestError('Could not connect to Neptune')
def get_traversal(self):
    """Return a remote traversal source limited to non-deleted vertices.

    Connects to ``ws://<self.gremlin_server>/gremlin`` and applies a
    ``SubgraphStrategy`` keeping only vertices whose ``deleted`` is 0.

    :raises CommandError: when connecting raises ``HTTPError`` or
        ``socket.error``
    """
    traversal_factory = Graph()
    try:
        endpoint = 'ws://%s/gremlin' % self.gremlin_server
        source = traversal_factory.traversal().withRemote(
            DriverRemoteConnection(endpoint, 'g'))
        # take only non deleted resources
        only_live = SubgraphStrategy(vertices=__.has('deleted', 0))
        return source.withStrategies(only_live)
    except (HTTPError, socket.error) as err:
        raise CommandError('Failed to connect to Gremlin server: %s' % err)
def movies(request):
    """Render up to 40 property values of vertices an actor points to.

    The actor is looked up by its ``name`` property, taken from the POSTed
    ``actor_name``.  The connection is closed even when the query raises.

    :param request: HTTP request whose POST data contains ``actor_name``
    :return: rendered ``polls/movie-results.html`` response
    """
    actor_name = request.POST['actor_name']
    remoteConn = DriverRemoteConnection('ws://<path to neptune>:8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remoteConn)
        myList = g.V().has('name', actor_name).out().limit(40).values().toList()
    finally:
        remoteConn.close()
    context = {'actor': actor_name, 'movies': myList}
    return render(request, 'polls/movie-results.html', context)
def get_graph(self):
    """Return a remote traversal source limited to settled, live vertices.

    Only vertices whose ``updated`` value is lower than "now minus five
    minutes" (epoch seconds) and whose ``deleted`` is 0 are visible.

    :raises CommandError: when connecting raises ``HTTPError`` or
        ``socket.error``
    """
    five_minutes = 5 * 60
    time_point = int(time.time()) - five_minutes
    traversal_factory = Graph()
    try:
        endpoint = 'ws://%s/gremlin' % self.gremlin_server
        source = traversal_factory.traversal().withRemote(
            DriverRemoteConnection(endpoint, 'g'))
        # take only resources updated at least 5min ago and not deleted
        settled_and_live = __.has('updated', lt(time_point)).has('deleted', 0)
        return source.withStrategies(SubgraphStrategy(vertices=settled_and_live))
    except (HTTPError, socket.error) as err:
        raise CommandError('Failed to connect to Gremlin server: %s' % err)
def setup_graph():
    """Return a traversal source for the endpoint in the ``GRAPH_DB`` variable.

    Any exception raised while connecting is logged with its traceback and
    re-raised as a plain ``Exception`` whose message embeds the original
    error text.
    """
    try:
        traversal = Graph().traversal()
        endpoint = os.environ.get('GRAPH_DB')
        logging.info('trying to login')
        remote = DriverRemoteConnection(endpoint, 'g')
        source = traversal.withRemote(remote)
        logging.info('successfully logged in')
        return source
    except Exception as err:  # Shouldn't really be so broad
        logging.exception(err)
        raise Exception('could not connect to Neptune, error: ' + str(err))
def graphTraversal(self, neptune_endpoint=None, neptune_port=None, show_endpoint=True, connection=None, retry_limit=DEFAULT_RETRY_COUNT):
    """Return a traversal source bound to a remote connection.

    When ``connection`` is not supplied, one is obtained from
    ``self._remoteConnection`` using the endpoint, port, ``show_endpoint`` and
    ``retry_limit`` arguments; otherwise those arguments are ignored.
    """
    remote = connection
    if remote is None:
        remote = self._remoteConnection(
            neptune_endpoint, neptune_port, show_endpoint,
            retry_limit=retry_limit)
    return Graph().traversal().withRemote(remote)
def main():
    """Load vertices and edges from two CSV files into Neptune via Gremlin.

    Options come from ``parse_options()``: ``neptune`` (host[:port] of the
    endpoint), ``vertices`` and ``edges`` (CSV paths).  Columns starting with
    ``~`` are system columns (``~id``, ``~label``, ``~from``, ``~to``); every
    other column is a property whose header is ``name`` or ``name:type``.
    The type suffix is ignored.  Ids are passed through ``id_transform``.

    Final vertex and edge counts are printed.  The connection is closed when
    loading finishes, including on error.
    """
    def add_properties(traversal, row):
        # Attach every non-system column of ``row`` as a property.
        for key, value in row.items():
            if key.startswith('~'):
                continue
            # partition() tolerates headers without a ':type' suffix, where
            # split(':') unpacking would raise ValueError.
            plabel = key.partition(':')[0]
            traversal = traversal.property(plabel, value)
        return traversal

    # Read the variables
    args = parse_options()
    neptune = args.neptune
    v_file = args.vertices
    e_file = args.edges

    # Connect to Neptune
    neptune_constr = "ws://%s/gremlin" % neptune
    LOG.debug("Connecting to Neptune REST Endpoint %s", neptune)
    connection = DriverRemoteConnection(neptune_constr, 'g')
    try:
        g = Graph().traversal().withRemote(connection)

        # Load the nodes / vertices from csv
        with open(v_file, newline="") as csvfile:
            reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
            for row in reader:
                # Note that we are transforming ids because of a bug in GraphExp
                myid = id_transform(row["~id"])
                print(myid)
                v = g.addV(row["~label"]).property(T.id, myid)
                add_properties(v, row).next()

        # Load the edges
        with open(e_file, newline="") as csvfile:
            reader = csv.DictReader(csvfile, delimiter=',', quotechar='"')
            for row in reader:
                # Note that we are transforming ids because of a bug in GraphExp
                edge_id = id_transform(row["~id"])
                from_id = id_transform(row["~from"])
                to_id = id_transform(row["~to"])
                print(edge_id)
                e = g.V(from_id).addE(row["~label"]).to(
                    g.V(to_id)).property(T.id, edge_id)
                add_properties(e, row).next()

        print("Vertices: %s" % g.V().count().next())
        print("Edges: %s" % g.E().count().next())
    finally:
        connection.close()
def list_user(username):
    """Return the properties of the ``user`` vertex with the given username.

    Connects to ``ws://$DB_ENDPOINT:8182/gremlin`` and closes the connection
    on every path, so repeated requests do not accumulate open connections.

    :param username: value of the vertex's ``username`` property
    :return: ``(jsonify(user=<properties>), 200)`` when found, otherwise
        ``({'response': '404: User not found'}, 404)``
    """
    remote = DriverRemoteConnection('ws://' + os.environ['DB_ENDPOINT'] + ':8182/gremlin', 'g')
    try:
        g = Graph().traversal().withRemote(remote)
        # by(__.unfold()) flattens the single-element property lists.
        user = g.V().hasLabel('user').has('username', username) \
            .valueMap(False).by(__.unfold())
        properties = user.next() if user.hasNext() else None
    finally:
        remote.close()

    if properties is None:
        content = {
            'response': '404: User not found'
        }
        return content, 404
    return jsonify(user=properties), 200
def separation(request):
    """Render up to 40 paths linking two actors through shared titles.

    ``actor_names`` from the POST data is split on commas.  The first name is
    the start vertex and the second the target; each path alternates the
    ``name`` and ``title`` properties.  When fewer than two names are given,
    no query is run and an empty result is rendered.

    :param request: HTTP request whose POST data contains ``actor_names``
    :return: rendered ``polls/movie-results.html`` response
    """
    # Makes the bare step names used below (out, has) available as globals.
    statics.load_statics(globals())
    inputs = [x.strip() for x in request.POST['actor_names'].split(',')]

    myList = []
    if len(inputs) >= 2:
        remoteConn = DriverRemoteConnection(
            'ws://<neptune endpoint>.com:8182/gremlin', 'g')
        # Release the connection even when the traversal fails.
        try:
            g = Graph().traversal().withRemote(remoteConn)
            myList = g.V().has('name', inputs[0]) \
                .repeat(out().in_().simplePath()) \
                .until(has('name', inputs[1])) \
                .path().by('name').by('title').limit(40).toList()
        finally:
            remoteConn.close()

    context = {'actors': request.POST['actor_names'], 'separation': myList}
    return render(request, 'polls/movie-results.html', context)
def kg_testing(inst=1, M=10, N=5, testing=False):
    """Build a sample knowledge graph in the remote DB and exercise it.

    Random data is generated, turned into knowledge-graph data with
    ``create_kg`` and written with ``kg``.  The OCR/GloVe output and the OCR
    data from the configured files are then written to the same graph, and
    finally ``thought`` is called.  Results of the ``kg`` and ``thought``
    calls are printed.

    :param inst: instance index used for configuration lookups and the DB URL
    :param M: number of data points (also caps the number of GloVe rows)
    :param N: number of properties, capped at ``const.MAX_FEATURES``
    :param testing: passed through to ``kg`` and ``cognitive``
    """
    # number of data points and properties
    m = M
    p = min(N, const.MAX_FEATURES)
    # define the number of splits of each property
    s = min(p, const.MAX_SPLITS)
    # uniformly sample values between 0 and 1 as the data set
    dat = np.random.sample(size=(m, p))
    # create column names (normally obtained by var.dtype.names)
    #
    # use an explicit list of (name, index) pairs so the order is preserved
    coln = [("col" + str(i), (i - 1)) for i in range(1, p + 1)]
    # create the data for the sample knowledge graph (only one brain)
    kgdat = create_kg(inst, dat, s, [[idx for _, idx in coln]])
    # populate the knowledge graph into the remote DB
    #
    # connection to the remote server
    conn = DriverRemoteConnection(url_kg(inst), 'g')
    # make sure the connection is released even when a step below fails
    try:
        # get the remote graph traversal
        g = Graph().traversal().withRemote(conn)
        # we only want to process the right brain
        print(kg(const.V, inst, coln, kgdat, g, False, testing))
        # after building the knowledge graph, use the output of ocr to test
        # the GloVe write
        #
        # call cognitive to produce the ocr output
        oret = ocr_testing()
        # get the location of the glove file
        src = cfg["instances"][inst]["src"]["index"]
        typ = cfg["instances"][inst]["src"]["types"]["glove"]
        gfl = cfg["instances"][inst]["sources"][src][typ]["connection"]["file"]
        # call extendglove to produce the GloVe output and transform it to an
        # array with the first term in each row being the key and all other
        # terms are values
        rdat = extendglove(oret[0][0], gfl[0])
        rdat = [(k, v) for k, v in list(rdat.items())[0:M]]
        # write the glove output to the knowledge graph
        print(kg(const.ENTS, inst, coln, rdat, g, False, testing))
        # get the ocr data ... using the real way to get the ocr data here
        typ = cfg["instances"][inst]["src"]["types"]["ocrf"]
        pdfs = cfg["instances"][inst]["sources"][src][typ]["connection"]["files"]
        cdat = cognitive(const.OCR, pdfs, inst, False, testing)
        # write the ocr data to the graph
        print(kg(const.CONS, inst, coln, cdat[1:], g, True, testing))
    finally:
        # close the connection
        conn.close()
    # test the thought function with the default number of predictions 3
    print(thought(inst, coln))