def test_append_traversal(self) -> None:
    """Appending a where() clause to a base traversal must be equal to
    the same traversal written out as a single chain."""
    base = __.V().hasLabel('Foo')
    clause = __.where(__.inE().outV().hasLabel('Bar'))
    combined = append_traversal(base, clause)
    expected = __.V().hasLabel('Foo').where(__.inE().outV().hasLabel('Bar'))
    self.assertEqual(combined, expected)
def upsert_edge(self, start_node_id: str, end_node_id: str, edge_id: str,
                edge_label: str, edge_properties: Dict[str, Any]) -> None:
    """Idempotently create an edge between two vertices and apply its
    properties.

    Uses the fold()/coalesce() upsert pattern: an existing edge with this
    id is unfolded, otherwise the insert traversal creates it.
    """
    # Traversal that creates the edge when it does not exist yet.
    insert_step = (__.V().has(T.id, start_node_id)
                   .addE(edge_label)
                   .to(__.V().has(T.id, end_node_id))
                   .property(T.id, edge_id))
    # Get-or-create, anchored on the out-vertex and the edge id.
    edge_traversal = (self.get_graph().V().has(T.id, start_node_id)
                      .outE(edge_label).has(T.id, edge_id)
                      .fold()
                      .coalesce(__.unfold(), insert_step))
    edge_traversal = NeptuneSessionClient.update_entity_properties_on_traversal(
        edge_traversal, edge_properties)
    edge_traversal.next()
def __write_edges(self, g: traversal, edges: List[Dict], scan_id: str) -> None:
    """
    Writes the edges to the labeled property graph

    :param g: The graph traversal source
    :param edges: A list of dictionaries for each edge
    :return: None
    """

    def vertex_upsert(arn: str, vertex_id: str):
        # Get-or-create one endpoint: unfold the existing vertex, or add a
        # placeholder carrying scan_id/arn so the edge always has endpoints.
        return (
            __.V(vertex_id)
            .fold()
            .coalesce(
                __.unfold(),
                __.addV(self.parse_arn(arn)["resource"])
                .property(T.id, vertex_id)
                .property("scan_id", scan_id)
                .property("arn", arn),
            )
        )

    cnt = 0
    t = g
    for r in edges:
        # Vertex ids are scoped per scan run.
        to_id = f'{r["~to"]}_{scan_id}'
        from_id = f'{r["~from"]}_{scan_id}'
        t = (
            t.addE(r["~label"])
            .property(T.id, str(r["~id"]))
            .from_(vertex_upsert(r["~from"], from_id))
            .to(vertex_upsert(r["~to"], to_id))
        )
        cnt += 1
        # Flush every 100 edges, and once more after the final edge.
        if cnt % 100 == 0 or cnt == len(edges):
            try:
                self.logger.info(
                    event=LogEvent.NeptunePeriodicWrite,
                    msg=f"Writing edges {cnt} of {len(edges)}",
                )
                t.next()
                t = g
            except Exception as err:
                self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
                raise NeptuneLoadGraphException(
                    f"Error loading edge {r} " f"with {str(t.bytecode)}"
                ) from err
def add_edges_for_label(rows):
    """Upsert one edge per row (fold/coalesce pattern), flushing the
    accumulated traversal every `batch_size` rows."""
    conn = self.gremlin_utils.remote_connection()
    g = self.gremlin_utils.traversal_source(connection=conn)
    pending = g
    buffered = 0
    for row in rows:
        entries = row.asDict()
        create_traversal = __.V(entries['~from']).addE(label).to(
            V(entries['~to'])).property(id, entries['~id'])
        # Copy every plain property column; header names look like
        # "name:type", so keep only the part before the colon.
        for key, value in entries.items():
            key = key.split(':')[0]
            if key not in ['~id', '~from', '~to', '~label']:
                create_traversal.property(key, value)
        pending = pending.V(entries['~from']).outE(label).hasId(
            entries['~id']).fold().coalesce(__.unfold(), create_traversal)
        buffered += 1
        if buffered == batch_size:
            self.retry_query(pending)
            pending = g
            buffered = 0
    # Flush any remainder smaller than a full batch.
    if buffered > 0:
        self.retry_query(pending)
    conn.close()
def __add_author(self, t, author, post_url):
    """Upsert an 'author' vertex keyed by name and connect the post to it
    with a 'written_by' edge; image metadata is attached only when the
    vertex does not already carry an 'img_src' property."""
    img_src = None
    if 'img_src' in author:
        img_src = author['img_src']
        img_height = author['img_height']
        img_width = author['img_width']
    # Get-or-create the author vertex, label it 'p', then add the edge
    # from the post vertex.
    t = (
        t.V(author['name'])
        .fold()
        .coalesce(
            __.unfold(),
            __.addV('author')
            .property(T.id, author['name'])
            .property('name', author['name'])
        )
        .as_('p')
        .addE('written_by')
        .from_(__.V(post_url))
    )
    # Conditionally add the img_src, img_height, and img_width property if they do not exist
    if img_src:
        t = t.sideEffect(
            __.select('p').hasNot('img_src')
            .property('img_src', img_src)
            .property('img_height', img_height)
            .property('img_width', img_width)
        )
    return t
def add_edge(self, u: Hashable, v: Hashable, metadata: dict):
    """
    Add a new edge to the graph between two nodes.

    If the graph is directed, this edge starts (source) at the `u` node
    and ends (target) at the `v` node.

    Arguments:
        u (Hashable): The source node ID
        v (Hashable): The target node ID
        metadata (dict): Optional metadata to associate with the edge

    Returns:
        Hashable: The edge ID, as inserted.

    """
    try:
        # Probe for an existing edge; get_edge_by_id raises IndexError
        # when no such edge exists.
        self.get_edge_by_id(u, v)
        e = self._g.V().has(ID, u).outE().as_("e").inV().has(ID, v).select("e")
    except IndexError:
        # Edge absent: make sure both endpoints exist, then create it.
        if not self.has_node(u):
            self.add_node(u, {})
        if not self.has_node(v):
            self.add_node(v, {})
        e = (
            self._g.V()
            .has(ID, u)
            .addE(EDGE_NAME)
            .as_("e")
            .to(__.V().has(ID, v))
            .select("e")
        )
    # In either case, apply the metadata as edge properties.
    for key, val in metadata.items():
        e = e.property(key, val)
    return e.toList()
def handle_youtube_video_added(self, video_id, user_id, name, description, location,
                               preview_image_location, tags, added_date, timestamp):
    """Add a video vertex for a newly uploaded YouTube video, link it to
    the uploading user, and tag it (tags are deduplicated first)."""
    # make sure tags are unique (no duplicates)
    unique_tags = set(tags)
    logging.debug('SuggestedVideosService:handle_youtube_video_added, video ID: ' + str(video_id) +
                  ', user ID: ' + str(user_id) + ', name: ' + name +
                  ', description: ' + description + ', location: ' + location +
                  ', preview_image_location: ' + preview_image_location +
                  ', tags: ' + str(unique_tags) + ', timestamp: ' + str(timestamp))

    # One traversal, assembled in stages for readability.
    # 1) locate the uploading user
    traversal = self.graph.V().has('user', 'userId', user_id).as_('^user')
    # 2) add the video vertex
    traversal = (traversal.addV('video')
                 .property('videoId', video_id)
                 .property('added_date', added_date)
                 .property('description', description)
                 .property('name', name)
                 .property('preview_image_location', preview_image_location)
                 .as_('^video'))
    # 3) connect user -> video
    traversal = traversal.addE('uploaded').from_('^user').to('^video').property('added_date', added_date)
    # 4) per unique tag, get-or-create the tag vertex and connect video -> tag
    for tag in unique_tags:
        traversal = traversal.addE('taggedWith').from_('^video').to(__.coalesce(
            __.V().has('tag', 'name', tag),
            __.addV('tag').property('name', tag).property('tagged_date', added_date)))
    # execute the traversal
    traversal.iterate()
async def _add_edge(self, edge):
    """Convenience function for generating crud traversals."""
    props = mapper.map_props_to_db(edge, edge.__mapping__)
    # Bind the endpoint ids so the server can cache the traversal shape.
    t = (self._g.V(Binding('sid', edge.source.id))
         .addE(edge.__mapping__._label)
         .to(__.V(Binding('tid', edge.target.id))))
    return await self._add_properties(t, props, edge)
def test_enforce_anonymous_child_traversal(self):
    """Child traversals must be anonymous (spawned from `__`); passing a
    traversal spawned from a traversal source into to() must raise
    TypeError.

    NOTE(review): replaced the manual try/`assert False`/except pattern
    with assertRaises for clearer failure reporting.
    """
    g = traversal().withGraph(Graph())
    # An anonymous child traversal is accepted.
    g.V(0).addE("self").to(__.V(1))
    # A source-spawned child traversal is rejected.
    with self.assertRaises(TypeError):
        g.V(0).addE("self").to(g.V(1))
def handle_user_rated_video(self, video_id, user_id, rating, timestamp):
    """Record a user's rating of a video as a 'rated' edge."""
    logging.debug(
        'SuggestedVideosService:handle_user_rated_video, video id: ' + str(video_id) +
        ', user ID: ' + str(user_id) + ', rating: ' + str(rating) +
        ', timestamp: ' + str(timestamp))
    # locate the video and user vertices and add an edge to represent the rating
    (self.graph.V().has('user', 'userId', user_id)
        .addE('rated')
        .to(__.V().has('video', 'videoId', video_id))
        .property('rating', rating)
        .iterate())
def __add_tag(self, t, tag, post_url):
    """Get-or-create the tag vertex (keyed by the tag text) and connect
    the post to it with a 'tagged' edge."""
    create_step = (__.addV('tag')
                   .property(T.id, tag)
                   .property('tag', tag))
    t = (t.V(tag)
         .fold()
         .coalesce(__.unfold(), create_step)
         .addE('tagged')
         .from_(__.V(post_url)))
    return t
def create_edge(self, label, from_id, to_id, extra_properties):
    """Idempotently create a labeled edge between two vertices and apply
    any extra properties; concurrent-creation conflicts are ignored."""
    try:
        # Edge ID is the from id and to id plus the label, so we can have multiple different types of relations
        edge_id = '{}-{}-{}'.format(from_id, label, to_id)
        # create the edge or return it if it already exists
        self._do_next(
            self.g.E(edge_id).fold().coalesce(
                __.unfold(),
                __.V(from_id).addE(label).to(__.V(to_id)).property(
                    T.id, edge_id)).property(DATE, utils.get_date_now()))
        # get the edge object
        edge = self.g.E(edge_id).limit(1)
        # add the reference data that was supplied as edge properties
        for k, v in extra_properties.items():
            edge = edge.property(k, v)
        # only execute the property traversal when there is something to set
        if extra_properties:
            edge.next()
    except ConstraintViolationException:
        # A concurrent writer already created this edge; nothing to do.
        pass
# NOTE(review): exact-string translation test — left byte-identical.  It
# verifies ScriptTranslator.translateB renders a fold/coalesce upsert chain
# into the precise Groovy text, including Cardinality.single emitted as the
# bare token "single"; restyling risks perturbing the expected literal.
def test_upsert(self) -> None: g = __.V().has('User', 'key', 'jack').fold().coalesce( unfold(), addV('User').property(Cardinality.single, 'key', 'jack')). \ coalesce(__.has('email', '*****@*****.**'), __.property(Cardinality.single, 'email', '*****@*****.**')). \ coalesce(__.has('url', 'https://twitter.com/jack'), __.property(Cardinality.single, 'url', 'https://twitter.com/jack')) actual = ScriptTranslator.translateB('g', g) self.assertEqual( actual, '''g.V().has("User","key","jack").fold().coalesce(__.unfold(),__.addV("User").property(single,"key","jack")).coalesce(__.has("email","*****@*****.**"),__.property(single,"email","*****@*****.**")).coalesce(__.has("url","https://twitter.com/jack"),__.property(single,"url","https://twitter.com/jack"))''' ) # noqa: E501
def add_edges_for_label(rows):
    """Upsert one edge per row for this label using fold()/coalesce().

    NOTE(review): replaced the Python-2-only `entries.iteritems()` with
    `entries.items()`, matching the sibling implementation of this helper
    elsewhere in the codebase (which already uses .items()).
    """
    conn = self.remote_connection()
    g = self.traversal_source(conn)
    for row in rows:
        entries = row.asDict()
        create_traversal = __.V(row['~from']).addE(label).to(
            V(row['~to'])).property(id, row['~id'])
        # Copy every plain property column; header names look like
        # "name:type", so keep only the part before the colon.
        for key, value in entries.items():
            key = key.split(':')[0]
            if key not in ['~id', '~from', '~to', '~label']:
                create_traversal.property(key, value)
        g.E(entries['~id']).fold().coalesce(__.unfold(),
                                            create_traversal).next()
    conn.close()
def __add_entities(self, t, entity, post_url):
    """Upsert a vertex for one detected entity (keyed "Text_Type") and
    connect it to the post with a scored 'found_in' edge."""
    entity_key = f'{entity["Text"]}_{entity["Type"]}'
    t = (t.V(entity_key)
         .fold()
         .coalesce(
             __.unfold(),
             __.addV(entity["Type"].lower())
             .property(T.id, entity_key)
             .property("text", entity["Text"])
             .property("type", entity["Type"]))
         .addE('found_in')
         .from_(__.V(post_url))
         .property('score', entity['Score']))
    return t
def _build_gremlin_edges(g: GraphTraversalSource, row: pd.Series) -> GraphTraversalSource:
    """Append an edge upsert for one dataframe row: both endpoints are
    get-or-created (as placeholder "Vertex" labels when missing), then the
    labeled edge and its properties are added."""
    # Target endpoint is upserted inside an anonymous traversal for to().
    to_step = (__.V(str(row["~to"])).fold().coalesce(
        __.unfold(),
        _build_gremlin_vertices(__, {
            "~id": row["~to"],
            "~label": "Vertex"
        })))
    # Source endpoint is upserted on the main traversal, then the edge added.
    g = (g.V(str(row["~from"])).fold().coalesce(
        __.unfold(),
        _build_gremlin_vertices(__, {
            "~id": row["~from"],
            "~label": "Vertex"
        })).addE(row["~label"]).to(to_step))
    g = _build_gremlin_properties(g, row)
    return g
def _build_gremlin_insert_edges(
        g: GraphTraversalSource, row: pd.Series,
        use_header_cardinality: bool) -> GraphTraversalSource:
    """Append an id-preserving edge upsert for one dataframe row: both
    endpoints are get-or-created (placeholder "Vertex" labels when
    missing), the edge keeps the row's ~id, then properties are set."""
    # Target endpoint is upserted inside an anonymous traversal for to().
    to_step = (__.V(str(row["~to"])).fold().coalesce(
        __.unfold(),
        _build_gremlin_insert_vertices(__, {
            "~id": row["~to"],
            "~label": "Vertex"
        })))
    # Source endpoint upsert, then the edge with its explicit id.
    g = (g.V(str(row["~from"])).fold().coalesce(
        __.unfold(),
        _build_gremlin_insert_vertices(__, {
            "~id": row["~from"],
            "~label": "Vertex"
        })).addE(row["~label"]).property(T.id, str(row["~id"])).to(to_step))
    g = _set_properties(g, use_header_cardinality, row)
    return g
def add_edges_for_label(rows):
    """Upsert one edge per row for this label, reporting server errors.

    NOTE(review): narrowed the bare `except:` to `except Exception` so
    KeyboardInterrupt/SystemExit still propagate, and moved conn.close()
    into a finally block so the connection is not leaked when an error is
    raised mid-batch.
    """
    conn = None
    try:
        conn = self.gremlin_utils.remote_connection()
        g = self.gremlin_utils.traversal_source(connection=conn)
        for row in rows:
            entries = row.asDict()
            create_traversal = __.V(row['~from']).addE(label).to(
                V(row['~to'])).property(id, row['~id'])
            # Copy plain property columns; header names are "name:type".
            for key, value in entries.items():
                key = key.split(':')[0]
                if key not in ['~id', '~from', '~to', '~label']:
                    create_traversal.property(key, value)
            g.E(entries['~id']).fold().coalesce(
                __.unfold(), create_traversal).next()
    except GremlinServerError as err:
        print("Neptune error: {0}".format(err))
    except Exception:
        print("Unexpected error:", sys.exc_info()[0])
    finally:
        if conn is not None:
            conn.close()
def upsert_edge(record, edge_mapping, g):
    """Create an edge between two existing vertices if it is not already
    present.

    Both endpoints are located by vertex label plus the configured lookup
    properties; if either set of lookup values is missing from the record,
    the edge is skipped.  The upsert uses fold()/coalesce() on the matched
    out-vertex -> in-vertex pair, so the edge is only added when no edge
    with this label already connects them.

    NOTE(review): narrowed the bare `except:` to `except Exception` so
    KeyboardInterrupt/SystemExit are not swallowed by the skip-and-log path.
    """
    edge_label = edge_mapping['edge_label']
    # Simple logic, requiring that Vertices must exist before edge can be added.
    # Ensure all lookup values are present first
    out_lookup_values = get_lookup_values(
        record, edge_mapping['out_vertex']['lookup_properties'])
    in_lookup_values = get_lookup_values(
        record, edge_mapping['in_vertex']['lookup_properties'])
    if out_lookup_values is None or in_lookup_values is None:
        return
    try:
        # Two parallel traversals are built: `traversal` finds the pair and
        # detects an existing edge; `insertion_traversal` re-finds the pair
        # anonymously and performs the insert inside coalesce().
        traversal = g.V().hasLabel(edge_mapping['out_vertex']['vertex_label'])
        insertion_traversal = __.V().hasLabel(
            edge_mapping['out_vertex']['vertex_label'])
        for prop_key, lookup_value in out_lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.has(
                prop_key, lookup_value)
        traversal = traversal.as_('out').V().hasLabel(
            edge_mapping['in_vertex']['vertex_label'])
        insertion_traversal = insertion_traversal.as_('out2').V().hasLabel(
            edge_mapping['in_vertex']['vertex_label'])
        for prop_key, lookup_value in in_lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.has(
                prop_key, lookup_value)
        insertion_traversal = insertion_traversal.addE(edge_label).from_(
            'out2')
        traversal = traversal.as_('in').inE(edge_label).as_('e').outV().where(
            P.eq('out')).fold().coalesce(__.unfold(), insertion_traversal).next()
    except Exception:
        print("Edge error - skipping: {0}({1}) --{2}-> {3}({4})".format(
            edge_mapping['out_vertex']['vertex_label'], out_lookup_values,
            edge_label, edge_mapping['in_vertex']['vertex_label'],
            in_lookup_values))
# NOTE(review): intricate, statement-order-dependent aggregation left
# byte-identical; comments only.  Builds a 2nd-degree subgraph around
# `target_id`: per-feature neighbour id lists (subgraph_dict), neighbour
# transaction value vectors parsed from the versioned JSON attribute
# (neighbor_dict), and per-feature embedding vectors
# (transaction_embed_value_dict).  Assumes vertex ids are formatted
# "<name>-<value>" — TODO confirm against the loader.  The hard-coded
# `== 51` branch mirrors a fixed union_id_cols layout — verify with caller.
# This copy also contains an extraction-garbled f-string near the
# JSONDecodeError handler (split mid-literal); preserved as-is.
def query_target_subgraph(self, target_id, tr_dict, transaction_value_cols, union_id_cols, dummied_col): """Extract 2nd degree subgraph of target transaction.Dump data into subgraph dict and n_feats dict. subgraph_dict: related transactions' id list and values through edges n_feats dict: related 1 degree vertex and transactions' embeded elements vectors. Usually after insert new test sample's vertex and edges into graphDB. Example: >>> query_target_subgraph('3661635', load_data_from_event(), 'M2_T,M3_F,M3_T,...') """ subgraph_dict = {} neighbor_list = [] neighbor_dict = {} transaction_embed_value_dict = {} ii = 0 s_t = dt.now() conn = self.gremlin_utils.remote_connection() g = self.gremlin_utils.traversal_source(connection=conn) target_name = target_id[(target_id.find('-') + 1):] feature_list = g.V().has(id, target_id).out().id().toList() for feat in feature_list: ii += 1 feat_name = feat[:feat.find('-')] feat_value = feat[(feat.find('-') + 1):] node_list = g.V().has( id, feat).both().limit(MAX_FEATURE_NODE).id().toList() target_and_conn_node_list = [int(target_name)] + [ int(target_conn_node[(target_conn_node.find('-') + 1):]) for target_conn_node in node_list ] target_and_conn_node_list = list(set(target_and_conn_node_list)) neighbor_list += target_and_conn_node_list nodes_and_feature_value_array = (target_and_conn_node_list, [feat_value] * len(target_and_conn_node_list)) subgraph_dict['target<>' + feat_name] = nodes_and_feature_value_array e_t = dt.now() logger.info( f'INSIDE query_target_subgraph: subgraph_dict used {(e_t - s_t).total_seconds()} seconds' ) new_s_t = e_t union_li = [ __.V().has(id, target_id).both().hasLabel(label).both().limit( MAX_FEATURE_NODE) for label in union_id_cols ] if len(union_id_cols) == 51: node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\ union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\ union_li[6], union_li[7], union_li[8], union_li[9], union_li[10],\ 
union_li[11], union_li[12], union_li[13], union_li[14], union_li[15],\ union_li[16], union_li[17], union_li[18], union_li[19], union_li[20],\ union_li[21], union_li[22], union_li[23], union_li[24], union_li[25],\ union_li[26], union_li[27], union_li[28], union_li[29], union_li[30],\ union_li[31], union_li[32], union_li[33], union_li[34], union_li[35],\ union_li[36], union_li[37], union_li[38], union_li[39], union_li[40],\ union_li[41], union_li[42], union_li[43], union_li[44], union_li[45],\ union_li[46], union_li[47], union_li[48], union_li[49], union_li[50]).elementMap().toList() else: node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\ union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\ union_li[6], union_li[7], union_li[8], union_li[9], union_li[10]).elementMap().toList() e_t = dt.now() logger.info( f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.' ) new_s_t = e_t logger.debug(f'Found {len(node_dict)} nodes from graph dbs...') class Item(): def __init__(self, item): self.item = item def __hash__(self): return hash(self.item.get(list(self.item)[0])) def __eq__(self, other): if isinstance(other, self.__class__): return self.__hash__() == other.__hash__() else: return NotImplemented def __repr__(self): return "Item(%s)" % (self.item) node_dict = list(set([Item(node) for node in node_dict])) logger.debug(f'Found {len(node_dict)} nodes without duplication') for item in node_dict: item = item.item node = item.get(list(item)[0]) node_value = node[(node.find('-') + 1):] try: logger.debug( f'the props of node {node} is {item.get(attr_version_key)}' ) jsonVal = json.loads(item.get(attr_version_key)) neighbor_dict[node_value] = [ jsonVal[key] for key in transaction_value_cols ] logger.debug( f'neighbor pair is {node_value}, {neighbor_dict[node_value]}' ) except json.JSONDecodeError: logger.warn( f'Malform node value {node} is {item.get(attr_version_key)}, run below 
cmd to remove it' ) logger.info(f'g.V(\'{node}\').drop()') target_value = target_id[(target_id.find('-') + 1):] jsonVal = json.loads(tr_dict[0].get(attr_version_key)) neighbor_dict[target_value] = [ jsonVal[key] for key in transaction_value_cols ] logger.info( f'INSIDE query_target_subgraph: neighbor_dict used {(e_t - new_s_t).total_seconds()} seconds.' ) attr_cols = ['val' + str(x) for x in range(1, 391)] for attr in feature_list: attr_name = attr[:attr.find('-')] attr_value = attr[(attr.find('-') + 1):] attr_dict = g.V().has(id, attr).valueMap().toList()[0] logger.debug(f'attr is {attr}, dict is {attr_dict}') jsonVal = json.loads(attr_dict.get(attr_version_key)[0]) attr_dict = [float(jsonVal[key]) for key in attr_cols] attr_input_dict = {} attr_input_dict[attr_value] = attr_dict transaction_embed_value_dict[attr_name] = attr_input_dict e_t = dt.now() logger.info( f'INSIDE query_target_subgraph: transaction_embed_value_dict used {(e_t - new_s_t).total_seconds()} seconds. Total test cost {(e_t - s_t).total_seconds()} seconds.' ) new_s_t = e_t transaction_embed_value_dict['target'] = neighbor_dict conn.close() return subgraph_dict, transaction_embed_value_dict
# NOTE(review): mock call-count and bytecode-equality assertions are highly
# order-sensitive; left byte-identical, comments only.  The test verifies
# _upsert: (1) inserts a missing vertex, (2) a repeat upsert with unchanged
# data is a no-op write (exactly two executor calls: a valueMap read then an
# id-returning write), and (3) an upsert with `foo=None` drops the 'foo'
# property and updates 'name' in a single sideEffect traversal.
def test_upsert_thrice(self) -> None: executor = mock.Mock(wraps=self.get_proxy().query_executor()) # test that we will insert db_name = Fixtures.next_database() database_uri = f'database://{db_name}' vertex_type = VertexType( label=VertexTypes.Database.value.label, properties=VertexTypes.Database.value.properties + tuple([Property(name='foo', type=GremlinType.String)])) exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count()) self.assertEqual(exists, 0) _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name, label=vertex_type, key=database_uri, name='test', foo='bar') exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count()) self.assertEqual(exists, 1) id = self._get(label=vertex_type, key=database_uri, extra_traversal=__.id()) executor.reset_mock() _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name, label=vertex_type, key=database_uri, name='test') exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count()) self.assertEqual(exists, 1) self.assertEqual(executor.call_count, 2) # first one is the get: self.assertEqual(executor.call_args_list[0][1]['query'].bytecode, __.V(id).valueMap(True).bytecode) # the second one should be like self.assertEqual(executor.call_args_list[1][1]['query'].bytecode, __.V(id).id().bytecode) executor.reset_mock() _upsert(executor=executor, g=self.get_proxy().g, key_property_name=self.get_proxy().key_property_name, label=vertex_type, key=database_uri, name='test2', foo=None) exists = self._get(label=vertex_type, key=database_uri, extra_traversal=__.count()) self.assertEqual(exists, 1) self.assertEqual(executor.call_count, 2) # first one is the get: self.assertEqual(executor.call_args_list[0][1]['query'].bytecode, __.V(id).valueMap(True).bytecode) # the second one should be like self.assertEqual( executor.call_args_list[1][1]['query'].bytecode, 
__.V(id).sideEffect(__.properties('foo').drop()).property( Cardinality.single, 'name', 'test2').id().bytecode)
# NOTE(review): near-duplicate of the other query_target_subgraph variant
# (this one reads values straight from elementMap()/valueMap() instead of a
# versioned JSON attribute, and adds verbose logging); intricate and
# order-dependent, so left byte-identical with comments only.  Assumes
# vertex ids are formatted "<name>-<value>" — TODO confirm.  The
# `if len(union_id_cols)` / `== 51:` split below is extraction garbling of
# a single line; preserved as-is.
def query_target_subgraph(self, target_id, tr_dict, transaction_value_cols, union_id_cols, dummied_col): """Extract 2nd degree subgraph of target transaction.Dump data into subgraph dict and n_feats dict. subgraph_dict: related transactions' id list and values through edges n_feats dict: related 1 degree vertex and transactions' embeded elements vectors. Usually after insert new test sample's vertex and edges into graphDB. Example: >>> query_target_subgraph('3661635', load_data_from_event(), 'M2_T,M3_F,M3_T,...') """ subgraph_dict = {} neighbor_list = [] neighbor_dict = {} transaction_embed_value_dict = {} ii = 0 s_t = dt.now() conn = self.gremlin_utils.remote_connection() g = self.gremlin_utils.traversal_source(connection=conn) target_name = target_id[(target_id.find('-')+1):] feature_list = g.V().has(id,target_id).out().id().toList() for feat in feature_list: ii += 1 feat_name = feat[:feat.find('-')] feat_value = feat[(feat.find('-')+1):] node_list = g.V().has(id,feat).both().limit(MAX_FEATURE_NODE).id().toList() target_and_conn_node_list = [int(target_name)]+[int(target_conn_node[(target_conn_node.find('-')+1):]) for target_conn_node in node_list] target_and_conn_node_list = list(set(target_and_conn_node_list)) neighbor_list += target_and_conn_node_list nodes_and_feature_value_array = (target_and_conn_node_list,[feat_value]*len(target_and_conn_node_list)) subgraph_dict['target<>'+feat_name] = nodes_and_feature_value_array e_t = dt.now() logger.info(f'INSIDE query_target_subgraph: subgraph_dict used {(e_t - s_t).total_seconds()} seconds') logger.info(f'subgraph_dict len: {len(subgraph_dict.keys())} key: {subgraph_dict.keys()}') logger.info(f'subgraph_dict: {subgraph_dict}') new_s_t = e_t union_li = [__.V().has(id,target_id).both().hasLabel(label).both().limit(MAX_FEATURE_NODE) for label in union_id_cols] logger.info(f'union_id_cols len: {len(union_id_cols)} key: {union_id_cols}') logger.info(f'union_li len: {len(union_li)} key: {union_li}') if len(union_id_cols) 
== 51: node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\ union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\ union_li[6], union_li[7], union_li[8], union_li[9], union_li[10],\ union_li[11], union_li[12], union_li[13], union_li[14], union_li[15],\ union_li[16], union_li[17], union_li[18], union_li[19], union_li[20],\ union_li[21], union_li[22], union_li[23], union_li[24], union_li[25],\ union_li[26], union_li[27], union_li[28], union_li[29], union_li[30],\ union_li[31], union_li[32], union_li[33], union_li[34], union_li[35],\ union_li[36], union_li[37], union_li[38], union_li[39], union_li[40],\ union_li[41], union_li[42], union_li[43], union_li[44], union_li[45],\ union_li[46], union_li[47], union_li[48], union_li[49], union_li[50]).elementMap().toList() else: node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\ union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\ union_li[6], union_li[7], union_li[8], union_li[9], union_li[10]).elementMap().toList() e_t = dt.now() logger.info(f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.') new_s_t = e_t logger.info(f'node_dict len: {len(node_dict)} key: {node_dict}') for item in node_dict: node = item.get(list(item)[0]) node_value = node[(node.find('-')+1):] neighbor_dict[node_value] = [item.get(key) for key in transaction_value_cols] target_value = target_id[(target_id.find('-')+1):] neighbor_dict[target_value] = [tr_dict[0].get(key) for key in transaction_value_cols] logger.info(f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.') logger.info(f'neighbor_dict len: {len(neighbor_dict.keys())} key: {neighbor_dict.keys()}') logger.info(f'neighbor_dict: {neighbor_dict}') attr_cols = ['val'+str(x) for x in range(1,391)] for attr in feature_list: attr_name = attr[:attr.find('-')] attr_value = attr[(attr.find('-')+1):] 
attr_dict = g.V().has(id,attr).valueMap().toList()[0] attr_dict = [attr_dict.get(key)[-1] for key in attr_cols] attr_input_dict = {} attr_input_dict[attr_value] = attr_dict transaction_embed_value_dict[attr_name] = attr_input_dict e_t = dt.now() logger.info(f'INSIDE query_target_subgraph: transaction_embed_value_dict used {(e_t - new_s_t).total_seconds()} seconds. Total test cost {(e_t - s_t).total_seconds()} seconds.') new_s_t = e_t transaction_embed_value_dict['target'] = neighbor_dict conn.close() logger.info(f'transaction_embed_value_dict len: {len(transaction_embed_value_dict.keys())} key: {transaction_embed_value_dict.keys()}') logger.info(f'transaction_embed_value_dict: {transaction_embed_value_dict}') return subgraph_dict, transaction_embed_value_dict
def test_translations(self):
    """Verify that Translator renders Python GLV traversals as Gremlin strings.

    Builds a numbered list of ``[traversal, expected_string]`` pairs covering
    steps, predicates, enums, anonymous traversals, datetimes, and traversal
    strategies, then asserts that translating each traversal's bytecode with
    ``Translator().of('g')`` produces exactly the expected string.
    """
    g = traversal().withGraph(Graph())
    tests = []
    # 0
    tests.append([g.V(), "g.V()"])
    # 1
    tests.append([g.V('1', '2', '3', '4'), "g.V('1','2','3','4')"])
    # 2
    tests.append([g.V('3').valueMap(True), "g.V('3').valueMap(True)"])
    # 3
    tests.append([g.V().constant(5), "g.V().constant(5)"])
    # 4
    tests.append([g.V().constant(1.5), "g.V().constant(1.5)"])
    # 5
    tests.append([g.V().constant('Hello'), "g.V().constant('Hello')"])
    # 6
    tests.append([g.V().hasLabel('airport').limit(5), "g.V().hasLabel('airport').limit(5)"])
    # 7
    tests.append([g.V().hasLabel(within('a', 'b', 'c')), "g.V().hasLabel(within(['a','b','c']))"])
    # 8
    tests.append([g.V().hasLabel('airport', 'continent').out().limit(5),
                  "g.V().hasLabel('airport','continent').out().limit(5)"])
    # 9
    tests.append([g.V().hasLabel('airport').out().values('code').limit(5),
                  "g.V().hasLabel('airport').out().values('code').limit(5)"])
    # 10
    tests.append([g.V('3').as_('a').out('route').limit(10).where(eq('a')).by('region'),
                  "g.V('3').as('a').out('route').limit(10).where(eq('a')).by('region')"])
    # 11
    tests.append([g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code'),
                  "g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code')"])
    # 12
    tests.append([g.V().hasLabel('airport').out().has('region', 'US-TX').values('code').limit(5),
                  "g.V().hasLabel('airport').out().has('region','US-TX').values('code').limit(5)"])
    # 13
    tests.append([g.V().hasLabel('airport').union(__.values('city'), __.values('region')).limit(5),
                  "g.V().hasLabel('airport').union(__.values('city'),__.values('region')).limit(5)"])
    # 14
    tests.append([g.V('3').as_('a').out('route', 'routes'), "g.V('3').as('a').out('route','routes')"])
    # 15
    tests.append([g.V().where(__.values('runways').is_(5)), "g.V().where(__.values('runways').is(5))"])
    # 16
    tests.append([g.V('3').repeat(__.out().simplePath()).until(__.has('code', 'AGR')).path().by('code').limit(5),
                  "g.V('3').repeat(__.out().simplePath()).until(__.has('code','AGR')).path().by('code').limit(5)"])
    # 17
    tests.append([g.V().hasLabel('airport').order().by(__.id()),
                  "g.V().hasLabel('airport').order().by(__.id())"])
    # 18
    tests.append([g.V().hasLabel('airport').order().by(T.id),
                  "g.V().hasLabel('airport').order().by(T.id)"])
    # 19
    tests.append([g.V().hasLabel('airport').order().by(__.id(),Order.desc),
                  "g.V().hasLabel('airport').order().by(__.id(),Order.desc)"])
    # 20
    tests.append([g.V().hasLabel('airport').order().by('code',Order.desc),
                  "g.V().hasLabel('airport').order().by('code',Order.desc)"])
    # 21
    tests.append([g.V('1', '2', '3').local(__.out().out().dedup().fold()),
                  "g.V('1','2','3').local(__.out().out().dedup().fold())"])
    # 22
    tests.append([g.V('3').out().path().count(Scope.local),
                  "g.V('3').out().path().count(Scope.local)"])
    # 23
    tests.append([g.E().count(), "g.E().count()"])
    # 24
    tests.append([g.V('5').outE('route').inV().path().limit(10),
                  "g.V('5').outE('route').inV().path().limit(10)"])
    # 25
    tests.append([g.V('5').propertyMap().select(Column.keys),
                  "g.V('5').propertyMap().select(Column.keys)"])
    # 26
    tests.append([g.V('5').propertyMap().select(Column.values),
                  "g.V('5').propertyMap().select(Column.values)"])
    # 27
    tests.append([g.V('3').values('runways').math('_ + 1'),
                  "g.V('3').values('runways').math('_ + 1')"])
    # 28
    tests.append([g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path(),
                  "g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path()"])
    # 29
    tests.append([g.V().match(__.as_('a').has('code', 'LHR').as_('b')).select('b').by('code'),
                  "g.V().match(__.as('a').has('code','LHR').as('b')).select('b').by('code')"])
    # 30
    tests.append([g.V().has('test-using-keyword-as-property','repeat'),
                  "g.V().has('test-using-keyword-as-property','repeat')"])
    # 31
    tests.append([g.V('1').addE('test').to(__.V('4')), "g.V('1').addE('test').to(__.V('4'))"])
    # 32
    tests.append([g.V().values('runways').max(), "g.V().values('runways').max()"])
    # 33
    tests.append([g.V().values('runways').min(), "g.V().values('runways').min()"])
    # 34
    tests.append([g.V().values('runways').sum(), "g.V().values('runways').sum()"])
    # 35
    tests.append([g.V().values('runways').mean(), "g.V().values('runways').mean()"])
    # 36
    tests.append([g.withSack(0).V('3', '5').sack(Operator.sum).by('runways').sack(),
                  "g.withSack(0).V('3','5').sack(Operator.sum).by('runways').sack()"])
    # 37
    tests.append([g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum(),
                  "g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum()"])
    # 38
    tests.append([g.inject(3, 4, 5), "g.inject(3,4,5)"])
    # 39
    tests.append([g.inject([3, 4, 5]), "g.inject([3, 4, 5])"])
    # 40
    tests.append([g.inject(3, 4, 5).count(), "g.inject(3,4,5).count()"])
    # 41
    tests.append([g.V().has('runways', gt(5)).count(), "g.V().has('runways',gt(5)).count()"])
    # 42
    tests.append([g.V().has('runways', lte(5.3)).count(), "g.V().has('runways',lte(5.3)).count()"])
    # 43
    tests.append([g.V().has('code', within(123,124)), "g.V().has('code',within([123,124]))"])
    # 44
    tests.append([g.V().has('code', within(123, 'abc')), "g.V().has('code',within([123,'abc']))"])
    # 45
    tests.append([g.V().has('code', within('abc', 123)), "g.V().has('code',within(['abc',123]))"])
    # 46
    tests.append([g.V().has('code', within('abc', 'xyz')), "g.V().has('code',within(['abc','xyz']))"])
    # 47
    tests.append([g.V('1', '2').has('region', P.within('US-TX','US-GA')),
                  "g.V('1','2').has('region',within(['US-TX','US-GA']))"])
    # 48
    tests.append([g.V().and_(__.has('runways', P.gt(5)), __.has('region','US-TX')),
                  "g.V().and(__.has('runways',gt(5)),__.has('region','US-TX'))"])
    # 49
    tests.append([g.V().union(__.has('runways', gt(5)), __.has('region','US-TX')),
                  "g.V().union(__.has('runways',gt(5)),__.has('region','US-TX'))"])
    # 50
    tests.append([g.V('3').choose(__.values('runways').is_(3),__.constant('three'),__.constant('not three')),
                  "g.V('3').choose(__.values('runways').is(3),__.constant('three'),__.constant('not three'))"])
    # 51
    tests.append([g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three')),
                  "g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three'))"])
    # 52
    tests.append([g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three')),
                  "g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three'))"])
    # 53
    tests.append([g.V('3').repeat(__.out().simplePath()).until(__.loops().is_(1)).count(),
                  "g.V('3').repeat(__.out().simplePath()).until(__.loops().is(1)).count()"])
    # 54
    tests.append([g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys),
                  "g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys)"])
    # 55
    tests.append([g.V('1').as_('a').V('2').as_('a').select(Pop.all_, 'a'),
                  "g.V('1').as('a').V('2').as('a').select(Pop.all,'a')"])
    # 56
    tests.append([g.addV('test').property(Cardinality.set_, 'p1', 10),
                  "g.addV('test').property(Cardinality.set,'p1',10)"])
    # 57
    tests.append([g.addV('test').property(Cardinality.list_, 'p1', 10),
                  "g.addV('test').property(Cardinality.list,'p1',10)"])
    # 58
    tests.append([g.addV('test').property(Cardinality.single, 'p1', 10),
                  "g.addV('test').property(Cardinality.single,'p1',10)"])
    # 59
    tests.append([g.V().limit(5).order().by(T.label), "g.V().limit(5).order().by(T.label)"])
    # 60
    tests.append([g.V().range(1, 5), "g.V().range(1,5)"])
    # 61
    tests.append([g.addV('test').property('p1', 123), "g.addV('test').property('p1',123)"])
    # 62
    tests.append([g.addV('test').property('date',datetime(2021, 2, 1, 9, 30)),
                  "g.addV('test').property('date',new Date(121,2,1,9,30,0))"])
    # 63
    tests.append([g.addV('test').property('date',datetime(2021, 2, 1)),
                  "g.addV('test').property('date',new Date(121,2,1,0,0,0))"])
    # 64
    tests.append([g.addE('route').from_(__.V('1')).to(__.V('2')),
                  "g.addE('route').from(__.V('1')).to(__.V('2'))"])
    # 65
    tests.append([g.withSideEffect('a', [1, 2]).V('3').select('a'),
                  "g.withSideEffect('a',[1, 2]).V('3').select('a')"])
    # 66
    tests.append([g.withSideEffect('a', 1).V('3').select('a'),
                  "g.withSideEffect('a',1).V('3').select('a')"])
    # 67
    tests.append([g.withSideEffect('a', 'abc').V('3').select('a'),
                  "g.withSideEffect('a','abc').V('3').select('a')"])
    # 68
    tests.append([g.V().has('airport', 'region', 'US-NM').limit(3).values('elev').fold().index(),
                  "g.V().has('airport','region','US-NM').limit(3).values('elev').fold().index()"])
    # 69
    tests.append([g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code', 'AGR')).path(),
                  "g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code','AGR')).path()"])
    # 70
    tests.append([g.V().hasLabel('airport').where(__.values('elev').is_(gt(14000))),
                  "g.V().hasLabel('airport').where(__.values('elev').is(gt(14000)))"])
    # 71
    tests.append([g.V().hasLabel('airport').where(__.out().count().is_(gt(250))).values('code'),
                  "g.V().hasLabel('airport').where(__.out().count().is(gt(250))).values('code')"])
    # 72
    tests.append([g.V().hasLabel('airport').filter(__.out().count().is_(gt(250))).values('code'),
                  "g.V().hasLabel('airport').filter(__.out().count().is(gt(250))).values('code')"])
    # 73
    tests.append([g.withSack(0).
                  V('3').
                  repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).
                  until(__.has('code', 'AGR').or_().loops().is_(4)).
                  has('code', 'AGR').
                  local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).
                  limit(10),
                  "g.withSack(0).V('3').repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).until(__.has('code','AGR').or().loops().is(4)).has('code','AGR').local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).limit(10)"])
    # 74
    tests.append([g.addV().as_('a').addV().as_('b').addE('knows').from_('a').to('b'),
                  "g.addV().as('a').addV().as('b').addE('knows').from('a').to('b')"])
    # 75
    tests.append([g.addV('Person').as_('a').addV('Person').as_('b').addE('knows').from_('a').to('b'),
                  "g.addV('Person').as('a').addV('Person').as('b').addE('knows').from('a').to('b')"])
    # 76
    tests.append([g.V('3').project('Out','In').by(__.out().count()).by(__.in_().count()),
                  "g.V('3').project('Out','In').by(__.out().count()).by(__.in().count())"])
    # 77
    tests.append([g.V('44').out().aggregate('a').out().where(within('a')).path(),
                  "g.V('44').out().aggregate('a').out().where(within(['a'])).path()"])
    # 78
    tests.append([g.V().has('date', datetime(2021, 2, 22)),
                  "g.V().has('date',new Date(121,2,22,0,0,0))"])
    # 79
    tests.append([g.V().has('date', within(datetime(2021, 2, 22), datetime(2021, 1, 1))),
                  "g.V().has('date',within([new Date(121,2,22,0,0,0),new Date(121,1,1,0,0,0)]))"])
    # 80
    tests.append([g.V().has('date', between(datetime(2021, 1, 1), datetime(2021, 2, 22))),
                  "g.V().has('date',between(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
    # 81
    tests.append([g.V().has('date', inside(datetime(2021, 1, 1),datetime(2021, 2, 22))),
                  "g.V().has('date',inside(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
    # 82
    tests.append([g.V().has('date', P.gt(datetime(2021, 1, 1, 9, 30))),
                  "g.V().has('date',gt(new Date(121,1,1,9,30,0)))"])
    # 83
    tests.append([g.V().has('runways', between(3,5)), "g.V().has('runways',between(3,5))"])
    # 84
    tests.append([g.V().has('runways', inside(3,5)), "g.V().has('runways',inside(3,5))"])
    # 85
    tests.append([g.V('44').outE().elementMap(), "g.V('44').outE().elementMap()"])
    # 86
    tests.append([g.V('44').valueMap().by(__.unfold()),
                  "g.V('44').valueMap().by(__.unfold())"])
    # 87
    tests.append([g.V('44').valueMap().with_(WithOptions.tokens,WithOptions.labels),
                  "g.V('44').valueMap().with(WithOptions.tokens,WithOptions.labels)"])
    # 88
    tests.append([g.V('44').valueMap().with_(WithOptions.tokens),
                  "g.V('44').valueMap().with(WithOptions.tokens)"])
    # 89
    tests.append([g.withStrategies(ReadOnlyStrategy()).addV('test'),
                  "g.withStrategies(new ReadOnlyStrategy()).addV('test')"])
    # 90
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
    # 91
    strategy = SubgraphStrategy(vertex_properties=__.hasNot('runways'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertexProperties:__.hasNot('runways'))).V().count()"])
    # 92
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'),vertex_properties=__.hasNot('runways'))
    tests.append([g.withStrategies(strategy).V().count(),
                  "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),vertexProperties:__.hasNot('runways'))).V().count()"])
    # 93
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
    tests.append([g.withStrategies(ReadOnlyStrategy(),strategy).V().count(),
                  "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
    # 94
    strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'))
    tests.append([g.withStrategies(ReadOnlyStrategy(), strategy).V().count(),
                  "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'))).V().count()"])
    # 95
    tests.append([g.with_('evaluationTimeout', 500).V().count(),
                  "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
    # 96
    tests.append([g.withStrategies(OptionsStrategy({'evaluationTimeout': 500})).V().count(),
                  "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
    # 97
    tests.append([g.withStrategies(PartitionStrategy(partition_key="partition", write_partition="a", read_partitions=["a"])).addV('test'),
                  "g.withStrategies(new PartitionStrategy(partitionKey:'partition',writePartition:'a',readPartitions:['a'])).addV('test')"])
    # 98
    tests.append([g.withComputer().V().shortestPath().with_(ShortestPath.target, __.has('name','peter')),
                  "g.withStrategies(new VertexProgramStrategy()).V().shortestPath().with('~tinkerpop.shortestPath.target',__.has('name','peter'))"])

    tlr = Translator().of('g')
    # Translate each traversal's bytecode and compare against the expected
    # string; the index in the failure message maps back to the numbered
    # comments above.
    for i, (trav, expected) in enumerate(tests):
        actual = tlr.translate(trav.bytecode)
        assert actual == expected, f"test {i}: {actual!r} != {expected!r}"