Ejemplo n.º 1
0
 def test_append_traversal(self) -> None:
     """append_traversal should concatenate two anonymous traversals in order."""
     base = __.V().hasLabel('Foo')
     suffix = __.where(__.inE().outV().hasLabel('Bar'))
     combined = append_traversal(base, suffix)
     self.assertEqual(
         combined,
         __.V().hasLabel('Foo').where(__.inE().outV().hasLabel('Bar')))
Ejemplo n.º 2
0
    def upsert_edge(self, start_node_id: str, end_node_id: str, edge_id: str,
                    edge_label: str, edge_properties: Dict[str, Any]) -> None:
        """Idempotently create the edge (fold/coalesce upsert), then apply its properties.

        :param start_node_id: T.id of the source vertex
        :param end_node_id: T.id of the target vertex
        :param edge_id: T.id to assign to the edge
        :param edge_label: label for the edge
        :param edge_properties: properties applied after the upsert
        """
        # Anonymous traversal executed only when the edge does not exist yet.
        create_step = __.V().has(T.id, start_node_id).addE(edge_label).to(
            __.V().has(T.id, end_node_id)).property(T.id, edge_id)
        upsert = (self.get_graph().V().has(T.id, start_node_id)
                  .outE(edge_label).has(T.id, edge_id)
                  .fold()
                  .coalesce(__.unfold(), create_step))
        upsert = NeptuneSessionClient.update_entity_properties_on_traversal(
            upsert, edge_properties)
        upsert.next()
 def __write_edges(self, g: traversal, edges: List[Dict], scan_id: str) -> None:
     """
     Writes the edges to the labeled property graph

     Edges are accumulated on a single traversal and flushed in batches of
     100 (plus once more for the final partial batch) to limit round trips.
     Both endpoint vertices are upserted via fold()/coalesce() so a missing
     vertex is created on the fly.

     :param g: The graph traversal source
     :param edges: A list of dictionaries for each edge
     :param scan_id: scan identifier appended to vertex ids to namespace them
     :return: None
     :raises NeptuneLoadGraphException: if a batched write fails
     """
     cnt = 0
     t = g
     for r in edges:
         # Vertex ids are namespaced per scan so repeated scans do not collide.
         to_id = f'{r["~to"]}_{scan_id}'
         from_id = f'{r["~from"]}_{scan_id}'
         t = (
             t.addE(r["~label"])
             .property(T.id, str(r["~id"]))
             .from_(
                 __.V(from_id)
                 .fold()
                 .coalesce(
                     __.unfold(),
                     # Create the source vertex when it does not exist yet.
                     __.addV(self.parse_arn(r["~from"])["resource"])
                     .property(T.id, from_id)
                     .property("scan_id", scan_id)
                     .property("arn", r["~from"]),
                 )
             )
             .to(
                 __.V(to_id)
                 .fold()
                 .coalesce(
                     __.unfold(),
                     # Create the target vertex when it does not exist yet.
                     __.addV(self.parse_arn(r["~to"])["resource"])
                     .property(T.id, to_id)
                     .property("scan_id", scan_id)
                     .property("arn", r["~to"]),
                 )
             )
         )
         cnt += 1
         # Flush every 100 edges and again after the last edge.
         if cnt % 100 == 0 or cnt == len(edges):
             try:
                 self.logger.info(
                     event=LogEvent.NeptunePeriodicWrite,
                     msg=f"Writing edges {cnt} of {len(edges)}",
                 )
                 t.next()
                 # Start a fresh traversal for the next batch.
                 t = g
             except Exception as err:
                 self.logger.error(event=LogEvent.NeptuneLoadError, msg=str(err))
                 raise NeptuneLoadGraphException(
                     f"Error loading edge {r} " f"with {str(t.bytecode)}"
                 ) from err
        def add_edges_for_label(rows):
            """Upsert one edge per row, batching `batch_size` rows per query.

            NOTE(review): `label` and `batch_size` come from the enclosing
            scope. Each partition opens and closes its own connection.
            """
            conn = self.gremlin_utils.remote_connection()
            g = self.gremlin_utils.traversal_source(connection=conn)

            t = g
            i = 0
            for row in rows:
                entries = row.asDict()
                # Anonymous traversal that creates the edge with id/properties.
                create_traversal = __.V(entries['~from']).addE(label).to(
                    V(entries['~to'])).property(id, entries['~id'])
                for key, value in entries.items():
                    # Column headers look like "name:type"; keep only the name.
                    key = key.split(':')[0]
                    if key not in ['~id', '~from', '~to', '~label']:
                        create_traversal.property(key, value)
                # coalesce(): reuse the edge when present, else run create_traversal.
                t = t.V(entries['~from']).outE(label).hasId(
                    entries['~id']).fold().coalesce(__.unfold(),
                                                    create_traversal)
                i += 1
                if i == batch_size:
                    self.retry_query(t)
                    t = g
                    i = 0
            # Flush the final partial batch, if any.
            if i > 0:
                self.retry_query(t)

            conn.close()
Ejemplo n.º 5
0
 def __add_author(self, t, author, post_url):
     """Upsert an 'author' vertex and link the post to it via 'written_by'.

     :param t: traversal being accumulated by the caller (returned, not run)
     :param author: dict with 'name', optionally 'img_src'/'img_height'/'img_width'
     :param post_url: vertex id of the post (source of the 'written_by' edge)
     :return: the extended traversal
     """
     img_src = None
     if "img_src" in author.keys():
         # assumes img_height/img_width always accompany img_src --
         # TODO(review) confirm against the producer of `author`
         img_src = author['img_src']
         img_height = author['img_height']
         img_width = author['img_width']
     t = (
         t.V(author['name'])
         .fold()
         .coalesce(
             __.unfold(),
             # Create the author vertex only when it does not exist yet.
             __.addV('author')
             .property(T.id, author['name'])
             .property('name', author['name'])
         ).as_('p').addE('written_by').from_(__.V(post_url))
     )
     # Conditionally add the img_src, img_height, and img_width property if they do not exist
     if img_src:
         t = (
             t.sideEffect(
                 __.select('p').hasNot('img_src')
                 .property('img_src', img_src)
                 .property('img_height', img_height)
                 .property('img_width', img_width)
             )
         )
     return t
Ejemplo n.º 6
0
    def add_edge(self, u: Hashable, v: Hashable, metadata: dict):
        """
        Add a new edge to the graph between two nodes.

        If the graph is directed, this edge will start (source) at the `u` node
        and end (target) at the `v` node.

        Missing endpoint vertices are created on demand; the edge's properties
        are set from `metadata` in either case.

        Arguments:
            u (Hashable): The source node ID
            v (Hashable): The target node ID
            metadata (dict): Optional metadata to associate with the edge

        Returns:
            list: The traversal results for the edge, as returned by
            ``toList()``.

        """
        try:
            # get_edge_by_id raises IndexError when the edge does not exist.
            self.get_edge_by_id(u, v)
            e = self._g.V().has(ID, u).outE().as_("e").inV().has(ID, v).select("e")
        except IndexError:
            if not self.has_node(u):
                self.add_node(u, {})
            if not self.has_node(v):
                self.add_node(v, {})
            e = (
                self._g.V()
                .has(ID, u)
                .addE(EDGE_NAME)
                .as_("e")
                .to(__.V().has(ID, v))
                .select("e")
            )
        for key, val in metadata.items():
            e = e.property(key, val)
        return e.toList()
Ejemplo n.º 7
0
    def handle_youtube_video_added(self, video_id, user_id, name, description, location, preview_image_location,
                                   tags, added_date, timestamp):
        """Record a newly added video in a single graph traversal.

        Creates the video vertex, links the uploading user to it, and links
        it to each (deduplicated) tag, creating tag vertices on demand.
        """
        # make sure tags are unique (no duplicates)
        unique_tags = set(tags)

        logging.debug('SuggestedVideosService:handle_youtube_video_added, video ID: ' + str(video_id) +
                      ', user ID: ' + str(user_id) + ', name: ' + name + ', description: ' + description +
                      ', location: ' + location + ', preview_image_location: ' + preview_image_location +
                      ', tags: ' + str(unique_tags) + ', timestamp: ' + str(timestamp))

        # Note: building a single traversal, but broken into several steps for readability

        # locate user vertex
        traversal = self.graph.V().has('user', 'userId', user_id).as_('^user')

        # add video vertex
        traversal = traversal.addV('video').property('videoId', video_id)\
            .property('added_date', added_date) \
            .property('description', description) \
            .property('name', name) \
            .property('preview_image_location', preview_image_location) \
            .as_('^video')

        # add edge from user to video vertex
        traversal = traversal.addE('uploaded').from_('^user').to('^video').property('added_date', added_date)

        # find vertices for tags and add edges from video vertex
        for tag in unique_tags:
            # coalesce(): reuse an existing tag vertex, else create it inline
            traversal = traversal.addE('taggedWith').from_('^video').to(__.coalesce(
                __.V().has('tag', 'name', tag),
                __.addV('tag').property('name', tag).property('tagged_date', added_date)))

        # execute the traversal
        traversal.iterate()
Ejemplo n.º 8
0
 async def _add_edge(self, edge):
     """Convenience function for generating crud traversals.

     Builds the addE traversal between the edge's bound endpoints and
     delegates property handling to _add_properties.
     """
     props = mapper.map_props_to_db(edge, edge.__mapping__)
     traversal = (self._g.V(Binding('sid', edge.source.id))
                  .addE(edge.__mapping__._label)
                  .to(__.V(Binding('tid', edge.target.id))))
     return await self._add_properties(traversal, props, edge)
Ejemplo n.º 9
0
    def test_enforce_anonymous_child_traversal(self):
        """Child traversals must be anonymous (__), not spawned from g."""
        g = traversal().withGraph(Graph())
        g.V(0).addE("self").to(__.V(1))

        try:
            g.V(0).addE("self").to(g.V(1))
        except TypeError:
            pass
        else:
            assert False
    def handle_user_rated_video(self, video_id, user_id, rating, timestamp):
        """Record a user's rating of a video as a 'rated' edge.

        Assumes the user and video vertices already exist; the rating value
        is stored as a property on the edge.
        """
        logging.debug(
            'SuggestedVideosService:handle_user_rated_video, video id: ' +
            str(video_id) + ', user ID: ' + str(user_id) + ', rating: ' +
            str(rating) + ', timestamp: ' + str(timestamp))

        # locate the video and user vertices and add an edge to represent the rating
        self.graph.V().has('user', 'userId', user_id) \
            .addE('rated').to(__.V().has('video', 'videoId', video_id)) \
            .property('rating', rating) \
            .iterate()
Ejemplo n.º 11
0
    def __add_tag(self, t, tag, post_url):
        """Upsert a 'tag' vertex and attach a 'tagged' edge from the post.

        :param t: traversal being accumulated by the caller (returned, not run)
        :param tag: tag string, used both as vertex id and 'tag' property
        :param post_url: vertex id of the post (source of the edge)
        :return: the extended traversal
        """
        # Creation branch, taken only when the tag vertex is absent.
        create_tag = (__.addV('tag')
                      .property(T.id, tag)
                      .property('tag', tag))
        return (t.V(tag)
                .fold()
                .coalesce(__.unfold(), create_tag)
                .addE('tagged')
                .from_(__.V(post_url)))
Ejemplo n.º 12
0
    def create_edge(self, label, from_id, to_id, extra_properties):
        """Upsert an edge between two vertices and apply extra properties.

        The edge id is deterministic ('<from>-<label>-<to>') so repeated calls
        are idempotent. A concurrent-writer constraint violation is treated as
        success.
        """
        try:
            # Edge ID is the from id and to id plus the label, so we can have multiple different types of relations
            edge_id = '{}-{}-{}'.format(from_id, label, to_id)

            # create the edge or return it if it already exists
            self._do_next(
                self.g.E(edge_id).fold().coalesce(
                    __.unfold(),
                    __.V(from_id).addE(label).to(__.V(to_id)).property(
                        T.id, edge_id)).property(DATE, utils.get_date_now()))
            # get the edge object
            edge = self.g.E(edge_id).limit(1)

            # add the reference data that was supplied as edge properties
            for k, v in extra_properties.items():
                edge = edge.property(k, v)

            # only execute the property traversal when there is work to do
            if len(extra_properties) != 0:
                edge.next()
        except ConstraintViolationException:
            # another writer created the edge first -- treat as success
            pass
Ejemplo n.º 13
0
 def test_upsert(self) -> None:
     """translateB should serialize a fold/coalesce upsert traversal to Groovy."""
     g = __.V().has('User', 'key', 'jack').fold().coalesce(
         unfold(),
         addV('User').property(Cardinality.single, 'key', 'jack')). \
         coalesce(__.has('email', '*****@*****.**'),
                  __.property(Cardinality.single, 'email', '*****@*****.**')). \
         coalesce(__.has('url', 'https://twitter.com/jack'),
                  __.property(Cardinality.single, 'url', 'https://twitter.com/jack'))
     actual = ScriptTranslator.translateB('g', g)
     self.assertEqual(
         actual,
         '''g.V().has("User","key","jack").fold().coalesce(__.unfold(),__.addV("User").property(single,"key","jack")).coalesce(__.has("email","*****@*****.**"),__.property(single,"email","*****@*****.**")).coalesce(__.has("url","https://twitter.com/jack"),__.property(single,"url","https://twitter.com/jack"))'''
     )  # noqa: E501
 def add_edges_for_label(rows):
     """Upsert one edge per row (partition callback).

     NOTE(review): `label` comes from the enclosing scope. Each edge is
     created only when no edge with its '~id' exists (fold/coalesce upsert).
     """
     conn = self.remote_connection()
     g = self.traversal_source(conn)
     for row in rows:
         entries = row.asDict()
         # Use `entries` consistently for field access (was mixed row/entries).
         create_traversal = __.V(entries['~from']).addE(label).to(
             V(entries['~to'])).property(id, entries['~id'])
         # BUG FIX: dict.iteritems() is Python 2 only -- items() on Python 3.
         for key, value in entries.items():
             # Column headers look like "name:type"; keep only the name.
             key = key.split(':')[0]
             if key not in ['~id', '~from', '~to', '~label']:
                 create_traversal.property(key, value)
         g.E(entries['~id']).fold().coalesce(__.unfold(),
                                             create_traversal).next()
     conn.close()
Ejemplo n.º 15
0
    def __add_entities(self, t, entity, post_url):
        """Upsert a vertex for a detected entity and link it to the post.

        :param t: traversal being accumulated by the caller (returned, not run)
        :param entity: dict with 'Text', 'Type', and 'Score'
        :param post_url: vertex id of the post (source of the 'found_in' edge)
        :return: the extended traversal
        """
        vertex_id = f'{entity["Text"]}_{entity["Type"]}'
        # Creation branch, taken only when the entity vertex is absent.
        create_entity = (__.addV(entity["Type"].lower())
                         .property(T.id, vertex_id)
                         .property("text", entity["Text"])
                         .property("type", entity["Type"]))
        return (t.V(vertex_id)
                .fold()
                .coalesce(__.unfold(), create_entity)
                .addE('found_in')
                .from_(__.V(post_url))
                .property('score', entity['Score']))
Ejemplo n.º 16
0
def _build_gremlin_edges(g: GraphTraversalSource,
                         row: pd.Series) -> GraphTraversalSource:
    """Append an edge-upsert for *row* onto traversal *g*.

    Missing endpoint vertices are created on the fly with a generic 'Vertex'
    label; edge properties are added by _build_gremlin_properties.

    :param g: traversal being accumulated (returned, not executed)
    :param row: edge row with '~from', '~to', '~label' plus property columns
    """
    g = (g.V(str(row["~from"])).fold().coalesce(
        __.unfold(),
        # Create the source vertex with a generic label when missing.
        _build_gremlin_vertices(__, {
            "~id": row["~from"],
            "~label": "Vertex"
        })).addE(row["~label"]).to(
            __.V(str(row["~to"])).fold().coalesce(
                __.unfold(),
                _build_gremlin_vertices(__, {
                    "~id": row["~to"],
                    "~label": "Vertex"
                }))))
    g = _build_gremlin_properties(g, row)

    return g
Ejemplo n.º 17
0
def _build_gremlin_insert_edges(
        g: GraphTraversalSource, row: pd.Series,
        use_header_cardinality: bool) -> GraphTraversalSource:
    """Append an upsert for the edge described by *row* onto traversal *g*.

    Missing endpoint vertices are created with a generic 'Vertex' label and
    the edge id is set explicitly from '~id'.

    :param g: traversal being accumulated (returned, not executed)
    :param row: edge row with '~id', '~from', '~to', '~label' plus properties
    :param use_header_cardinality: forwarded to _set_properties for
        header-driven cardinality handling
    """
    g = (g.V(str(row["~from"])).fold().coalesce(
        __.unfold(),
        # Create the source vertex with a generic label when missing.
        _build_gremlin_insert_vertices(__, {
            "~id": row["~from"],
            "~label": "Vertex"
        })).addE(row["~label"]).property(T.id, str(row["~id"])).to(
            __.V(str(row["~to"])).fold().coalesce(
                __.unfold(),
                _build_gremlin_insert_vertices(__, {
                    "~id": row["~to"],
                    "~label": "Vertex"
                }))))
    g = _set_properties(g, use_header_cardinality, row)

    return g
 def add_edges_for_label(rows):
     """Upsert one edge per row, closing the connection even on failure.

     NOTE(review): `label` comes from the enclosing scope. Failures are
     reported and swallowed (best-effort load), matching prior behavior.
     """
     conn = None
     try:
         conn = self.gremlin_utils.remote_connection()
         g = self.gremlin_utils.traversal_source(connection=conn)
         for row in rows:
             entries = row.asDict()
             create_traversal = __.V(row['~from']).addE(label).to(
                 V(row['~to'])).property(id, row['~id'])
             for key, value in entries.items():
                 # Column headers look like "name:type"; keep only the name.
                 key = key.split(':')[0]
                 if key not in ['~id', '~from', '~to', '~label']:
                     create_traversal.property(key, value)
             g.E(entries['~id']).fold().coalesce(
                 __.unfold(), create_traversal).next()
     except GremlinServerError as err:
         print("Neptune error: {0}".format(err))
     except Exception:  # BUG FIX: bare except also caught SystemExit/KeyboardInterrupt
         print("Unexpected error:", sys.exc_info()[0])
     finally:
         # BUG FIX: connection used to leak when an exception was raised.
         if conn is not None:
             conn.close()
Ejemplo n.º 19
0
def upsert_edge(record, edge_mapping, g):
    """Create an edge described by *edge_mapping* between two existing vertices.

    Both endpoint vertices must already exist; the edge is added only when no
    matching edge is present (fold/coalesce upsert on the in-vertex side).
    Failures are reported and the edge is skipped (best-effort load).

    :param record: source record the lookup values are extracted from
    :param edge_mapping: dict with 'edge_label', 'out_vertex', 'in_vertex'
    :param g: graph traversal source to execute against
    """
    edge_label = edge_mapping['edge_label']
    # Simple logic, requiring that Vertices must exist before edge can be added.
    # Ensure all lookup values are present first
    out_lookup_values = get_lookup_values(
        record, edge_mapping['out_vertex']['lookup_properties'])
    in_lookup_values = get_lookup_values(
        record, edge_mapping['in_vertex']['lookup_properties'])
    if out_lookup_values is None or in_lookup_values is None:
        return

    try:
        # Two parallel traversals: one to check for the edge, one (anonymous)
        # to insert it when the check comes up empty.
        traversal = g.V().hasLabel(edge_mapping['out_vertex']['vertex_label'])
        insertion_traversal = __.V().hasLabel(
            edge_mapping['out_vertex']['vertex_label'])

        for prop_key, lookup_value in out_lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.has(
                prop_key, lookup_value)

        traversal = traversal.as_('out').V().hasLabel(
            edge_mapping['in_vertex']['vertex_label'])
        insertion_traversal = insertion_traversal.as_('out2').V().hasLabel(
            edge_mapping['in_vertex']['vertex_label'])

        for prop_key, lookup_value in in_lookup_values.items():
            traversal = traversal.has(prop_key, lookup_value)
            insertion_traversal = insertion_traversal.has(
                prop_key, lookup_value)

        insertion_traversal = insertion_traversal.addE(edge_label).from_(
            'out2')
        traversal = traversal.as_('in').inE(edge_label).as_('e').outV().where(
            P.eq('out')).fold().coalesce(__.unfold(),
                                         insertion_traversal).next()

    except Exception:  # BUG FIX: bare except also swallowed SystemExit/KeyboardInterrupt
        print("Edge error - skipping: {0}({1}) --{2}-> {3}({4})".format(
            edge_mapping['out_vertex']['vertex_label'], out_lookup_values,
            edge_label, edge_mapping['in_vertex']['vertex_label'],
            in_lookup_values))
    def query_target_subgraph(self, target_id, tr_dict, transaction_value_cols,
                              union_id_cols, dummied_col):
        """Extract 2nd degree subgraph of target transaction.Dump data into subgraph dict and n_feats dict.
        subgraph_dict:  related transactions' id list and values through edges
        n_feats dict: related 1 degree vertex and transactions' embeded elements vectors.
        Usually after insert new test sample's vertex and edges into graphDB.

        :param target_id: vertex id of the target transaction, formatted '<name>-<value>'
        :param tr_dict: single-element list holding the target transaction's attributes
        :param transaction_value_cols: attribute keys extracted per neighbour transaction
        :param union_id_cols: identity-column labels used to fan out the union query
        :param dummied_col: unused here -- kept for interface compatibility
        :return: tuple (subgraph_dict, transaction_embed_value_dict)

        Example:
        >>> query_target_subgraph('3661635', load_data_from_event(), 'M2_T,M3_F,M3_T,...')
        """
        subgraph_dict = {}
        neighbor_list = []
        neighbor_dict = {}
        transaction_embed_value_dict = {}

        ii = 0
        s_t = dt.now()

        conn = self.gremlin_utils.remote_connection()
        g = self.gremlin_utils.traversal_source(connection=conn)

        # Vertex ids are '<name>-<value>'; strip the prefix to get the raw value.
        target_name = target_id[(target_id.find('-') + 1):]
        feature_list = g.V().has(id, target_id).out().id().toList()
        for feat in feature_list:
            ii += 1
            feat_name = feat[:feat.find('-')]
            feat_value = feat[(feat.find('-') + 1):]
            node_list = g.V().has(
                id, feat).both().limit(MAX_FEATURE_NODE).id().toList()
            target_and_conn_node_list = [int(target_name)] + [
                int(target_conn_node[(target_conn_node.find('-') + 1):])
                for target_conn_node in node_list
            ]
            target_and_conn_node_list = list(set(target_and_conn_node_list))
            neighbor_list += target_and_conn_node_list
            nodes_and_feature_value_array = (target_and_conn_node_list,
                                             [feat_value] *
                                             len(target_and_conn_node_list))
            subgraph_dict['target<>' +
                          feat_name] = nodes_and_feature_value_array

        e_t = dt.now()
        logger.info(
            f'INSIDE query_target_subgraph: subgraph_dict used {(e_t - s_t).total_seconds()} seconds'
        )
        new_s_t = e_t

        # One sub-traversal per identity column, unioned into a single query below.
        union_li = [
            __.V().has(id, target_id).both().hasLabel(label).both().limit(
                MAX_FEATURE_NODE) for label in union_id_cols
        ]

        if len(union_id_cols) == 51:
            node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\
                    union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\
                    union_li[6], union_li[7], union_li[8], union_li[9], union_li[10],\
                    union_li[11], union_li[12], union_li[13], union_li[14], union_li[15],\
                    union_li[16], union_li[17], union_li[18], union_li[19], union_li[20],\
                    union_li[21], union_li[22], union_li[23], union_li[24], union_li[25],\
                    union_li[26], union_li[27], union_li[28], union_li[29], union_li[30],\
                    union_li[31], union_li[32], union_li[33], union_li[34], union_li[35],\
                    union_li[36], union_li[37], union_li[38], union_li[39], union_li[40],\
                    union_li[41], union_li[42], union_li[43], union_li[44], union_li[45],\
                    union_li[46], union_li[47], union_li[48], union_li[49], union_li[50]).elementMap().toList()
        else:
            node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\
                    union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\
                    union_li[6], union_li[7], union_li[8], union_li[9], union_li[10]).elementMap().toList()

        e_t = dt.now()
        logger.info(
            f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.'
        )
        new_s_t = e_t

        logger.debug(f'Found {len(node_dict)} nodes from graph dbs...')

        # Hashable wrapper so the element maps can be de-duplicated via set().
        class Item():
            def __init__(self, item):
                self.item = item

            def __hash__(self):
                # Hash on the first value of the element map (the id entry).
                return hash(self.item.get(list(self.item)[0]))

            def __eq__(self, other):
                if isinstance(other, self.__class__):
                    return self.__hash__() == other.__hash__()
                else:
                    return NotImplemented

            def __repr__(self):
                return "Item(%s)" % (self.item)

        node_dict = list(set([Item(node) for node in node_dict]))
        logger.debug(f'Found {len(node_dict)} nodes without duplication')
        for item in node_dict:
            item = item.item
            node = item.get(list(item)[0])
            node_value = node[(node.find('-') + 1):]
            try:
                logger.debug(
                    f'the props of node {node} is {item.get(attr_version_key)}'
                )
                jsonVal = json.loads(item.get(attr_version_key))
                neighbor_dict[node_value] = [
                    jsonVal[key] for key in transaction_value_cols
                ]
                logger.debug(
                    f'neighbor pair is {node_value}, {neighbor_dict[node_value]}'
                )
            except json.JSONDecodeError:
                # Malformed vertex payload: report it, suggest cleanup, skip it.
                logger.warn(
                    f'Malform node value {node} is {item.get(attr_version_key)}, run below cmd to remove it'
                )
                logger.info(f'g.V(\'{node}\').drop()')

        target_value = target_id[(target_id.find('-') + 1):]
        jsonVal = json.loads(tr_dict[0].get(attr_version_key))
        neighbor_dict[target_value] = [
            jsonVal[key] for key in transaction_value_cols
        ]

        logger.info(
            f'INSIDE query_target_subgraph: neighbor_dict used {(e_t - new_s_t).total_seconds()} seconds.'
        )

        attr_cols = ['val' + str(x) for x in range(1, 391)]
        for attr in feature_list:
            attr_name = attr[:attr.find('-')]
            attr_value = attr[(attr.find('-') + 1):]
            attr_dict = g.V().has(id, attr).valueMap().toList()[0]
            logger.debug(f'attr is {attr}, dict is {attr_dict}')
            jsonVal = json.loads(attr_dict.get(attr_version_key)[0])
            attr_dict = [float(jsonVal[key]) for key in attr_cols]
            attr_input_dict = {}
            attr_input_dict[attr_value] = attr_dict
            transaction_embed_value_dict[attr_name] = attr_input_dict

        e_t = dt.now()
        logger.info(
            f'INSIDE query_target_subgraph: transaction_embed_value_dict used {(e_t - new_s_t).total_seconds()} seconds. Total test cost {(e_t - s_t).total_seconds()} seconds.'
        )
        new_s_t = e_t

        transaction_embed_value_dict['target'] = neighbor_dict

        conn.close()

        return subgraph_dict, transaction_embed_value_dict
    def test_upsert_thrice(self) -> None:
        """_upsert should insert once, then update/no-op on repeated calls.

        Three rounds: (1) fresh insert creates the vertex; (2) identical
        values issue only a get plus an id()-returning write; (3) changed
        name with foo=None drops the 'foo' property and updates 'name'.
        """
        executor = mock.Mock(wraps=self.get_proxy().query_executor())

        # test that we will insert
        db_name = Fixtures.next_database()
        database_uri = f'database://{db_name}'
        vertex_type = VertexType(
            label=VertexTypes.Database.value.label,
            properties=VertexTypes.Database.value.properties +
            tuple([Property(name='foo', type=GremlinType.String)]))

        exists = self._get(label=vertex_type,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 0)
        _upsert(executor=executor,
                g=self.get_proxy().g,
                key_property_name=self.get_proxy().key_property_name,
                label=vertex_type,
                key=database_uri,
                name='test',
                foo='bar')
        exists = self._get(label=vertex_type,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 1)
        # NOTE(review): `id` shadows the builtin here; kept as-is.
        id = self._get(label=vertex_type,
                       key=database_uri,
                       extra_traversal=__.id())

        executor.reset_mock()
        _upsert(executor=executor,
                g=self.get_proxy().g,
                key_property_name=self.get_proxy().key_property_name,
                label=vertex_type,
                key=database_uri,
                name='test')
        exists = self._get(label=vertex_type,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 1)
        self.assertEqual(executor.call_count, 2)
        # first one is the get:
        self.assertEqual(executor.call_args_list[0][1]['query'].bytecode,
                         __.V(id).valueMap(True).bytecode)
        # the second one should be like
        self.assertEqual(executor.call_args_list[1][1]['query'].bytecode,
                         __.V(id).id().bytecode)

        executor.reset_mock()
        _upsert(executor=executor,
                g=self.get_proxy().g,
                key_property_name=self.get_proxy().key_property_name,
                label=vertex_type,
                key=database_uri,
                name='test2',
                foo=None)
        exists = self._get(label=vertex_type,
                           key=database_uri,
                           extra_traversal=__.count())
        self.assertEqual(exists, 1)
        self.assertEqual(executor.call_count, 2)
        # first one is the get:
        self.assertEqual(executor.call_args_list[0][1]['query'].bytecode,
                         __.V(id).valueMap(True).bytecode)
        # the second one should be like
        self.assertEqual(
            executor.call_args_list[1][1]['query'].bytecode,
            __.V(id).sideEffect(__.properties('foo').drop()).property(
                Cardinality.single, 'name', 'test2').id().bytecode)
Ejemplo n.º 22
0
    def query_target_subgraph(self, target_id, tr_dict, transaction_value_cols, union_id_cols, dummied_col):
        """Extract 2nd degree subgraph of target transaction.Dump data into subgraph dict and n_feats dict.
        subgraph_dict:  related transactions' id list and values through edges
        n_feats dict: related 1 degree vertex and transactions' embeded elements vectors.
        Usually after insert new test sample's vertex and edges into graphDB.

        NOTE(review): verbose variant of this routine -- same overall flow with
        extra logging and without the element-map de-duplication/JSON handling.

        :param target_id: vertex id of the target transaction, formatted '<name>-<value>'
        :param tr_dict: single-element list holding the target transaction's attributes
        :param transaction_value_cols: attribute keys extracted per neighbour transaction
        :param union_id_cols: identity-column labels used to fan out the union query
        :param dummied_col: unused here -- kept for interface compatibility
        :return: tuple (subgraph_dict, transaction_embed_value_dict)

        Example:
        >>> query_target_subgraph('3661635', load_data_from_event(), 'M2_T,M3_F,M3_T,...')
        """
        subgraph_dict = {}
        neighbor_list = []
        neighbor_dict = {}
        transaction_embed_value_dict = {}

        ii = 0
        s_t = dt.now()

        conn = self.gremlin_utils.remote_connection()
        g = self.gremlin_utils.traversal_source(connection=conn)

        # Vertex ids are '<name>-<value>'; strip the prefix to get the raw value.
        target_name = target_id[(target_id.find('-')+1):]
        feature_list = g.V().has(id,target_id).out().id().toList()
        for feat in feature_list:
            ii += 1
            feat_name = feat[:feat.find('-')]
            feat_value = feat[(feat.find('-')+1):]
            node_list = g.V().has(id,feat).both().limit(MAX_FEATURE_NODE).id().toList()
            target_and_conn_node_list = [int(target_name)]+[int(target_conn_node[(target_conn_node.find('-')+1):]) for target_conn_node in node_list]
            target_and_conn_node_list = list(set(target_and_conn_node_list))
            neighbor_list += target_and_conn_node_list
            nodes_and_feature_value_array = (target_and_conn_node_list,[feat_value]*len(target_and_conn_node_list))
            subgraph_dict['target<>'+feat_name] = nodes_and_feature_value_array

        e_t = dt.now()
        logger.info(f'INSIDE query_target_subgraph: subgraph_dict used {(e_t - s_t).total_seconds()} seconds')
        logger.info(f'subgraph_dict len: {len(subgraph_dict.keys())}  key: {subgraph_dict.keys()}')
        logger.info(f'subgraph_dict: {subgraph_dict}')
        new_s_t = e_t

        # One sub-traversal per identity column, unioned into a single query below.
        union_li = [__.V().has(id,target_id).both().hasLabel(label).both().limit(MAX_FEATURE_NODE) for label in union_id_cols]
        logger.info(f'union_id_cols len: {len(union_id_cols)}  key: {union_id_cols}')
        logger.info(f'union_li len: {len(union_li)}  key: {union_li}')

        if len(union_id_cols) == 51:
            node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\
                    union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\
                    union_li[6], union_li[7], union_li[8], union_li[9], union_li[10],\
                    union_li[11], union_li[12], union_li[13], union_li[14], union_li[15],\
                    union_li[16], union_li[17], union_li[18], union_li[19], union_li[20],\
                    union_li[21], union_li[22], union_li[23], union_li[24], union_li[25],\
                    union_li[26], union_li[27], union_li[28], union_li[29], union_li[30],\
                    union_li[31], union_li[32], union_li[33], union_li[34], union_li[35],\
                    union_li[36], union_li[37], union_li[38], union_li[39], union_li[40],\
                    union_li[41], union_li[42], union_li[43], union_li[44], union_li[45],\
                    union_li[46], union_li[47], union_li[48], union_li[49], union_li[50]).elementMap().toList()
        else:
            node_dict = g.V().has(id,target_id).union(__.both().hasLabel('card1').both().limit(MAX_FEATURE_NODE),\
                    union_li[1], union_li[2], union_li[3], union_li[4], union_li[5],\
                    union_li[6], union_li[7], union_li[8], union_li[9], union_li[10]).elementMap().toList()

        e_t = dt.now()
        logger.info(f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.')
        new_s_t = e_t

        logger.info(f'node_dict len: {len(node_dict)}  key: {node_dict}')

        for item in node_dict:
            node = item.get(list(item)[0])
            node_value = node[(node.find('-')+1):]
            neighbor_dict[node_value] = [item.get(key) for key in transaction_value_cols]

        target_value = target_id[(target_id.find('-')+1):]
        neighbor_dict[target_value] = [tr_dict[0].get(key) for key in transaction_value_cols]

        logger.info(f'INSIDE query_target_subgraph: node_dict used {(e_t - new_s_t).total_seconds()} seconds.')
        logger.info(f'neighbor_dict len: {len(neighbor_dict.keys())}  key: {neighbor_dict.keys()}')
        logger.info(f'neighbor_dict: {neighbor_dict}')


        attr_cols = ['val'+str(x) for x in range(1,391)]
        for attr in feature_list:
            attr_name = attr[:attr.find('-')]
            attr_value = attr[(attr.find('-')+1):]
            attr_dict = g.V().has(id,attr).valueMap().toList()[0]
            # valueMap() values are lists; take the last entry for each column.
            attr_dict = [attr_dict.get(key)[-1] for key in attr_cols]
            attr_input_dict = {}
            attr_input_dict[attr_value] =  attr_dict
            transaction_embed_value_dict[attr_name] = attr_input_dict

        e_t = dt.now()
        logger.info(f'INSIDE query_target_subgraph: transaction_embed_value_dict used {(e_t - new_s_t).total_seconds()} seconds. Total test cost {(e_t - s_t).total_seconds()} seconds.')
        new_s_t = e_t

        transaction_embed_value_dict['target'] = neighbor_dict

        conn.close()

        logger.info(f'transaction_embed_value_dict len: {len(transaction_embed_value_dict.keys())} key: {transaction_embed_value_dict.keys()}')
        logger.info(f'transaction_embed_value_dict: {transaction_embed_value_dict}')

        return subgraph_dict, transaction_embed_value_dict
Ejemplo n.º 23
0
    def test_translations(self):
        """Verify Gremlin bytecode-to-Groovy string translation.

        Builds a list of (traversal, expected_groovy_string) pairs covering
        predicates, ordering, sacks, strategies, dates, cardinality, etc.,
        then asserts that Translator().of('g') renders each traversal's
        bytecode to exactly the expected Groovy text.
        """
        g = traversal().withGraph(Graph())

        tests = list()
        # 0
        tests.append([g.V(),
                     "g.V()"])
        # 1
        tests.append([g.V('1', '2', '3', '4'),
                     "g.V('1','2','3','4')"])
        # 2
        tests.append([g.V('3').valueMap(True),
                     "g.V('3').valueMap(True)"])
        # 3
        tests.append([g.V().constant(5),
                     "g.V().constant(5)"])
        # 4
        tests.append([g.V().constant(1.5),
                     "g.V().constant(1.5)"])
        # 5
        tests.append([g.V().constant('Hello'),
                     "g.V().constant('Hello')"])
        # 6
        tests.append([g.V().hasLabel('airport').limit(5),
                     "g.V().hasLabel('airport').limit(5)"])
        # 7
        tests.append([g.V().hasLabel(within('a', 'b', 'c')),
                     "g.V().hasLabel(within(['a','b','c']))"])
        # 8
        tests.append([g.V().hasLabel('airport', 'continent').out().limit(5),
                     "g.V().hasLabel('airport','continent').out().limit(5)"])
        # 9
        tests.append([g.V().hasLabel('airport').out().values('code').limit(5),
                     "g.V().hasLabel('airport').out().values('code').limit(5)"])
        # 10
        tests.append([g.V('3').as_('a').out('route').limit(10).where(eq('a')).by('region'),
                     "g.V('3').as('a').out('route').limit(10).where(eq('a')).by('region')"])
        # 11
        tests.append([g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code'),
                     "g.V('3').repeat(__.out('route').simplePath()).times(2).path().by('code')"])
        # 12
        tests.append([g.V().hasLabel('airport').out().has('region', 'US-TX').values('code').limit(5),
                     "g.V().hasLabel('airport').out().has('region','US-TX').values('code').limit(5)"])
        # 13
        tests.append([g.V().hasLabel('airport').union(__.values('city'), __.values('region')).limit(5),
                     "g.V().hasLabel('airport').union(__.values('city'),__.values('region')).limit(5)"])
        # 14
        tests.append([g.V('3').as_('a').out('route', 'routes'),
                     "g.V('3').as('a').out('route','routes')"])
        # 15
        tests.append([g.V().where(__.values('runways').is_(5)),
                    "g.V().where(__.values('runways').is(5))"])
        # 16
        tests.append([g.V('3').repeat(__.out().simplePath()).until(__.has('code', 'AGR')).path().by('code').limit(5),
                     "g.V('3').repeat(__.out().simplePath()).until(__.has('code','AGR')).path().by('code').limit(5)"])
        # 17
        tests.append([g.V().hasLabel('airport').order().by(__.id()),
                     "g.V().hasLabel('airport').order().by(__.id())"])
        # 18
        tests.append([g.V().hasLabel('airport').order().by(T.id),
                     "g.V().hasLabel('airport').order().by(T.id)"])
        # 19
        tests.append([g.V().hasLabel('airport').order().by(__.id(),Order.desc),
                     "g.V().hasLabel('airport').order().by(__.id(),Order.desc)"])
        # 20
        tests.append([g.V().hasLabel('airport').order().by('code',Order.desc),
                     "g.V().hasLabel('airport').order().by('code',Order.desc)"])
        # 21
        tests.append([g.V('1', '2', '3').local(__.out().out().dedup().fold()),
                     "g.V('1','2','3').local(__.out().out().dedup().fold())"])
        # 22
        tests.append([g.V('3').out().path().count(Scope.local),
                     "g.V('3').out().path().count(Scope.local)"])
        # 23
        tests.append([g.E().count(),
                     "g.E().count()"])
        # 24
        tests.append([g.V('5').outE('route').inV().path().limit(10),
                     "g.V('5').outE('route').inV().path().limit(10)"])
        # 25
        tests.append([g.V('5').propertyMap().select(Column.keys),
                     "g.V('5').propertyMap().select(Column.keys)"])
        # 26
        tests.append([g.V('5').propertyMap().select(Column.values),
                     "g.V('5').propertyMap().select(Column.values)"])
        # 27
        tests.append([g.V('3').values('runways').math('_ + 1'),
                     "g.V('3').values('runways').math('_ + 1')"])
        # 28
        tests.append([g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path(),
                     "g.V('3').emit().repeat(__.out().simplePath()).times(3).limit(5).path()"])
        # 29
        tests.append([g.V().match(__.as_('a').has('code', 'LHR').as_('b')).select('b').by('code'),
                     "g.V().match(__.as('a').has('code','LHR').as('b')).select('b').by('code')"])
        # 30
        tests.append([g.V().has('test-using-keyword-as-property','repeat'),
                     "g.V().has('test-using-keyword-as-property','repeat')"])
        # 31
        tests.append([g.V('1').addE('test').to(__.V('4')),
                     "g.V('1').addE('test').to(__.V('4'))"])
        # 32
        tests.append([g.V().values('runways').max(),
                     "g.V().values('runways').max()"])
        # 33
        tests.append([g.V().values('runways').min(),
                     "g.V().values('runways').min()"])
        # 34
        tests.append([g.V().values('runways').sum(),
                     "g.V().values('runways').sum()"])
        # 35
        tests.append([g.V().values('runways').mean(),
                     "g.V().values('runways').mean()"])
        # 36
        tests.append([g.withSack(0).V('3', '5').sack(Operator.sum).by('runways').sack(),
                     "g.withSack(0).V('3','5').sack(Operator.sum).by('runways').sack()"])
        # 37
        tests.append([g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum(),
                     "g.V('3').values('runways').store('x').V('4').values('runways').store('x').by(__.constant(1)).V('6').store('x').by(__.constant(1)).select('x').unfold().sum()"])
        # 38
        tests.append([g.inject(3, 4, 5),
                     "g.inject(3,4,5)"])
        # 39
        tests.append([g.inject([3, 4, 5]),
                     "g.inject([3, 4, 5])"])
        # 40
        tests.append([g.inject(3, 4, 5).count(),
                     "g.inject(3,4,5).count()"])
        # 41
        tests.append([g.V().has('runways', gt(5)).count(),
                     "g.V().has('runways',gt(5)).count()"])
        # 42
        tests.append([g.V().has('runways', lte(5.3)).count(),
                     "g.V().has('runways',lte(5.3)).count()"])
        # 43
        tests.append([g.V().has('code', within(123,124)),
                     "g.V().has('code',within([123,124]))"])
        # 44
        tests.append([g.V().has('code', within(123, 'abc')),
                     "g.V().has('code',within([123,'abc']))"])
        # 45
        tests.append([g.V().has('code', within('abc', 123)),
                     "g.V().has('code',within(['abc',123]))"])
        # 46
        tests.append([g.V().has('code', within('abc', 'xyz')),
                     "g.V().has('code',within(['abc','xyz']))"])
        # 47
        tests.append([g.V('1', '2').has('region', P.within('US-TX','US-GA')),
                     "g.V('1','2').has('region',within(['US-TX','US-GA']))"])
        # 48
        tests.append([g.V().and_(__.has('runways', P.gt(5)), __.has('region','US-TX')),
                     "g.V().and(__.has('runways',gt(5)),__.has('region','US-TX'))"])
        # 49
        tests.append([g.V().union(__.has('runways', gt(5)), __.has('region','US-TX')),
                     "g.V().union(__.has('runways',gt(5)),__.has('region','US-TX'))"])
        # 50
        tests.append([g.V('3').choose(__.values('runways').is_(3),__.constant('three'),__.constant('not three')),
                     "g.V('3').choose(__.values('runways').is(3),__.constant('three'),__.constant('not three'))"])
        # 51
        tests.append([g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three')),
                     "g.V('3').choose(__.values('runways')).option(1,__.constant('three')).option(2,__.constant('not three'))"])
        # 52
        tests.append([g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three')),
                     "g.V('3').choose(__.values('runways')).option(1.5,__.constant('one and a half')).option(2,__.constant('not three'))"])
        # 53
        tests.append([g.V('3').repeat(__.out().simplePath()).until(__.loops().is_(1)).count(),
                     "g.V('3').repeat(__.out().simplePath()).until(__.loops().is(1)).count()"])
        # 54
        tests.append([g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys),
                     "g.V().hasLabel('airport').limit(20).group().by('region').by('code').order(Scope.local).by(Column.keys)"])
        # 55
        tests.append([g.V('1').as_('a').V('2').as_('a').select(Pop.all_, 'a'),
                     "g.V('1').as('a').V('2').as('a').select(Pop.all,'a')"])
        # 56
        tests.append([g.addV('test').property(Cardinality.set_, 'p1', 10),
                     "g.addV('test').property(Cardinality.set,'p1',10)"])
        # 57
        tests.append([g.addV('test').property(Cardinality.list_, 'p1', 10),
                     "g.addV('test').property(Cardinality.list,'p1',10)"])

        # 58
        tests.append([g.addV('test').property(Cardinality.single, 'p1', 10),
                     "g.addV('test').property(Cardinality.single,'p1',10)"])
        # 59
        tests.append([g.V().limit(5).order().by(T.label),
                     "g.V().limit(5).order().by(T.label)"])

        # 60
        tests.append([g.V().range(1, 5),
                     "g.V().range(1,5)"])

        # 61
        tests.append([g.addV('test').property('p1', 123),
                     "g.addV('test').property('p1',123)"])

        # 62
        tests.append([g.addV('test').property('date',datetime(2021, 2, 1, 9, 30)),
                     "g.addV('test').property('date',new Date(121,2,1,9,30,0))"])
        # 63
        tests.append([g.addV('test').property('date',datetime(2021, 2, 1)),
                     "g.addV('test').property('date',new Date(121,2,1,0,0,0))"])
        # 64
        tests.append([g.addE('route').from_(__.V('1')).to(__.V('2')),
                     "g.addE('route').from(__.V('1')).to(__.V('2'))"])
        # 65
        tests.append([g.withSideEffect('a', [1, 2]).V('3').select('a'),
                     "g.withSideEffect('a',[1, 2]).V('3').select('a')"])
        # 66
        tests.append([g.withSideEffect('a', 1).V('3').select('a'),
                     "g.withSideEffect('a',1).V('3').select('a')"])
        # 67
        tests.append([g.withSideEffect('a', 'abc').V('3').select('a'),
                     "g.withSideEffect('a','abc').V('3').select('a')"])
        # 68
        tests.append([g.V().has('airport', 'region', 'US-NM').limit(3).values('elev').fold().index(),
                     "g.V().has('airport','region','US-NM').limit(3).values('elev').fold().index()"])
        # 69
        tests.append([g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code', 'AGR')).path(),
                     "g.V('3').repeat(__.timeLimit(1000).out().simplePath()).until(__.has('code','AGR')).path()"])

        # 70
        tests.append([g.V().hasLabel('airport').where(__.values('elev').is_(gt(14000))),
                     "g.V().hasLabel('airport').where(__.values('elev').is(gt(14000)))"])

        # 71
        tests.append([g.V().hasLabel('airport').where(__.out().count().is_(gt(250))).values('code'),
                     "g.V().hasLabel('airport').where(__.out().count().is(gt(250))).values('code')"])

        # 72
        tests.append([g.V().hasLabel('airport').filter(__.out().count().is_(gt(250))).values('code'),
                     "g.V().hasLabel('airport').filter(__.out().count().is(gt(250))).values('code')"])
        # 73
        tests.append([g.withSack(0).
                        V('3').
                        repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).
                        until(__.has('code', 'AGR').or_().loops().is_(4)).
                        has('code', 'AGR').
                        local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).
                        limit(10),
                     "g.withSack(0).V('3').repeat(__.outE('route').sack(Operator.sum).by('dist').inV()).until(__.has('code','AGR').or().loops().is(4)).has('code','AGR').local(__.union(__.path().by('code').by('dist'),__.sack()).fold()).limit(10)"])

        # 74
        tests.append([g.addV().as_('a').addV().as_('b').addE('knows').from_('a').to('b'),
                     "g.addV().as('a').addV().as('b').addE('knows').from('a').to('b')"])

        # 75
        tests.append([g.addV('Person').as_('a').addV('Person').as_('b').addE('knows').from_('a').to('b'),
                     "g.addV('Person').as('a').addV('Person').as('b').addE('knows').from('a').to('b')"])
        # 76
        tests.append([g.V('3').project('Out','In').by(__.out().count()).by(__.in_().count()),
                     "g.V('3').project('Out','In').by(__.out().count()).by(__.in().count())"])
        # 77
        tests.append([g.V('44').out().aggregate('a').out().where(within('a')).path(),
                     "g.V('44').out().aggregate('a').out().where(within(['a'])).path()"])
        # 78
        tests.append([g.V().has('date', datetime(2021, 2, 22)),
                     "g.V().has('date',new Date(121,2,22,0,0,0))"])
        # 79
        tests.append([g.V().has('date', within(datetime(2021, 2, 22), datetime(2021, 1, 1))),
                      "g.V().has('date',within([new Date(121,2,22,0,0,0),new Date(121,1,1,0,0,0)]))"])
        # 80
        tests.append([g.V().has('date', between(datetime(2021, 1, 1), datetime(2021, 2, 22))),
                                "g.V().has('date',between(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
        # 81
        tests.append([g.V().has('date', inside(datetime(2021, 1, 1),datetime(2021, 2, 22))),
                                "g.V().has('date',inside(new Date(121,1,1,0,0,0),new Date(121,2,22,0,0,0)))"])
        # 82
        tests.append([g.V().has('date', P.gt(datetime(2021, 1, 1, 9, 30))),
                     "g.V().has('date',gt(new Date(121,1,1,9,30,0)))"])
        # 83
        tests.append([g.V().has('runways', between(3,5)),
                     "g.V().has('runways',between(3,5))"])
        # 84
        tests.append([g.V().has('runways', inside(3,5)),
                     "g.V().has('runways',inside(3,5))"])
        # 85
        tests.append([g.V('44').outE().elementMap(),
                     "g.V('44').outE().elementMap()"])
        # 86
        tests.append([g.V('44').valueMap().by(__.unfold()),
                     "g.V('44').valueMap().by(__.unfold())"])
        # 87
        tests.append([g.V('44').valueMap().with_(WithOptions.tokens,WithOptions.labels),
                     "g.V('44').valueMap().with(WithOptions.tokens,WithOptions.labels)"])
        # 88
        tests.append([g.V('44').valueMap().with_(WithOptions.tokens),
                     "g.V('44').valueMap().with(WithOptions.tokens)"])
        # 89
        tests.append([g.withStrategies(ReadOnlyStrategy()).addV('test'),
                      "g.withStrategies(new ReadOnlyStrategy()).addV('test')"])
        # 90
        strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
        tests.append([g.withStrategies(strategy).V().count(),
                    "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
        # 91
        strategy = SubgraphStrategy(vertex_properties=__.hasNot('runways'))
        tests.append([g.withStrategies(strategy).V().count(),
                      "g.withStrategies(new SubgraphStrategy(vertexProperties:__.hasNot('runways'))).V().count()"])
        # 92
        strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'),vertex_properties=__.hasNot('runways'))
        tests.append([g.withStrategies(strategy).V().count(),
                      "g.withStrategies(new SubgraphStrategy(vertices:__.has('region','US-TX'),vertexProperties:__.hasNot('runways'))).V().count()"])
        # 93
        strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'), edges=__.hasLabel('route'))
        tests.append([g.withStrategies(ReadOnlyStrategy(),strategy).V().count(),
                      "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'),edges:__.hasLabel('route'))).V().count()"])
        # 94
        strategy = SubgraphStrategy(vertices=__.has('region', 'US-TX'))
        tests.append([g.withStrategies(ReadOnlyStrategy(), strategy).V().count(),
                      "g.withStrategies(new ReadOnlyStrategy(),new SubgraphStrategy(vertices:__.has('region','US-TX'))).V().count()"])
        # 95
        tests.append([g.with_('evaluationTimeout', 500).V().count(),
                      "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
        # 96
        tests.append([g.withStrategies(OptionsStrategy({'evaluationTimeout': 500})).V().count(),
                     "g.withStrategies(new OptionsStrategy(evaluationTimeout:500)).V().count()"])
        # 97
        tests.append([g.withStrategies(PartitionStrategy(partition_key="partition", write_partition="a", read_partitions=["a"])).addV('test'),
                     "g.withStrategies(new PartitionStrategy(partitionKey:'partition',writePartition:'a',readPartitions:['a'])).addV('test')"])
        # 98
        tests.append([g.withComputer().V().shortestPath().with_(ShortestPath.target, __.has('name','peter')),
                     "g.withStrategies(new VertexProgramStrategy()).V().shortestPath().with('~tinkerpop.shortestPath.target',__.has('name','peter'))"])

        tlr = Translator().of('g')

        # Unpack each (traversal, expected) pair directly instead of the
        # range(len(...)) index-loop; enumerate keeps the case number so a
        # failing assertion identifies which of the 99 cases broke.
        for i, (trav, expected) in enumerate(tests):
            translated = tlr.translate(trav.bytecode)
            assert translated == expected, \
                f'case {i}: {translated!r} != {expected!r}'