Example #1
0
 def drop(self) -> None:
     """Drop vertices from the graph, then verify nothing shard-prefixed remains.

     If ``get_shard()`` returns a truthy value, only vertices whose test-shard
     property equals it are dropped; otherwise every vertex is dropped.
     Afterwards, vertices whose id starts with the shard value are listed and
     dropped too, and an ``AssertionError`` naming them is raised if any were
     found.
     """
     shard = get_shard()
     drop_query = self.g.V()
     if shard:
         shard_property = WellKnownProperties.TestShard.value.name
         drop_query = drop_query.has(shard_property, shard)
     drop_query = drop_query.drop()
     LOGGER.warning('DROPPING ALL NODES')
     self.query_executor()(query=drop_query, get=FromResultSet.iterate)

     # we seem to mess this up easily: look for (and clean up) vertices that
     # survived the drop above but still carry the shard prefix in their id.
     # NOTE(review): the shard value is used as the id prefix even when it is
     # falsy -- confirm that is intended.
     list_leftover = self.query_executor()
     leftover_ids_query = self.g.V().hasId(TextP.startingWith(shard)).id()
     leftover = list_leftover(query=leftover_ids_query,
                              get=FromResultSet.toList)

     drop_leftover = self.query_executor()
     leftover_drop_query = self.g.V().hasId(TextP.startingWith(shard)).drop()
     drop_leftover(query=leftover_drop_query, get=FromResultSet.iterate)

     # Checked only after the cleanup so stragglers are removed even when the
     # check fails.
     assert not leftover, f'we have some leftover: {leftover}'
     LOGGER.warning('COMPLETED DROP OF ALL NODES')
def _close_connection(conn):
    """Close ``conn``, installing a fresh event loop if the thread has none."""
    try:
        conn.close()
    except RuntimeError as e:
        # Only the "no current event loop" failure is recoverable: give this
        # thread a loop and retry once. Anything else is re-raised untouched.
        if str(e) != ("There is no current event loop in thread "
                      "'ScriptRunner.scriptThread'."):
            raise
        asyncio.set_event_loop(asyncio.new_event_loop())
        conn.close()
    except Exception:
        logger.error('oops in gremlin', exc_info=True)
        raise


def run_filters(num_edges, num_matches, transient_id):
    """Query Neptune for ``transientId`` vertices and their neighbours.

    The query starts from vertices labelled ``transientId`` (optionally
    restricted to those whose ``uid`` contains ``transient_id``), takes at
    most ``num_matches`` of them, walks their edges in both directions and
    returns at most ``num_edges`` paths with full value maps.

    Timing and size figures are written into the module-level ``metrics``
    dict under ``neptune_time``, ``node_cnt``, ``edge_cnt`` and ``prop_cnt``.

    Args:
        num_edges: Upper bound on the number of paths returned.
        num_matches: Upper bound on the number of starting vertices.
        transient_id: Substring to look for in ``uid``; ``""`` disables the
            filter.

    Returns:
        A dict with keys ``nodes_df``, ``edges_df``, ``url`` (``""`` when
        ``nodes_df`` is empty) and ``res`` (the raw path list).

    The connection is always closed, even when the query or the post-processing
    raises.
    """
    global metrics
    g, conn = gremlin_helper.connect_to_neptune()

    try:
        logger.info('Querying neptune')
        tic = time.perf_counter()
        t = g.V().hasLabel('transientId')
        if transient_id != "":
            # If using Neptune full text search this will perform much faster
            # than the built in Gremlin text search
            t = t.has('uid', TextP.containing(transient_id))
        res = t.limit(num_matches).bothE().otherV().limit(num_edges).path().by(
            __.valueMap().with_(WithOptions.tokens)).toList()
        elapsed = time.perf_counter() - tic

        logger.info('Query Execution: %.2f seconds', elapsed)
        logger.debug('Query Result Count: %s', len(res))
        metrics['neptune_time'] = elapsed

        nodes_df, edges_df = path_to_df(res)

        # Calculate the metrics
        # NOTE(review): if these are pandas DataFrames, ``.size`` is
        # rows * columns rather than a row count, so ``prop_cnt`` would
        # multiply by the column count twice -- confirm this is intended.
        metrics['node_cnt'] = nodes_df.size
        metrics['edge_cnt'] = edges_df.size
        metrics['prop_cnt'] = (nodes_df.size * nodes_df.columns.size) + \
            (edges_df.size * edges_df.columns.size)

        url = plot_url(nodes_df, edges_df) if nodes_df.size > 0 else ""

        logger.info("Finished compute phase")
    finally:
        # Release the connection on both the success and the failure path.
        _close_connection(conn)

    return {'nodes_df': nodes_df, 'edges_df': edges_df, 'url': url, 'res': res}
Example #3
0
def _dashboard_search_query(graph: GraphTraversalSource,
                            tag_filter: str) -> List[Dict]:
    """Fetch one search document per dashboard vertex.

    Args:
        graph: Traversal source to query.
        tag_filter: When non-empty, only dashboards whose ``published_tag``
            property equals this value are returned.

    Returns:
        A list of dicts ordered by ``name`` ascending. Each dict holds the
        projected keys listed in ``projections`` below plus ``product``,
        which is the part of ``uri`` before its first underscore.
    """
    group_rel = DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE
    cluster_rel = DashboardMetadata.DASHBOARD_GROUP_CLUSTER_RELATION_TYPE
    description_rel = DashboardMetadata.DASHBOARD_DESCRIPTION_RELATION_TYPE
    query_rel = DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE
    chart_rel = DashboardChart.CHART_RELATION_TYPE

    traversal = graph.V().hasLabel(DashboardMetadata.DASHBOARD_NODE_LABEL)
    traversal = traversal.has('name')
    if tag_filter:
        # has(key, value) is the property-equality filter; where() expects a
        # predicate or traversal and cannot take a plain value.
        traversal = traversal.has('published_tag', tag_filter)

    # (projected key, by() modulator) pairs. Keeping them side by side
    # guarantees each key is matched with the modulator meant for it.
    projections = [
        ('group_name', __.out(group_rel).values('name')),
        ('name', 'name'),
        ('cluster', __.out(group_rel).out(cluster_rel).values('name')),
        ('description',
         __.coalesce(__.out(description_rel).values('description'),
                     __.constant(''))),
        ('group_description',
         __.coalesce(
             __.out(group_rel).out(description_rel).values('description'),
             __.constant(''))),
        ('group_url', __.out(group_rel).values('dashboard_group_url')),
        ('url', 'dashboard_url'),
        ('uri', 'key'),
        ('last_successful_run_timestamp',
         __.coalesce(
             __.out('EXECUTED').has(
                 'key', TextP.endingWith('_last_successful_execution')
             ).values('timestamp'),
             __.constant(''))),
        ('query_names', __.out(query_rel).values('name').dedup().fold()),
        ('chart_names',
         __.out(query_rel).out(chart_rel).values('name').dedup().fold()),
        ('total_usage',
         __.coalesce(
             __.outE(READ_REVERSE_RELATION_TYPE).values(
                 READ_RELATION_COUNT_PROPERTY),
             __.constant(0)).sum()),
        ('tags',
         __.out('TAGGED_BY').has(
             'tag_type', 'default').values('keys').dedup().fold()),
        ('badges', __.out('HAS_BADGE').values('keys').dedup().fold()),
    ]

    traversal = traversal.project(*(key for key, _ in projections))
    for _, modulator in projections:
        traversal = traversal.by(modulator)

    traversal = traversal.order().by(__.select('name'), Order.asc)

    dashboards = traversal.toList()
    for dashboard in dashboards:
        dashboard['product'] = dashboard['uri'].split('_')[0]

    return dashboards