def drop(self) -> None:
    """Drop the graph's vertices, then sweep for stragglers by id prefix.

    When ``get_shard()`` returns a truthy shard, only vertices whose
    test-shard property equals that shard are dropped; otherwise every
    vertex is dropped. Afterwards any vertex whose id starts with the shard
    value is listed and dropped as well.

    Raises:
        AssertionError: if the id-prefix sweep found vertices that survived
            the first drop. They are removed before the error is raised.
    """
    test_shard = get_shard()
    doomed = self.g.V()
    if test_shard:
        doomed = doomed.has(WellKnownProperties.TestShard.value.name, test_shard)
    doomed = doomed.drop()
    LOGGER.warning('DROPPING ALL NODES')
    self.query_executor()(query=doomed, get=FromResultSet.iterate)

    # we seem to mess this up easily
    # NOTE(review): the prefix predicate is built from test_shard even when it
    # is falsy -- confirm that this is intended for the unsharded case.
    leftover = self.query_executor()(
        query=self.g.V().hasId(TextP.startingWith(test_shard)).id(),
        get=FromResultSet.toList)
    self.query_executor()(
        query=self.g.V().hasId(TextP.startingWith(test_shard)).drop(),
        get=FromResultSet.iterate)

    # Raise explicitly instead of using `assert`, so the check still runs
    # when Python is started with -O (which strips assert statements).
    if leftover:
        raise AssertionError(f'we have some leftover: {leftover}')
    LOGGER.warning('COMPLETED DROP OF ALL NODES')
def _close_neptune_connection(conn):
    """Close *conn*, creating an event loop first if the thread has none."""
    try:
        conn.close()
    except RuntimeError as e:
        if str(e) != "There is no current event loop in thread 'ScriptRunner.scriptThread'.":
            raise
        # close() needs an event loop on this thread: install one and retry.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        conn.close()
    except Exception:
        logger.error('oops in gremlin', exc_info=True)
        raise


def run_filters(num_edges, num_matches, transient_id):
    """Query Neptune for paths around ``transientId`` vertices and plot them.

    Args:
        num_edges: limit applied after the ``bothE().otherV()`` expansion.
        num_matches: limit applied to the matched ``transientId`` vertices.
        transient_id: substring the ``uid`` property must contain; the empty
            string disables the filter.

    Returns:
        A dict with keys ``nodes_df``, ``edges_df``, ``url`` (empty string
        when ``nodes_df.size`` is 0) and ``res`` (the raw path list).

    Side effects:
        Writes ``neptune_time``, ``node_cnt``, ``edge_cnt`` and ``prop_cnt``
        into the module-level ``metrics`` mapping.
    """
    global metrics
    g, conn = gremlin_helper.connect_to_neptune()

    # The connection is released in `finally` so a failing query or plot
    # does not leak it.
    try:
        logger.info('Querying neptune')
        tic = time.perf_counter()
        t = g.V().hasLabel('transientId')
        if transient_id != "":
            # If using Neptune full text search this will perform much faster
            # than the built in Gremlin text search
            t = t.has('uid', TextP.containing(transient_id))
        res = (
            t.limit(num_matches)
            .bothE()
            .otherV()
            .limit(num_edges)
            .path()
            .by(__.valueMap().with_(WithOptions.tokens))
            .toList()
        )
        toc = time.perf_counter()
        logger.info(f'Query Execution: {toc-tic:0.02f} seconds')
        logger.debug('Query Result Count: %s', len(res))
        metrics['neptune_time'] = toc - tic

        nodes_df, edges_df = path_to_df(res)

        # Calculate the metrics
        # NOTE(review): if these are pandas DataFrames, `.size` is
        # rows * columns rather than a row count -- confirm the metric names
        # match what is being measured.
        metrics['node_cnt'] = nodes_df.size
        metrics['edge_cnt'] = edges_df.size
        metrics['prop_cnt'] = (nodes_df.size * nodes_df.columns.size) + \
            (edges_df.size * edges_df.columns.size)

        url = plot_url(nodes_df, edges_df) if nodes_df.size > 0 else ""
        logger.info("Finished compute phase")
    finally:
        _close_neptune_connection(conn)

    return {'nodes_df': nodes_df, 'edges_df': edges_df, 'url': url, 'res': res}
def _dashboard_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dict]:
    """Build and run the traversal that collects dashboard search documents.

    Args:
        graph: traversal source to query.
        tag_filter: when non-empty, only dashboards whose ``published_tag``
            property equals this value are returned.

    Returns:
        One dict per dashboard, ordered by ``name`` ascending, holding the
        projected keys plus ``product`` (the part of ``uri`` before the first
        underscore).
    """
    traversal = graph.V().hasLabel(DashboardMetadata.DASHBOARD_NODE_LABEL)
    traversal = traversal.has('name')
    if tag_filter:
        # has(key, value) is the property-equality filter. where(key, ...)
        # expects a predicate as its second argument, not a plain string.
        traversal = traversal.has('published_tag', tag_filter)

    # The by() modulators below are positional: one per projected key, in
    # the same order as this list.
    traversal = traversal.project(
        'group_name',
        'name',
        'cluster',
        'description',
        'group_description',
        'group_url',
        'url',
        'uri',
        'last_successful_run_timestamp',
        'query_names',
        'chart_names',
        'total_usage',
        'tags',
        'badges',
    )
    traversal = traversal.by(
        __.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE)
        .values('name'))  # group_name
    traversal = traversal.by('name')  # name
    traversal = traversal.by(
        __.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE)
        .out(DashboardMetadata.DASHBOARD_GROUP_CLUSTER_RELATION_TYPE)
        .values('name'))  # cluster
    traversal = traversal.by(
        __.coalesce(
            __.out(DashboardMetadata.DASHBOARD_DESCRIPTION_RELATION_TYPE)
            .values('description'),
            __.constant('')))  # description
    traversal = traversal.by(
        __.coalesce(
            __.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE)
            .out(DashboardMetadata.DASHBOARD_DESCRIPTION_RELATION_TYPE)
            .values('description'),
            __.constant('')))  # group_description
    traversal = traversal.by(
        __.out(DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE)
        .values('dashboard_group_url'))  # group_url
    traversal = traversal.by('dashboard_url')  # url (from the 'dashboard_url' property)
    traversal = traversal.by('key')  # uri
    traversal = traversal.by(
        __.coalesce(
            __.out('EXECUTED')
            .has('key', TextP.endingWith('_last_successful_execution'))
            .values('timestamp'),
            __.constant('')))  # last_successful_run_timestamp
    traversal = traversal.by(
        __.out(DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE)
        .values('name').dedup().fold())  # query_names
    traversal = traversal.by(
        __.out(DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE)
        .out(DashboardChart.CHART_RELATION_TYPE)
        .values('name').dedup().fold())  # chart_names
    traversal = traversal.by(
        __.coalesce(
            __.outE(READ_REVERSE_RELATION_TYPE)
            .values(READ_RELATION_COUNT_PROPERTY),
            __.constant(0)).sum())  # total_usage
    traversal = traversal.by(
        __.out('TAGGED_BY')
        .has('tag_type', 'default')
        .values('keys').dedup().fold())  # tags
    traversal = traversal.by(
        __.out('HAS_BADGE').values('keys').dedup().fold())  # badges
    traversal = traversal.order().by(__.select('name'), Order.asc)

    dashboards = traversal.toList()
    for dashboard in dashboards:
        # The product is the part of the uri before the first underscore.
        dashboard['product'] = dashboard['uri'].split('_')[0]
    return dashboards