def _user_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dict]:
    """Project user vertices into flat search documents.

    Args:
        graph: Traversal source the query is built on and executed against.
        tag_filter: When truthy, only vertices whose ``published_tag``
            property equals this value are kept.

    Returns:
        One dict per matching vertex, keyed by the projected field names and
        ordered by ``email`` ascending.
    """
    traversal = graph.V().hasLabel(User.USER_NODE_LABEL)
    # Only vertices that carry the full-name property are considered.
    traversal = traversal.has(User.USER_NODE_FULL_NAME)
    if tag_filter:
        # has(key, value) is the property-equality filter, matching the table
        # query; where() takes a predicate/traversal, not a key/value pair.
        traversal = traversal.has('published_tag', tag_filter)

    traversal = traversal.project(
        'email',
        'first_name',
        'last_name',
        'full_name',
        'github_username',
        'team_name',
        'employee_type',
        'manager_email',
        'slack_id',
        'is_active',
        'role_name',
        'total_read',
        'total_own',
        'total_follow',
    )
    # The by() modulators below are positional: one per projected key, in the
    # same order as the names passed to project().
    traversal = traversal.by('email')  # email
    traversal = traversal.by('first_name')  # first_name
    traversal = traversal.by('last_name')  # last_name
    traversal = traversal.by('full_name')  # full_name
    traversal = traversal.by('github_username')  # github_username
    traversal = traversal.by('team_name')  # team_name
    traversal = traversal.by('employee_type')  # employee_type
    traversal = traversal.by(
        __.coalesce(
            __.out(User.USER_MANAGER_RELATION_TYPE).values('email'),
            __.constant(''),  # no manager edge -> empty string
        )
    )  # manager_email
    traversal = traversal.by('slack_id')  # slack_id
    traversal = traversal.by('is_active')  # is_active
    traversal = traversal.by('role_name')  # role_name
    traversal = traversal.by(
        __.coalesce(
            __.outE(READ_RELATION_TYPE).values('read_count'),
            __.constant(0),  # no read edges -> sum over a single 0
        ).sum()
    )  # total_read
    # count() directly counts the outgoing edges (0 when there are none).
    # The previous fold().count() counted the single folded list and so
    # always produced 1.
    traversal = traversal.by(
        __.outE(OWNER_OF_OBJECT_RELATION_TYPE).count()
    )  # total_own
    traversal = traversal.by(
        __.outE('FOLLOWED_BY').count()
    )  # total_follow

    traversal = traversal.order().by(__.select('email'), Order.asc)
    return traversal.toList()
def _table_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dict]:
    """Project table vertices into flat search documents.

    Args:
        graph: Traversal source the query is built on and executed against.
        tag_filter: When truthy, only vertices whose ``published_tag``
            property equals this value are kept.

    Returns:
        One dict per matching vertex, keyed by the projected field names and
        ordered by ``name`` ascending.
    """
    # Relation names that are used by several projections below.
    schema_rel = TableMetadata.TABLE_SCHEMA_RELATION_TYPE
    column_rel = TableMetadata.TABLE_COL_RELATION_TYPE
    description_rel = DescriptionMetadata.DESCRIPTION_RELATION_TYPE

    query = graph.V().hasLabel(TableMetadata.TABLE_NODE_LABEL)
    if tag_filter:
        query = query.has('published_tag', tag_filter)

    # (projected key, by() modulator) pairs; order matters because by()
    # modulators are matched to project() keys positionally.
    fields = [
        (
            'database',
            __.out(schema_rel)
            .out(SCHEMA_REVERSE_RELATION_TYPE)
            .out(CLUSTER_REVERSE_RELATION_TYPE)
            .values('name'),
        ),
        (
            'cluster',
            __.out(schema_rel).out(SCHEMA_REVERSE_RELATION_TYPE).values('name'),
        ),
        ('schema', __.out(schema_rel).values('name')),
        (
            'schema_description',
            __.coalesce(
                __.out(schema_rel).out(description_rel).values('description'),
                __.constant(''),
            ),
        ),
        ('name', 'name'),
        ('key', T.id),
        (
            'description',
            __.coalesce(
                __.out(description_rel).values('description'),
                __.constant(''),
            ),
        ),
        (
            'last_updated_timestamp',
            __.coalesce(
                __.out(LASTUPDATED_RELATION_TYPE).values(TIMESTAMP_PROPERTY),
                __.constant(''),
            ),
        ),
        ('column_names', __.out(column_rel).values('name').fold()),
        (
            'column_descriptions',
            __.out(column_rel).out(description_rel).values('description').fold(),
        ),
        (
            'total_usage',
            __.coalesce(
                __.outE(READ_REVERSE_RELATION_TYPE).values('read_count'),
                __.constant(0),
            ).sum(),
        ),
        ('unique_usage', __.outE(READ_REVERSE_RELATION_TYPE).count()),
        (
            'tags',
            __.inE(TableMetadata.TAG_TABLE_RELATION_TYPE)
            .outV()
            .values(METADATA_KEY_PROPERTY_NAME)
            .fold(),
        ),
        ('badges', __.out('HAS_BADGE').values('keys').dedup().fold()),
        (
            'programmatic_descriptions',
            __.out(DescriptionMetadata.PROGRAMMATIC_DESCRIPTION_NODE_LABEL)
            .values('description')
            .fold(),
        ),
    ]

    query = query.project(*[key for key, _ in fields])
    for _, modulator in fields:
        query = query.by(modulator)

    return query.order().by(__.select('name'), Order.asc).toList()
def handle_youtube_video_added(self, video_id, user_id, name, description, location,
                               preview_image_location, tags, added_date, timestamp):
    """Add a video, its uploader edge and its tag edges to the graph.

    A single traversal is built on ``self.graph`` and then executed. It starts
    from the ``user`` vertex whose ``userId`` equals *user_id*, adds a
    ``video`` vertex, an ``uploaded`` edge from the user to the video, and one
    ``taggedWith`` edge per distinct tag. A ``tag`` vertex is created when no
    vertex with that tag name exists yet.

    Args:
        video_id: Stored as the ``videoId`` property of the new vertex.
        user_id: ``userId`` of the uploading user vertex.
        name: Stored as the ``name`` property of the video.
        description: Stored as the ``description`` property of the video.
        location: Only written to the debug log by this method.
        preview_image_location: Stored as ``preview_image_location``.
        tags: Iterable of tag names; duplicates are collapsed.
        added_date: Stored on the video vertex, on the ``uploaded`` edge and,
            as ``tagged_date``, on any newly created tag vertex.
        timestamp: Only written to the debug log by this method.
    """
    # make sure tags are unique (no duplicates)
    unique_tags = set(tags)

    # Lazy %-style arguments: values that are not str (e.g. None) no longer
    # raise TypeError, and nothing is formatted unless debug logging is on.
    logging.debug(
        'SuggestedVideosService:handle_youtube_video_added, video ID: %s, user ID: %s, '
        'name: %s, description: %s, location: %s, preview_image_location: %s, '
        'tags: %s, timestamp: %s',
        video_id, user_id, name, description, location, preview_image_location,
        unique_tags, timestamp)

    # Note: building a single traversal, but broken into several steps for readability

    # locate user vertex
    traversal = self.graph.V().has('user', 'userId', user_id).as_('^user')

    # add video vertex
    traversal = (
        traversal.addV('video')
        .property('videoId', video_id)
        .property('added_date', added_date)
        .property('description', description)
        .property('name', name)
        .property('preview_image_location', preview_image_location)
        .as_('^video')
    )

    # add edge from user to video vertex
    traversal = (
        traversal.addE('uploaded')
        .from_('^user')
        .to('^video')
        .property('added_date', added_date)
    )

    # find vertices for tags and add edges from video vertex
    for tag in unique_tags:
        traversal = traversal.addE('taggedWith').from_('^video').to(
            __.coalesce(
                __.V().has('tag', 'name', tag),  # reuse an existing tag vertex
                __.addV('tag').property('name', tag).property('tagged_date', added_date),
            )
        )

    # execute the traversal
    traversal.iterate()
def _dashboard_search_query(graph: GraphTraversalSource, tag_filter: str) -> List[Dict]:
    """Project dashboard vertices into flat search documents.

    Args:
        graph: Traversal source the query is built on and executed against.
        tag_filter: When truthy, only vertices whose ``published_tag``
            property equals this value are kept.

    Returns:
        One dict per matching vertex, keyed by the projected field names,
        ordered by ``name`` ascending. Each dict additionally gets a
        ``product`` entry: the part of ``uri`` before the first underscore.
    """
    group_rel = DashboardMetadata.DASHBOARD_DASHBOARD_GROUP_RELATION_TYPE
    description_rel = DashboardMetadata.DASHBOARD_DESCRIPTION_RELATION_TYPE
    query_rel = DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE

    traversal = graph.V().hasLabel(DashboardMetadata.DASHBOARD_NODE_LABEL)
    # Only vertices that carry a name property are considered.
    traversal = traversal.has('name')
    if tag_filter:
        # has(key, value) is the property-equality filter, matching the table
        # query; where() takes a predicate/traversal, not a key/value pair.
        traversal = traversal.has('published_tag', tag_filter)

    traversal = traversal.project(
        'group_name',
        'name',
        'cluster',
        'description',
        'group_description',
        'group_url',
        'url',
        'uri',
        'last_successful_run_timestamp',
        'query_names',
        'chart_names',
        'total_usage',
        'tags',
        'badges',
    )
    # The by() modulators below are positional: one per projected key, in the
    # same order as the names passed to project().
    traversal = traversal.by(
        __.out(group_rel).values('name')
    )  # group_name
    traversal = traversal.by('name')  # name
    traversal = traversal.by(
        __.out(group_rel).out(
            DashboardMetadata.DASHBOARD_GROUP_CLUSTER_RELATION_TYPE
        ).values('name')
    )  # cluster
    traversal = traversal.by(
        __.coalesce(
            __.out(description_rel).values('description'),
            __.constant(''),
        )
    )  # description
    traversal = traversal.by(
        __.coalesce(
            __.out(group_rel).out(description_rel).values('description'),
            __.constant(''),
        )
    )  # group_description
    traversal = traversal.by(
        __.out(group_rel).values('dashboard_group_url')
    )  # group_url
    traversal = traversal.by('dashboard_url')  # url
    traversal = traversal.by('key')  # uri
    traversal = traversal.by(
        __.coalesce(
            __.out('EXECUTED').has(
                'key', TextP.endingWith('_last_successful_execution')
            ).values('timestamp'),
            __.constant(''),
        )
    )  # last_successful_run_timestamp
    traversal = traversal.by(
        __.out(query_rel).values('name').dedup().fold()
    )  # query_names
    traversal = traversal.by(
        __.out(query_rel).out(
            DashboardChart.CHART_RELATION_TYPE
        ).values('name').dedup().fold()
    )  # chart_names
    traversal = traversal.by(
        __.coalesce(
            __.outE(READ_REVERSE_RELATION_TYPE).values(READ_RELATION_COUNT_PROPERTY),
            __.constant(0),
        ).sum()
    )  # total_usage
    traversal = traversal.by(
        __.out('TAGGED_BY').has('tag_type', 'default').values('keys').dedup().fold()
    )  # tags
    traversal = traversal.by(
        __.out('HAS_BADGE').values('keys').dedup().fold()
    )  # badges

    traversal = traversal.order().by(__.select('name'), Order.asc)
    dashboards = traversal.toList()
    for dashboard in dashboards:
        # The product is taken to be the prefix of the key up to the first '_'.
        dashboard['product'] = dashboard['uri'].split('_')[0]
    return dashboards