def get_query_relations(self) -> List[GraphRelationship]:
    """
    Build the relationships that point at this node from its tables and user.

    One table relationship is created for every key in ``self.table_keys``.
    A user relationship is appended afterwards when ``self.user`` is truthy.

    :return: table relationships (in ``table_keys`` order) followed by the
        optional user relationship
    """
    query_relations = [
        GraphRelationship(
            start_label=TableMetadata.TABLE_NODE_LABEL,
            end_label=self.NODE_LABEL,
            start_key=key,
            end_key=self.get_key_self(),
            type=self.TABLE_QUERY_RELATION_TYPE,
            reverse_type=self.INVERSE_TABLE_QUERY_RELATION_TYPE,
            attributes={},
        )
        for key in self.table_keys
    ]

    # Without a user there is nothing more to link.
    if not self.user:
        return query_relations

    query_relations.append(
        GraphRelationship(
            start_label=UserMetadata.USER_NODE_LABEL,
            end_label=self.NODE_LABEL,
            start_key=self.user.get_user_model_key(email=self.user.email),
            end_key=self.get_key_self(),
            type=self.USER_QUERY_RELATION_TYPE,
            reverse_type=self.INVERSE_USER_QUERY_RELATION_TYPE,
            attributes={},
        )
    )
    return query_relations
def _create_next_relation(self) -> Iterator[GraphRelationship]:
    """
    Yield every relationship that starts at this metric node.

    Emission order:
      1. metric -> dashboard (always)
      2. metric -> description (only when ``self.description`` is truthy)
      3. metric -> tag, one per entry of ``self.tags`` (only when truthy)
      4. metric -> metric type (only when ``self.type`` is truthy)

    :return: iterator of GraphRelationship
    """
    # Metric > Dashboard relation (the metric is the start node)
    yield GraphRelationship(
        start_label=MetricMetadata.METRIC_NODE_LABEL,
        start_key=self._get_metric_key(),
        end_label=MetricMetadata.DASHBOARD_NODE_LABEL,
        end_key=self._get_dashboard_key(),
        type=MetricMetadata.METRIC_DASHBOARD_RELATION_TYPE,
        reverse_type=MetricMetadata.DASHBOARD_METRIC_RELATION_TYPE,
        attributes={})

    # Metric > Metric description relation
    if self.description:
        yield GraphRelationship(
            start_label=MetricMetadata.METRIC_NODE_LABEL,
            start_key=self._get_metric_key(),
            end_label=MetricMetadata.DESCRIPTION_NODE_LABEL,
            end_key=self._get_metric_description_key(),
            type=MetricMetadata.METRIC_DESCRIPTION_RELATION_TYPE,
            reverse_type=MetricMetadata.DESCRIPTION_METRIC_RELATION_TYPE,
            attributes={})

    # Metric > Metric tag relation
    # The truthiness guard also protects against ``self.tags`` being None.
    if self.tags:
        for tag in self.tags:
            yield GraphRelationship(
                start_label=MetricMetadata.METRIC_NODE_LABEL,
                start_key=self._get_metric_key(),
                end_label=TagMetadata.TAG_NODE_LABEL,
                end_key=TagMetadata.get_tag_key(tag),
                type=MetricMetadata.METRIC_TAG_RELATION_TYPE,
                reverse_type=MetricMetadata.TAG_METRIC_RELATION_TYPE,
                attributes={})

    # Metric > Metric type relation
    if self.type:
        yield GraphRelationship(
            start_label=MetricMetadata.METRIC_NODE_LABEL,
            start_key=self._get_metric_key(),
            end_label=MetricMetadata.METRIC_TYPE_NODE_LABEL,
            end_key=self._get_metric_type_key(),
            type=MetricMetadata.METRIC_METRIC_TYPE_RELATION_TYPE,
            reverse_type=MetricMetadata.METRIC_TYPE_METRIC_RELATION_TYPE,
            attributes={})

    # No further relations are emitted: the former de-duplication loop here
    # iterated over an always-empty list, so it could never yield anything.
def _create_next_relation(self) -> Iterator[GraphRelationship]:
    """
    Yield the relationships for this table.

    Emission order:
      1. schema -> table (always)
      2. table -> description (only when ``self.description`` is truthy)
      3. table -> tag, one per entry of ``self.tags`` (only when truthy)
      4. everything produced by ``_create_column_relations`` for each column
      5. database -> cluster and cluster -> schema, each only if its
         (start_key, end_key, type) triple is not yet recorded in
         ``TableMetadata.serialized_rels_keys``

    :return: iterator of GraphRelationship
    """
    yield GraphRelationship(
        start_key=self._get_schema_key(),
        start_label=TableMetadata.SCHEMA_NODE_LABEL,
        end_key=self._get_table_key(),
        end_label=TableMetadata.TABLE_NODE_LABEL,
        type=TableMetadata.SCHEMA_TABLE_RELATION_TYPE,
        reverse_type=TableMetadata.TABLE_SCHEMA_RELATION_TYPE,
        attributes={},
    )

    if self.description:
        yield self.description.get_relation(
            TableMetadata.TABLE_NODE_LABEL,
            self._get_table_key(),
            self._get_table_description_key(self.description),
        )

    if self.tags:
        for tag_name in self.tags:
            yield GraphRelationship(
                start_label=TableMetadata.TABLE_NODE_LABEL,
                start_key=self._get_table_key(),
                end_label=TagMetadata.TAG_NODE_LABEL,
                end_key=TagMetadata.get_tag_key(tag_name),
                type=TableMetadata.TABLE_TAG_RELATION_TYPE,
                reverse_type=TableMetadata.TAG_TABLE_RELATION_TYPE,
                attributes={},
            )

    for column in self.columns:
        yield from self._create_column_relations(column)

    # Relations above the table level; the class-level key set is consulted
    # so that a triple already recorded there is not yielded again.
    shared_relations = [
        GraphRelationship(
            start_label=TableMetadata.DATABASE_NODE_LABEL,
            end_label=TableMetadata.CLUSTER_NODE_LABEL,
            start_key=self._get_database_key(),
            end_key=self._get_cluster_key(),
            type=TableMetadata.DATABASE_CLUSTER_RELATION_TYPE,
            reverse_type=TableMetadata.CLUSTER_DATABASE_RELATION_TYPE,
            attributes={},
        ),
        GraphRelationship(
            start_label=TableMetadata.CLUSTER_NODE_LABEL,
            end_label=TableMetadata.SCHEMA_NODE_LABEL,
            start_key=self._get_cluster_key(),
            end_key=self._get_schema_key(),
            type=TableMetadata.CLUSTER_SCHEMA_RELATION_TYPE,
            reverse_type=TableMetadata.SCHEMA_CLUSTER_RELATION_TYPE,
            attributes={},
        ),
    ]
    for shared in shared_relations:
        dedup_key = (shared.start_key, shared.end_key, shared.type)
        if dedup_key in TableMetadata.serialized_rels_keys:
            continue
        TableMetadata.serialized_rels_keys.add(dedup_key)
        yield shared
def _create_rel_iterator(self) -> Iterator[GraphRelationship]:
    """
    Create relations between source table and all the downstream tables.

    Every entry of ``self.downstream_deps`` is expected to follow
    '{db}://{cluster}.{schema}/{table}'. Entries that do not match that
    shape are skipped silently.

    :return: iterator with one table -> table relationship per matching entry
    """
    for downstream_tab in self.downstream_deps:
        # every deps should follow '{db}://{cluster}.{schema}/{table}'
        # todo: if we change the table uri, we should change here.
        # Raw string literal: '\w' and '\.' inside a plain literal are invalid
        # escape sequences and trigger a warning on modern Python; the
        # compiled pattern is unchanged.
        # NOTE(review): re.match only anchors at the start, so any trailing
        # characters after the last word run are ignored -- confirm intended.
        m = re.match(r'(\w+)://(\w+)\.(\w+)\/(\w+)', downstream_tab)
        if not m:
            # if not match, skip those records
            continue

        yield GraphRelationship(
            start_key=self.get_table_model_key(db=self.db,
                                               cluster=self.cluster,
                                               schema=self.schema,
                                               table=self.table),
            start_label=TableMetadata.TABLE_NODE_LABEL,
            end_label=TableMetadata.TABLE_NODE_LABEL,
            end_key=self.get_table_model_key(db=m.group(1),
                                             cluster=m.group(2),
                                             schema=m.group(3),
                                             table=m.group(4)),
            type=TableLineage.ORIGIN_DEPENDENCY_RELATION_TYPE,
            reverse_type=TableLineage.DEPENDENCY_ORIGIN_RELATION_TYPE,
            attributes={})
def _create_column_relations(
        self,
        col: ColumnMetadata) -> Iterator[GraphRelationship]:
    """
    Yield the relationships that belong to a single column of this table.

    Emission order:
      1. table -> column (always)
      2. column -> description (only when ``col.description`` is truthy)
      3. the relations returned by ``BadgeMetadata.get_badge_relations``
         (only when ``col.badges`` is truthy)
      4. the relations from the column's type metadata (only when
         ``col.get_type_metadata()`` returns something truthy)

    :param col: column whose relationships are generated
    :return: iterator of GraphRelationship
    """
    yield GraphRelationship(
        start_label=TableMetadata.TABLE_NODE_LABEL,
        start_key=self._get_table_key(),
        end_label=ColumnMetadata.COLUMN_NODE_LABEL,
        end_key=self._get_col_key(col),
        type=TableMetadata.TABLE_COL_RELATION_TYPE,
        reverse_type=TableMetadata.COL_TABLE_RELATION_TYPE,
        attributes={},
    )

    if col.description:
        yield col.description.get_relation(
            ColumnMetadata.COLUMN_NODE_LABEL,
            self._get_col_key(col),
            self._get_col_description_key(col, col.description),
        )

    if col.badges:
        column_badges = BadgeMetadata(
            start_label=ColumnMetadata.COLUMN_NODE_LABEL,
            start_key=self._get_col_key(col),
            badges=col.badges,
        )
        yield from column_badges.get_badge_relations()

    column_type = col.get_type_metadata()
    if column_type:
        yield from column_type.create_relation_iterator()
def create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the relationships for this type node and, recursively, its map
    key and value types.

    Emission order:
      1. parent -> this type node (always)
      2. this type node -> description (only when ``self.description`` is
         truthy)
      3. for non-terminal types, everything yielded by the map key type
         followed by everything yielded by the map value type

    :return: iterator of GraphRelationship
    :raises AssertionError: if a description exists but no description key
        can be retrieved, or if a non-terminal type lacks a map key or map
        value type
    """
    parent_relation = GraphRelationship(
        start_label=self.parent_label(),
        start_key=self.parent_key(),
        end_label=TypeMetadata.NODE_LABEL,
        end_key=self.key(),
        type=self.relation_type(),
        reverse_type=self.inverse_relation_type(),
        attributes={},
    )
    yield parent_relation

    if self.description:
        description_key = self.description_key()
        assert description_key is not None, f"Could not retrieve description key for {self.name}"
        yield self.description.get_relation(TypeMetadata.NODE_LABEL, self.key(), description_key)

    # Terminal types have no nested key/value types to descend into.
    if self.is_terminal_type():
        return

    assert self.map_key_type is not None, f"Map key type must be set for {self.name}"
    assert self.map_value_type is not None, f"Map value type must be set for {self.name}"
    yield from self.map_key_type.create_relation_iterator()
    yield from self.map_value_type.create_relation_iterator()
def setUp(self) -> None:
    """
    Prepare the watermark under test and the expected node / relation
    results, in both object and serialized (dict) form.
    """
    super(TestWatermark, self).setUp()

    # Literals shared between the input and the expected results.
    create_time = '2017-09-18T00:00:00'
    partition_key = 'ds'
    partition_value = '2017-09-18/feature_id=9'
    node_label = 'Watermark'
    table_label = 'Table'
    relation_type = 'BELONG_TO_TABLE'
    reverse_relation_type = 'WATERMARK'

    self.watermark = Watermark(
        create_time=create_time,
        database=DATABASE,
        schema=SCHEMA,
        table_name=TABLE,
        cluster=CLUSTER,
        part_type=PART_TYPE,
        part_name=NESTED_PART,
    )

    self.start_key = f'{DATABASE}://{CLUSTER}.{SCHEMA}/{TABLE}/{PART_TYPE}/'
    self.end_key = f'{DATABASE}://{CLUSTER}.{SCHEMA}/{TABLE}'

    self.expected_node_result = GraphNode(
        key=self.start_key,
        label=node_label,
        attributes={
            'partition_key': partition_key,
            'partition_value': partition_value,
            'create_time': create_time,
        },
    )
    self.expected_serialized_node_results = [{
        NODE_KEY: self.start_key,
        NODE_LABEL: node_label,
        'partition_key': partition_key,
        'partition_value': partition_value,
        'create_time': create_time,
    }]

    self.expected_relation_result = GraphRelationship(
        start_label=node_label,
        end_label=table_label,
        start_key=self.start_key,
        end_key=self.end_key,
        type=relation_type,
        reverse_type=reverse_relation_type,
        attributes={},
    )
    self.expected_serialized_relation_results = [{
        RELATION_START_KEY: self.start_key,
        RELATION_START_LABEL: node_label,
        RELATION_END_KEY: self.end_key,
        RELATION_END_LABEL: table_label,
        RELATION_TYPE: relation_type,
        RELATION_REVERSE_TYPE: reverse_relation_type,
    }]
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the feature node to this watermark
    node.

    :return: iterator producing exactly one GraphRelationship
    """
    feature_key = self._get_feature_key()
    watermark_key = self._get_watermark_key()

    feature_to_watermark = GraphRelationship(
        start_key=feature_key,
        start_label=FeatureMetadata.NODE_LABEL,
        end_key=watermark_key,
        end_label=FeatureWatermark.NODE_LABEL,
        type=FeatureWatermark.FEATURE_WATERMARK_RELATION,
        reverse_type=FeatureWatermark.WATERMARK_FEATURE_RELATION,
        attributes={},
    )
    yield feature_to_watermark
def get_query_relations(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the query node
    (``self.query_metadata``) to this node.

    :return: iterator producing exactly one GraphRelationship
    """
    query_key = self.query_metadata.get_key_self()
    own_key = self.get_key_self()

    query_to_self = GraphRelationship(
        start_label=QueryMetadata.NODE_LABEL,
        end_label=self.NODE_LABEL,
        start_key=query_key,
        end_key=own_key,
        type=self.QUERY_EXECUTION_RELATION_TYPE,
        reverse_type=self.INVERSE_QUERY_EXECUTION_RELATION_TYPE,
        attributes={},
    )
    yield query_to_self
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the feature node to its generation
    code node.

    :return: iterator producing exactly one GraphRelationship
    """
    feature_key = self._get_feature_key()
    generation_code_key = self._get_generation_code_key()

    feature_to_gencode = GraphRelationship(
        start_label=FeatureMetadata.NODE_LABEL,
        end_label=FeatureGenerationCode.NODE_LABEL,
        start_key=feature_key,
        end_key=generation_code_key,
        type=FeatureGenerationCode.FEATURE_GENCODE_RELATION_TYPE,
        reverse_type=FeatureGenerationCode.GENCODE_FEATURE_RELATION_TYPE,
        attributes={},
    )
    yield feature_to_gencode
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single table -> user relationship for this usage record.

    The relationship carries ``self.read_count`` under the
    ``ColumnUsageModel.READ_RELATION_COUNT`` attribute.

    :return: iterator producing exactly one GraphRelationship
    """
    table_key = self._get_table_key()
    user_key = self._get_user_key(self.user_email)

    yield GraphRelationship(
        start_key=table_key,
        start_label=TableMetadata.TABLE_NODE_LABEL,
        end_key=user_key,
        end_label=User.USER_NODE_LABEL,
        type=ColumnUsageModel.TABLE_USER_RELATION_TYPE,
        reverse_type=ColumnUsageModel.USER_TABLE_RELATION_TYPE,
        attributes={ColumnUsageModel.READ_RELATION_COUNT: self.read_count},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield one owner relationship per non-empty owner email.

    Each relationship starts at the node identified by ``self.start_label``
    and ``self.start_key`` and ends at the user node of that email. Falsy
    entries in ``self.owner_emails`` are skipped.

    :return: iterator of GraphRelationship
    """
    for owner_email in self.owner_emails:
        if not owner_email:
            continue

        owner_relation = GraphRelationship(
            start_label=self.start_label,
            start_key=self.start_key,
            end_label=User.USER_NODE_LABEL,
            end_key=User.get_user_model_key(email=owner_email),
            type=OWNER_RELATION_TYPE,
            reverse_type=OWNER_OF_OBJECT_RELATION_TYPE,
            attributes={},
        )
        yield owner_relation
def get_relation(self,
                 start_node: str,
                 start_key: Any,
                 end_key: Any) -> GraphRelationship:
    """
    Build the relationship from an arbitrary node to this description node.

    :param start_node: label of the node the relationship starts at
    :param start_key: key of the node the relationship starts at
    :param end_key: key of the description node the relationship ends at
    :return: the relationship, whose end label is ``self._label``
    """
    return GraphRelationship(
        start_label=start_node,
        start_key=start_key,
        end_label=self._label,
        end_key=end_key,
        type=DescriptionMetadata.DESCRIPTION_RELATION_TYPE,
        reverse_type=DescriptionMetadata.INVERSE_DESCRIPTION_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the user -> manager relationship, if a manager email is set.

    Nothing is yielded when ``self.manager_email`` is falsy.

    :return: iterator producing zero or one GraphRelationship
    """
    # only create the relation if the manager exists
    if not self.manager_email:
        return

    user_key = User.get_user_model_key(email=self.email)
    manager_key = self.get_user_model_key(email=self.manager_email)

    yield GraphRelationship(
        start_key=user_key,
        start_label=User.USER_NODE_LABEL,
        end_label=User.USER_NODE_LABEL,
        end_key=manager_key,
        type=User.USER_MANAGER_RELATION_TYPE,
        reverse_type=User.MANAGER_USER_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the start node to the reading user.

    The start node is identified by ``self.start_label`` and
    ``self.start_key``. ``self.read_count`` is stored on the relationship
    under ``READ_RELATION_COUNT_PROPERTY``.

    :return: iterator producing exactly one GraphRelationship
    """
    reader_key = User.get_user_model_key(email=self.user_email)
    read_attributes = {READ_RELATION_COUNT_PROPERTY: self.read_count}

    read_relation = GraphRelationship(
        start_label=self.start_label,
        start_key=self.start_key,
        end_label=User.USER_NODE_LABEL,
        end_key=reader_key,
        type=READ_REVERSE_RELATION_TYPE,
        reverse_type=READ_RELATION_TYPE,
        attributes=read_attributes,
    )
    yield read_relation
def get_query_relations(self) -> Iterator[GraphRelationship]:
    """
    Yield the relationships that point at this node.

    One column relationship is yielded for every column of every table in
    ``self.tables``. Afterwards a query relationship is yielded when
    ``self.query_metadata`` is truthy.

    :return: iterator of GraphRelationship
    """
    for source_table in self.tables:
        for column in source_table.columns:
            column_key = source_table._get_col_key(column)
            own_key = self.get_key_self()
            yield GraphRelationship(
                start_label=ColumnMetadata.COLUMN_NODE_LABEL,
                end_label=self.NODE_LABEL,
                start_key=column_key,
                end_key=own_key,
                type=self.COLUMN_WHERE_RELATION_TYPE,
                reverse_type=self.INVERSE_COLUMN_WHERE_RELATION_TYPE,
                attributes={},
            )

    # Optional Query to Where Clause
    if not self.query_metadata:
        return

    yield GraphRelationship(
        start_label=QueryMetadata.NODE_LABEL,
        end_label=self.NODE_LABEL,
        start_key=self.query_metadata.get_key_self(),
        end_key=self.get_key_self(),
        type=self.QUERY_WHERE_RELATION_TYPE,
        reverse_type=self.INVERSE_QUERY_WHERE_RELATION_TYPE,
        attributes={},
    )
def _create_rel_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield one table -> user relationship per entry of ``self.col_readers``.

    Each relationship stores the reader's ``read_count`` under
    ``TableColumnUsage.READ_RELATION_COUNT``.

    :return: iterator of GraphRelationship
    """
    for reader in self.col_readers:
        table_key = self._get_table_key(reader)
        user_key = self._get_user_key(reader.user_email)

        yield GraphRelationship(
            start_label=TableMetadata.TABLE_NODE_LABEL,
            start_key=table_key,
            end_label=User.USER_NODE_LABEL,
            end_key=user_key,
            type=TableColumnUsage.TABLE_USER_RELATION_TYPE,
            reverse_type=TableColumnUsage.USER_TABLE_RELATION_TYPE,
            attributes={TableColumnUsage.READ_RELATION_COUNT: reader.read_count},
        )
def get_badge_relations(self) -> List[GraphRelationship]:
    """
    Build one relationship per badge, from the start node to the badge node.

    The start node is identified by ``self.start_label`` and
    ``self.start_key``; the badge key is derived from ``badge.name``.

    :return: list of relationships in ``self.badges`` order
    """
    return [
        GraphRelationship(
            start_label=self.start_label,
            end_label=self.BADGE_NODE_LABEL,
            start_key=self.start_key,
            end_key=self.get_badge_key(item.name),
            type=self.BADGE_RELATION_TYPE,
            reverse_type=self.INVERSE_BADGE_RELATION_TYPE,
            attributes={},
        )
        for item in self.badges
    ]
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from this table stat record to its table.

    :return: iterator producing exactly one GraphRelationship
    """
    stat_key = self.get_table_stat_model_key()
    table_key = self.get_table_key()

    yield GraphRelationship(
        start_key=stat_key,
        start_label=LABEL,
        end_key=table_key,
        end_label=TableMetadata.TABLE_NODE_LABEL,
        type=STAT_RESOURCE_RELATION_TYPE,
        reverse_type=RESOURCE_STAT_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from this stat record to its column.

    :return: iterator producing exactly one GraphRelationship
    """
    stat_key = self.get_table_stat_model_key()
    column_key = self.get_col_key()

    yield GraphRelationship(
        start_key=stat_key,
        start_label=TableColumnStats.LABEL,
        end_key=column_key,
        end_label=ColumnMetadata.COLUMN_NODE_LABEL,
        type=TableColumnStats.STAT_Column_RELATION_TYPE,
        reverse_type=TableColumnStats.Column_STAT_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the dashboard node to its
    last-modified timestamp node.

    The dashboard key is rendered from
    ``DashboardMetadata.DASHBOARD_KEY_FORMAT`` using this record's product,
    cluster, dashboard group id and dashboard id.

    :return: iterator producing exactly one GraphRelationship
    """
    dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
        product=self._product,
        cluster=self._cluster,
        dashboard_group=self._dashboard_group_id,
        dashboard_name=self._dashboard_id,
    )
    timestamp_key = self._get_last_modified_node_key()

    yield GraphRelationship(
        start_key=dashboard_key,
        start_label=DashboardMetadata.DASHBOARD_NODE_LABEL,
        end_key=timestamp_key,
        end_label=timestamp_constants.NODE_LABEL,
        type=timestamp_constants.LASTUPDATED_RELATION_TYPE,
        reverse_type=timestamp_constants.LASTUPDATED_REVERSE_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the table node to its last-updated
    node.

    :return: iterator producing exactly one GraphRelationship
    """
    table_key = self.get_table_model_key()
    last_updated_key = self.get_last_updated_model_key()

    yield GraphRelationship(
        start_label=TableMetadata.TABLE_NODE_LABEL,
        start_key=table_key,
        end_label=TableLastUpdated.LAST_UPDATED_NODE_LABEL,
        end_key=last_updated_key,
        type=TableLastUpdated.TABLE_LASTUPDATED_RELATION_TYPE,
        reverse_type=TableLastUpdated.LASTUPDATED_TABLE_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from this watermark record to its table.

    :return: iterator producing exactly one GraphRelationship
    """
    watermark_key = self.get_watermark_model_key()
    table_key = self.get_metadata_model_key()

    yield GraphRelationship(
        start_key=watermark_key,
        start_label=Watermark.LABEL,
        end_key=table_key,
        end_label='Table',
        type=Watermark.WATERMARK_TABLE_RELATION_TYPE,
        reverse_type=Watermark.TABLE_WATERMARK_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the dashboard node to the user node.

    ``self._view_count`` is stored on the relationship under
    ``READ_RELATION_COUNT_PROPERTY``.

    :return: iterator producing exactly one GraphRelationship
    """
    dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
        product=self._product,
        cluster=self._cluster,
        dashboard_group=self._dashboard_group_id,
        dashboard_name=self._dashboard_id,
    )
    user_key = User.get_user_model_key(email=self._email)

    yield GraphRelationship(
        start_label=DashboardMetadata.DASHBOARD_NODE_LABEL,
        end_label=User.USER_NODE_LABEL,
        start_key=dashboard_key,
        end_key=user_key,
        type=READ_REVERSE_RELATION_TYPE,
        reverse_type=READ_RELATION_TYPE,
        attributes={READ_RELATION_COUNT_PROPERTY: self._view_count},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from this table source record to its
    table.

    :return: iterator producing exactly one GraphRelationship
    """
    source_key = self.get_source_model_key()
    table_key = self.get_metadata_model_key()

    yield GraphRelationship(
        start_label=TableSource.LABEL,
        start_key=source_key,
        end_label=TableMetadata.TABLE_NODE_LABEL,
        end_key=table_key,
        type=TableSource.SOURCE_TABLE_RELATION_TYPE,
        reverse_type=TableSource.TABLE_SOURCE_RELATION_TYPE,
        attributes={},
    )
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the dashboard node to this query
    node.

    :return: iterator producing exactly one GraphRelationship
    """
    dashboard_key = DashboardMetadata.DASHBOARD_KEY_FORMAT.format(
        product=self._product,
        cluster=self._cluster,
        dashboard_group=self._dashboard_group_id,
        dashboard_name=self._dashboard_id,
    )
    query_key = self._get_query_node_key()

    yield GraphRelationship(
        start_label=DashboardMetadata.DASHBOARD_NODE_LABEL,
        end_label=DashboardQuery.DASHBOARD_QUERY_LABEL,
        start_key=dashboard_key,
        end_key=query_key,
        type=DashboardQuery.DASHBOARD_QUERY_RELATION_TYPE,
        reverse_type=DashboardQuery.QUERY_DASHBOARD_RELATION_TYPE,
        attributes={},
    )
def create_relation(self) -> List[GraphRelationship]:
    """
    Build the relationship from this watermark record to its table.

    :return: one-element list holding the watermark -> table relationship
    """
    watermark_key = self.get_watermark_model_key()
    table_key = self.get_metadata_model_key()

    return [
        GraphRelationship(
            start_key=watermark_key,
            start_label=Watermark.LABEL,
            end_key=table_key,
            end_label='Table',
            type=Watermark.WATERMARK_TABLE_RELATION_TYPE,
            reverse_type=Watermark.TABLE_WATERMARK_RELATION_TYPE,
            attributes={},
        )
    ]
def _create_rel_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield one table -> table relationship per downstream dependency.

    Each relationship starts at ``self.table_key`` and ends at an entry of
    ``self.downstream_deps``, which is used as the end key unchanged.

    :return: iterator of GraphRelationship
    """
    for dependency_key in self.downstream_deps:
        lineage_relation = GraphRelationship(
            start_key=self.table_key,
            start_label=TableMetadata.TABLE_NODE_LABEL,
            end_label=TableMetadata.TABLE_NODE_LABEL,
            end_key=dependency_key,
            type=TableLineage.ORIGIN_DEPENDENCY_RELATION_TYPE,
            reverse_type=TableLineage.DEPENDENCY_ORIGIN_RELATION_TYPE,
            attributes={},
        )
        yield lineage_relation
def create_relation(self) -> Iterable[GraphRelationship]:
    """
    Build the relationships from this movie to its actors and cities.

    One movie -> actor relationship is created per entry of
    ``self._actors``, followed by one movie -> city relationship per entry
    of ``self._cities``. Every relationship starts at the movie node, so
    the start key is always rendered with ``Movie.KEY_FORMAT`` from
    ``self._name``.

    :return: list of relationships, actor relations first, then city
        relations
    """
    result = []

    for actor in self._actors:
        movie_actor_relation = GraphRelationship(
            start_key=Movie.KEY_FORMAT.format(self._name),
            end_key=Actor.KEY_FORMAT.format(actor.name),
            start_label=Movie.LABEL,
            end_label=Actor.LABEL,
            type=Movie.MOVIE_ACTOR_RELATION_TYPE,
            reverse_type=Movie.ACTOR_MOVIE_RELATION_TYPE,
            attributes={})
        result.append(movie_actor_relation)

    for city in self._cities:
        city_movie_relation = GraphRelationship(
            # The start node is the movie (start_label=Movie.LABEL), so its
            # key must use the movie key format, not the city one.
            start_key=Movie.KEY_FORMAT.format(self._name),
            end_key=City.KEY_FORMAT.format(city.name),
            start_label=Movie.LABEL,
            end_label=City.LABEL,
            type=Movie.MOVIE_CITY_RELATION_TYPE,
            reverse_type=Movie.CITY_MOVIE_RELATION_TYPE,
            attributes={})
        result.append(city_movie_relation)

    return result
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the dashboard query node to this
    chart node.

    The query key is rendered from
    ``DashboardQuery.DASHBOARD_QUERY_KEY_FORMAT`` using this record's
    product, cluster, dashboard group id, dashboard id and query id.

    :return: iterator producing exactly one GraphRelationship
    """
    query_key = DashboardQuery.DASHBOARD_QUERY_KEY_FORMAT.format(
        product=self._product,
        cluster=self._cluster,
        dashboard_group_id=self._dashboard_group_id,
        dashboard_id=self._dashboard_id,
        query_id=self._query_id,
    )
    chart_key = self._get_chart_node_key()

    yield GraphRelationship(
        start_label=DashboardQuery.DASHBOARD_QUERY_LABEL,
        start_key=query_key,
        end_label=DashboardChart.DASHBOARD_CHART_LABEL,
        end_key=chart_key,
        type=DashboardChart.CHART_RELATION_TYPE,
        reverse_type=DashboardChart.CHART_REVERSE_RELATION_TYPE,
        attributes={},
    )