def test_feature_table_extraction_with_description_stream(self) -> None:
    """Check the records extracted for a feature table with a stream source.

    With programmatic descriptions enabled, successive ``extract()`` calls
    are expected to return the table definition, then the
    ``feature_table_details``, ``batch_source`` and ``stream_source``
    descriptions (in that order), and finally ``None``.
    """
    # NOTE(review): the margin-delimited literals below are reproduced
    # exactly as they appear in this view; their line breaks look
    # collapsed -- confirm against the extractor's real output.

    def next_table_record() -> TableMetadata:
        # Pull one record and narrow its type before it is inspected.
        record = self.extractor.extract()
        assert isinstance(record, TableMetadata)
        return record

    self._init_extractor(programmatic_description_enabled=True)
    self.extractor._client.list_projects.return_value = ["default"]
    self._mock_feature_table(add_stream_source=True)

    # Record 1: the feature table definition itself (type check only).
    next_table_record()

    # Record 2: the feature table details description.
    details_record = next_table_record()
    details_text = TestFeastExtractor._strip_margin(
        """* Created at **2020-01-01 00:00:00** |""")
    expected_details = DescriptionMetadata(
        details_text,
        "feature_table_details",
    )
    self.assertEqual(
        repr(expected_details),
        repr(details_record.description),
    )

    # Record 3: the batch source description.
    batch_record = next_table_record()
    batch_text = TestFeastExtractor._strip_margin(
        """``` |fileOptions: | fileFormat: | parquetFormat: {} | fileUrl: file:///some/location |type: BATCH_FILE |```""")
    expected_batch = DescriptionMetadata(batch_text, "batch_source")
    self.assertEqual(
        repr(expected_batch),
        repr(batch_record.description),
    )

    # Record 4: the stream source description.
    stream_record = next_table_record()
    stream_text = TestFeastExtractor._strip_margin(
        """``` |createdTimestampColumn: datetime |eventTimestampColumn: datetime |kafkaOptions: | bootstrapServers: broker1 | messageFormat: | avroFormat: | schemaJson: '{"type": "record", "name": "DriverTrips", "fields": [{"name": "driver_id", | "type": "long"}, {"name": "trips_today", "type": "int"}, {"name": "datetime", | "type": {"type": "long", "logicalType": "timestamp-micros"}}]}' | topic: driver_trips |type: STREAM_KAFKA |```""")
    expected_stream = DescriptionMetadata(stream_text, "stream_source")
    self.assertEqual(
        repr(expected_stream),
        repr(stream_record.description),
    )

    # Nothing further should be produced once all records are consumed.
    self.assertIsNone(self.extractor.extract())
def test_feature_table_extraction_with_description_batch(self) -> None:
    """Check the records extracted for a labelled, batch-only feature table.

    With programmatic descriptions enabled and a table mocked with the
    label ``label1=value1``, successive ``extract()`` calls are expected
    to return the table definition, then the ``feature_table_details``
    and ``batch_source`` descriptions, and finally ``None``.
    """
    # NOTE(review): the margin-delimited literals below are reproduced
    # exactly as they appear in this view; their line breaks look
    # collapsed -- confirm against the extractor's real output.

    def next_table_record() -> TableMetadata:
        # Pull one record and narrow its type before it is inspected.
        record = self.extractor.extract()
        assert isinstance(record, TableMetadata)
        return record

    self._init_extractor(programmatic_description_enabled=True)
    self.extractor._client.list_projects.return_value = ["default"]
    self._mock_feature_table(labels={"label1": "value1"})

    # Record 1: the feature table definition itself (type check only).
    next_table_record()

    # Record 2: the feature table details, including the labels section.
    details_record = next_table_record()
    details_text = TestFeastExtractor._strip_margin(
        """* Created at **2020-01-01 00:00:00** |* Labels: | * label1: **value1** |""")
    expected_details = DescriptionMetadata(
        details_text,
        "feature_table_details",
    )
    self.assertEqual(
        repr(expected_details),
        repr(details_record.description),
    )

    # Record 3: the batch source description.
    batch_record = next_table_record()
    batch_text = TestFeastExtractor._strip_margin(
        """``` |fileOptions: | fileFormat: | parquetFormat: {} | fileUrl: file:///some/location |type: BATCH_FILE |```""")
    expected_batch = DescriptionMetadata(batch_text, "batch_source")
    self.assertEqual(
        repr(expected_batch),
        repr(batch_record.description),
    )

    # No stream source was mocked, so extraction ends here.
    self.assertIsNone(self.extractor.extract())
def test_serialize_column_with_source_description_metadata(self) -> None:
    """Serialize a column description that has a custom key and source.

    Drains every node and then every relation from the metadata object,
    passes each through ``neo4_serializer``, and compares the collected
    dictionaries with the expected ``Programmatic_Description`` node and
    its ``DESCRIPTION`` / ``DESCRIPTION_OF`` relationship.
    """
    description_metadata = DescriptionMetadata(
        text='test text 3',
        start_label='Column',
        start_key='test_start_key',
        description_key='customized_key',
        source='external',
    )

    # Collect nodes until the metadata object reports none are left.
    serialized_nodes = []
    while True:
        node = description_metadata.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(
        serialized_nodes,
        [
            {
                'description': 'test text 3',
                'KEY': 'customized_key',
                'LABEL': 'Programmatic_Description',
                'description_source': 'external'
            },
        ],
    )

    # Relations are drained only after the node assertions have passed.
    serialized_relations = []
    while True:
        relation = description_metadata.next_relation()
        if not relation:
            break
        serialized_relations.append(
            neo4_serializer.serialize_relationship(relation)
        )

    self.assertEqual(
        serialized_relations,
        [
            {
                'START_KEY': 'test_start_key',
                'START_LABEL': 'Column',
                'END_KEY': 'customized_key',
                'END_LABEL': 'Programmatic_Description',
                'TYPE': 'DESCRIPTION',
                'REVERSE_TYPE': 'DESCRIPTION_OF'
            },
        ],
    )
def test_serialize_table_description_metadata(self) -> None:
    """Serialize a table description built without an explicit key or source.

    Drains every node and then every relation from the metadata object,
    passes each through ``neo4_serializer``, and compares the collected
    dictionaries with the expected ``Description`` node (keyed
    ``test_start_key/_description``) and its relationship to the table.
    """
    description_metadata = DescriptionMetadata(
        text='test text 1',
        start_label='Table',
        start_key='test_start_key',
    )

    # Collect nodes until the metadata object reports none are left.
    serialized_nodes = []
    while True:
        node = description_metadata.next_node()
        if not node:
            break
        serialized_nodes.append(neo4_serializer.serialize_node(node))

    self.assertEqual(
        serialized_nodes,
        [
            {
                'description': 'test text 1',
                'KEY': 'test_start_key/_description',
                'LABEL': 'Description',
                'description_source': 'description'
            },
        ],
    )

    # Relations are drained only after the node assertions have passed.
    serialized_relations = []
    while True:
        relation = description_metadata.next_relation()
        if not relation:
            break
        serialized_relations.append(
            neo4_serializer.serialize_relationship(relation)
        )

    self.assertEqual(
        serialized_relations,
        [
            {
                'START_KEY': 'test_start_key',
                'START_LABEL': 'Table',
                'END_KEY': 'test_start_key/_description',
                'END_LABEL': 'Description',
                'TYPE': 'DESCRIPTION',
                'REVERSE_TYPE': 'DESCRIPTION_OF'
            },
        ],
    )
def test_raise_exception_when_missing_data(self) -> None:
    """Check which missing fields make node/relation generation raise.

    Each failing case is paired with nearby calls that supply the missing
    piece and are expected to complete without raising.
    """
    # Node generation must raise when no description node key is available.
    # The object is built outside the guard so that only the call is checked.
    without_any_key = DescriptionMetadata(text='test_text')
    with self.assertRaises(Exception):
        without_any_key.next_node()

    # Supplying either a description key or a start key is expected to work.
    DescriptionMetadata(
        text='test_text',
        description_key='test_key',
    ).next_node()
    DescriptionMetadata(
        text='test_text',
        start_key='start_key',
    ).next_node()

    # Relation generation must raise when the start label is missing.
    without_start_label = DescriptionMetadata(
        text='test_text',
        start_key='start_key',
    )
    with self.assertRaises(Exception):
        without_start_label.next_relation()

    # With both start key and start label the relation is expected to work.
    DescriptionMetadata(
        text='test_text',
        start_key='test_key',
        start_label='Table',
    ).next_relation()

    # Relation generation must raise when the start key is missing.
    without_start_key = DescriptionMetadata(
        text='test_text',
        description_key='test_key',
        start_label='Table',
    )
    with self.assertRaises(Exception):
        without_start_key.next_relation()