def test_feature_table_extraction_with_description_stream(self) -> None:
        """Verify the extraction sequence for a feature table with a stream source.

        With programmatic descriptions enabled, successive ``extract()`` calls
        are expected to return, in order: the table definition, the
        ``feature_table_details`` description, the ``batch_source``
        description, the ``stream_source`` description, and finally ``None``.
        """
        self._init_extractor(programmatic_description_enabled=True)
        self.extractor._client.list_projects.return_value = ["default"]
        self._mock_feature_table(add_stream_source=True)

        # 1) The table definition itself; only its type is checked here.
        table_record = self.extractor.extract()
        assert isinstance(table_record, TableMetadata)

        # 2) The feature table details description.
        details_record = self.extractor.extract()
        assert isinstance(details_record, TableMetadata)
        details_text = TestFeastExtractor._strip_margin(
            """* Created at **2020-01-01 00:00:00**
                  |""")
        expected_details = DescriptionMetadata(details_text,
                                               "feature_table_details")
        # Descriptions are compared through their repr strings.
        self.assertEqual(repr(expected_details),
                         repr(details_record.description))

        # 3) The batch source description.
        batch_record = self.extractor.extract()
        assert isinstance(batch_record, TableMetadata)
        batch_text = TestFeastExtractor._strip_margin("""```
                |fileOptions:
                |  fileFormat:
                |    parquetFormat: {}
                |  fileUrl: file:///some/location
                |type: BATCH_FILE
                |```""")
        expected_batch = DescriptionMetadata(batch_text, "batch_source")
        self.assertEqual(repr(expected_batch),
                         repr(batch_record.description))

        # 4) The stream source description.
        stream_record = self.extractor.extract()
        assert isinstance(stream_record, TableMetadata)
        stream_text = TestFeastExtractor._strip_margin("""```
                 |createdTimestampColumn: datetime
                 |eventTimestampColumn: datetime
                 |kafkaOptions:
                 |  bootstrapServers: broker1
                 |  messageFormat:
                 |    avroFormat:
                 |      schemaJson: '{"type": "record", "name": "DriverTrips", "fields": [{"name": "driver_id",
                 |        "type": "long"}, {"name": "trips_today", "type": "int"}, {"name": "datetime",
                 |        "type": {"type": "long", "logicalType": "timestamp-micros"}}]}'
                 |  topic: driver_trips
                 |type: STREAM_KAFKA
                 |```""")
        expected_stream = DescriptionMetadata(stream_text, "stream_source")
        self.assertEqual(repr(expected_stream),
                         repr(stream_record.description))

        # 5) Nothing is left to extract.
        self.assertIsNone(self.extractor.extract())
Esempio n. 2
0
 def __init__(
     self,
     schema_key: str,
     schema: str,
     description: str = None,
     description_source: str = None,
     **kwargs: Any
 ) -> None:
     """Store the schema identity and prepare the node/relation iterators.

     :param schema_key: key stored on the instance as ``_schema_key``.
     :param schema: schema value stored on the instance as ``_schema``.
     :param description: optional description text; when falsy (``None`` or
         an empty string) no description metadata is created.
     :param description_source: passed as ``source`` when the description
         metadata is created.
     :param kwargs: accepted but not used by this constructor.
     """
     self._schema_key = schema_key
     self._schema = schema

     # Only build description metadata when there is text to describe.
     if description:
         self._description = DescriptionMetadata.create_description_metadata(
             text=description,
             source=description_source,
         )
     else:
         self._description = None

     self._node_iterator = self._create_node_iterator()
     self._relation_iterator = self._create_relation_iterator()
Esempio n. 3
0
    def test_serialize_column_with_source_description_metadata(self) -> None:
        """Check Neo4j serialization of a column description with a custom key and source.

        Builds a ``DescriptionMetadata`` with an explicit ``description_key``
        and ``source='external'``, then asserts the serialized node rows and
        the serialized relation rows it yields.
        """
        metadata = DescriptionMetadata(
            text='test text 3',
            start_label='Column',
            start_key='test_start_key',
            description_key='customized_key',
            source='external',
        )

        # Drain the nodes until next_node() returns a falsy value.
        serialized_nodes = []
        while True:
            node = metadata.next_node()
            if not node:
                break
            serialized_nodes.append(neo4_serializer.serialize_node(node))

        expected_nodes = [
            {
                'description': 'test text 3',
                'KEY': 'customized_key',
                'LABEL': 'Programmatic_Description',
                'description_source': 'external'
            },
        ]
        self.assertEqual(serialized_nodes, expected_nodes)

        # Drain the relations the same way.
        serialized_relations = []
        while True:
            relation = metadata.next_relation()
            if not relation:
                break
            serialized_relations.append(
                neo4_serializer.serialize_relationship(relation))

        expected_relations = [{
            'START_KEY': 'test_start_key',
            'START_LABEL': 'Column',
            'END_KEY': 'customized_key',
            'END_LABEL': 'Programmatic_Description',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF'
        }]
        self.assertEqual(serialized_relations, expected_relations)
    def test_feature_table_extraction_with_description_batch(self) -> None:
        """Verify the extraction sequence for a labelled feature table without a stream source.

        With programmatic descriptions enabled, successive ``extract()`` calls
        are expected to return, in order: the table definition, the
        ``feature_table_details`` description (including the label), the
        ``batch_source`` description, and finally ``None``.
        """
        self._init_extractor(programmatic_description_enabled=True)
        self.extractor._client.list_projects.return_value = ["default"]
        self._mock_feature_table(labels={"label1": "value1"})

        # 1) The table definition itself; only its type is checked here.
        table_record = self.extractor.extract()
        assert isinstance(table_record, TableMetadata)

        # 2) The feature table details description, which lists the label.
        details_record = self.extractor.extract()
        assert isinstance(details_record, TableMetadata)
        details_text = TestFeastExtractor._strip_margin(
            """* Created at **2020-01-01 00:00:00**
                  |* Labels:
                  |    * label1: **value1**
                  |""")
        expected_details = DescriptionMetadata(details_text,
                                               "feature_table_details")
        # Descriptions are compared through their repr strings.
        self.assertEqual(repr(expected_details),
                         repr(details_record.description))

        # 3) The batch source description.
        batch_record = self.extractor.extract()
        assert isinstance(batch_record, TableMetadata)
        batch_text = TestFeastExtractor._strip_margin("""```
                |fileOptions:
                |  fileFormat:
                |    parquetFormat: {}
                |  fileUrl: file:///some/location
                |type: BATCH_FILE
                |```""")
        expected_batch = DescriptionMetadata(batch_text, "batch_source")
        self.assertEqual(repr(expected_batch),
                         repr(batch_record.description))

        # 4) Nothing is left to extract.
        self.assertIsNone(self.extractor.extract())
Esempio n. 5
0
    def test_serialize_table_description_metadata(self) -> None:
        """Check Neo4j serialization of a table description built with default key and source.

        Builds a ``DescriptionMetadata`` from text, start label and start key
        only, then asserts the serialized node rows and the serialized
        relation rows it yields.
        """
        metadata = DescriptionMetadata(
            text='test text 1',
            start_label='Table',
            start_key='test_start_key',
        )

        # Drain the nodes until next_node() returns a falsy value.
        serialized_nodes = []
        while True:
            node = metadata.next_node()
            if not node:
                break
            serialized_nodes.append(neo4_serializer.serialize_node(node))

        expected_nodes = [
            {
                'description': 'test text 1',
                'KEY': 'test_start_key/_description',
                'LABEL': 'Description',
                'description_source': 'description'
            },
        ]
        self.assertEqual(serialized_nodes, expected_nodes)

        # Drain the relations the same way.
        serialized_relations = []
        while True:
            relation = metadata.next_relation()
            if not relation:
                break
            serialized_relations.append(
                neo4_serializer.serialize_relationship(relation))

        expected_relations = [{
            'START_KEY': 'test_start_key',
            'START_LABEL': 'Table',
            'END_KEY': 'test_start_key/_description',
            'END_LABEL': 'Description',
            'TYPE': 'DESCRIPTION',
            'REVERSE_TYPE': 'DESCRIPTION_OF'
        }]
        self.assertEqual(serialized_relations, expected_relations)
Esempio n. 6
0
    def test_raise_exception_when_missing_data(self) -> None:
        """Check that node/relation creation raises when required fields are absent.

        Each failing case is paired with calls using sufficient arguments,
        which are expected to complete without raising.
        """
        # Objects are constructed outside the ``with`` blocks so that only the
        # next_node()/next_relation() call is expected to raise.

        # assert raise when missing description node key
        without_any_key = DescriptionMetadata(text='test_text')
        with self.assertRaises(Exception):
            without_any_key.next_node()
        with_description_key = DescriptionMetadata(text='test_text',
                                                   description_key='test_key')
        with_description_key.next_node()
        with_start_key = DescriptionMetadata(text='test_text',
                                             start_key='start_key')
        with_start_key.next_node()

        # assert raise when missing relation start label
        without_start_label = DescriptionMetadata(text='test_text',
                                                  start_key='start_key')
        with self.assertRaises(Exception):
            without_start_label.next_relation()
        with_key_and_label = DescriptionMetadata(text='test_text',
                                                 start_key='test_key',
                                                 start_label='Table')
        with_key_and_label.next_relation()

        # assert raise when missing relation start key
        without_start_key = DescriptionMetadata(text='test_text',
                                                description_key='test_key',
                                                start_label='Table')
        with self.assertRaises(Exception):
            without_start_key.next_relation()