예제 #1
0
    def test_to_json(self):
        # type: () -> None
        """
        Verify that to_json() yields an index-action line followed by a
        document line, each decoding to the expected dictionary
        """
        wanted_index_action = {
            "index": {"_type": "test_type", "_index": "test_index"}
        }
        wanted_document = {
            "database": "test_database",
            "cluster": "test_cluster",
            "schema_name": "test_schema",
            "table_name": "test_table",
            "table_key": "test_table_key",
            "table_last_updated_epoch": 123456789,
            "table_description": "test_table_description",
            "column_names": ["test_col1", "test_col2"],
            "column_descriptions": ["test_description1", "test_description2"],
            "total_usage": 100,
            "unique_usage": 10,
            "tag_names": ["test"],
        }

        document = TableESDocument(
            elasticsearch_index='test_index',
            elasticsearch_type='test_type',
            database='test_database',
            cluster='test_cluster',
            schema_name='test_schema',
            table_name='test_table',
            table_key='test_table_key',
            table_last_updated_epoch=123456789,
            table_description='test_table_description',
            column_names=['test_col1', 'test_col2'],
            column_descriptions=['test_description1', 'test_description2'],
            total_usage=100,
            unique_usage=10,
            tag_names=['test'],
        )

        lines = document.to_json().split("\n")

        # splitting on "\n" gives three pieces only when the string holds
        # exactly two newline characters
        self.assertEqual(
            len(lines), 3,
            "Result from to_json() function doesn't have 2 newlines!")

        # compare in output order: the index action first, then the document
        for line, wanted in zip(lines, (wanted_index_action, wanted_document)):
            self.assertDictEqual(json.loads(line), wanted)
예제 #2
0
    def test_to_json(self):
        # type: () -> None
        """
        Verify that to_json() yields a string containing exactly one newline
        whose first line decodes to the expected document dictionary
        """
        wanted_document = {
            "database": "test_database",
            "cluster": "test_cluster",
            "schema": "test_schema",
            "name": "test_table",
            "display_name": "test_schema.test_table",
            "key": "test_table_key",
            "last_updated_timestamp": 123456789,
            "description": "test_table_description",
            "column_names": ["test_col1", "test_col2"],
            "column_descriptions": ["test_description1", "test_description2"],
            "total_usage": 100,
            "unique_usage": 10,
            "tags": ["test"],
            "programmatic_descriptions": ["test"],
            "badges": ["badge1"],
            "schema_description": "schema description",
        }

        document = TableESDocument(
            database='test_database',
            cluster='test_cluster',
            schema='test_schema',
            name='test_table',
            key='test_table_key',
            last_updated_timestamp=123456789,
            description='test_table_description',
            column_names=['test_col1', 'test_col2'],
            column_descriptions=['test_description1', 'test_description2'],
            total_usage=100,
            unique_usage=10,
            tags=['test'],
            programmatic_descriptions=['test'],
            badges=['badge1'],
            schema_description='schema description',
        )

        lines = document.to_json().split("\n")

        # splitting on "\n" gives two pieces only when the string holds
        # exactly one newline character
        self.assertEqual(
            len(lines), 2,
            "Result from to_json() function doesn't have a newline!")

        first_line = lines[0]
        self.assertDictEqual(json.loads(first_line), wanted_document)
    def test_loading_with_single_object(self):
        # type: () -> None
        """
        Load one TableESDocument through FSElasticsearchJSONLoader and check
        the outcome against a single expected JSON line
        """
        json_loader = FSElasticsearchJSONLoader()
        json_loader.init(
            conf=Scoped.get_scoped_conf(conf=self.conf,
                                        scope=json_loader.get_scope()))

        document = TableESDocument(
            database='test_database',
            cluster='test_cluster',
            schema_name='test_schema',
            table_name='test_table',
            table_key='test_table_key',
            table_last_updated_epoch=123456789,
            table_description='test_description',
            column_names=['test_col1', 'test_col2'],
            column_descriptions=['test_comment1', 'test_comment2'],
            total_usage=10,
            unique_usage=5,
            tag_names=['test_tag1', 'test_tag2'],
        )

        json_loader.load(document)
        json_loader.close()

        # adjacent literals are concatenated into one JSON line
        expected_line = (
            '{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
            '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
            '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
            '"table_last_updated_epoch": 123456789,'
            '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
            '"tag_names": ["test_tag1", "test_tag2"]}'
        )

        self._check_results_helper(expected=[expected_line])
    def test_transform_success_case(self):
        # type: () -> None
        """
        Feed a Neo4jDataResult to the transformer and compare the attributes
        of the outcome with those of the expected TableESDocument
        """
        def table_fields():
            # Returns a fresh dict (and fresh lists) on every call so the
            # input record and the expected document never share objects.
            return dict(
                database="test_database",
                cluster="test_cluster",
                schema_name="test_schema_name",
                table_name="test_table_name",
                table_key="test_table_key",
                table_last_updated_epoch=123456789,
                table_description="test_table_description",
                column_names=["test_col1", "test_col2"],
                column_descriptions=[
                    "test_col_description1",
                    "test_col_description2",
                ],
                total_usage=10,
                unique_usage=5,
                tag_names=["test_tag1", "test_tag2"],
            )

        transformer = ElasticsearchDocumentTransformer()
        scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                             scope=transformer.get_scope())
        transformer.init(conf=scoped_conf)

        record = Neo4jDataResult(**table_fields())
        actual = transformer.transform(record)

        wanted = TableESDocument(elasticsearch_index='test_es_index',
                                 elasticsearch_type='test_es_type',
                                 **table_fields())

        self.assertIsInstance(actual, ElasticsearchDocument)
        # compare attribute by attribute through the instance dictionaries
        self.assertDictEqual(vars(actual), vars(wanted))
    def test_loading_with_list_of_objects(self):
        # type: () -> None
        """
        Load the same TableESDocument five times and check the outcome
        against five copies of the expected JSON line
        """
        copies = 5

        json_loader = FSElasticsearchJSONLoader()
        json_loader.init(
            conf=Scoped.get_scoped_conf(conf=self.conf,
                                        scope=json_loader.get_scope()))

        document = TableESDocument(
            database='test_database',
            cluster='test_cluster',
            schema='test_schema',
            name='test_table',
            key='test_table_key',
            last_updated_timestamp=123456789,
            description='test_description',
            column_names=['test_col1', 'test_col2'],
            column_descriptions=['test_comment1', 'test_comment2'],
            total_usage=10,
            unique_usage=5,
            tags=['test_tag1', 'test_tag2'],
            badges=['badge1'],
            schema_description='schema_description',
            programmatic_descriptions=['test'],
        )

        # the very same instance is handed to the loader on every pass
        for _ in range(copies):
            json_loader.load(document)
        json_loader.close()

        # adjacent literals are concatenated into one JSON line
        expected_line = (
            '{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
            '"schema": "test_schema", "database": "test_database", "cluster": "test_cluster", '
            '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
            '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
            '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
            '"tags": ["test_tag1", "test_tag2"], "schema_description": "schema_description", '
            '"programmatic_descriptions":["test"], '
            '"badges": ["badge1"]}'
        )

        self._check_results_helper(expected=[expected_line] * copies)
    def test_loading_with_list_of_objects(self):
        # type: () -> None
        """
        Load the same TableESDocument five times and check the outcome
        against five index-action / document line pairs
        """
        copies = 5

        json_loader = FSElasticsearchJSONLoader()
        json_loader.init(
            conf=Scoped.get_scoped_conf(conf=self.conf,
                                        scope=json_loader.get_scope()))

        document = TableESDocument(
            elasticsearch_index='test_es_index',
            elasticsearch_type='test_es_type',
            database='test_database',
            cluster='test_cluster',
            schema_name='test_schema',
            table_name='test_table',
            table_key='test_table_key',
            table_last_updated_epoch=123456789,
            table_description='test_description',
            column_names=['test_col1', 'test_col2'],
            column_descriptions=['test_comment1', 'test_comment2'],
            total_usage=10,
            unique_usage=5,
            tag_names=['test_tag1', 'test_tag2'],
        )

        # the very same instance is handed to the loader on every pass
        for _ in range(copies):
            json_loader.load(document)
        json_loader.close()

        index_action_line = '{"index": {"_type": "test_es_type", "_index": "test_es_index"}}'
        # adjacent literals are concatenated into one JSON line
        document_line = (
            '{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
            '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
            '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
            '"table_last_updated_epoch": 123456789,'
            '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
            '"tag_names": ["test_tag1", "test_tag2"]}'
        )

        # index-action and document lines alternate, one pair per load
        expected_lines = [index_action_line, document_line] * copies

        self._check_results_helper(expected=expected_lines)
예제 #7
0
    def transform(self, record):
        # type: (Neo4jDataResult) -> Optional[ElasticsearchDocument]
        """
        Build a TableESDocument out of a Neo4jDataResult.

        :param record: the Neo4jDataResult whose fields are copied over
        :return: None when record is falsy; otherwise a TableESDocument
            carrying the record's fields plus this transformer's
            elasticsearch_index and elasticsearch_type
        :raises Exception: when a truthy record is not a Neo4jDataResult
        """
        if not record:
            return None

        if isinstance(record, Neo4jDataResult):
            document_fields = {
                'elasticsearch_index': self.elasticsearch_index,
                'elasticsearch_type': self.elasticsearch_type,
                'database': record.database,
                'cluster': record.cluster,
                'schema_name': record.schema_name,
                'table_name': record.table_name,
                'table_key': record.table_key,
                'table_description': record.table_description,
                'table_last_updated_epoch': record.table_last_updated_epoch,
                'column_names': record.column_names,
                'column_descriptions': record.column_descriptions,
                'total_usage': record.total_usage,
                'unique_usage': record.unique_usage,
                'tag_names': record.tag_names,
            }
            return TableESDocument(**document_fields)

        raise Exception("ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!")