def test_to_json(self):
    # type: () -> None
    """
    Verify the text produced by TableESDocument.to_json().

    The output is split on newlines and must yield exactly three parts:
    the first parses to the index-action dict, the second parses to the
    document dict.
    """
    document_fields = {
        'database': 'test_database',
        'cluster': 'test_cluster',
        'schema_name': 'test_schema',
        'table_name': 'test_table',
        'table_key': 'test_table_key',
        'table_last_updated_epoch': 123456789,
        'table_description': 'test_table_description',
        'column_names': ['test_col1', 'test_col2'],
        'column_descriptions': ['test_description1', 'test_description2'],
        'total_usage': 100,
        'unique_usage': 10,
        'tag_names': ['test'],
    }
    document = TableESDocument(elasticsearch_index='test_index',
                               elasticsearch_type='test_type',
                               **document_fields)

    # Expected values are spelled out independently of the constructor
    # arguments so the comparison does not depend on the object under test.
    index_action = {
        "index": {
            "_type": "test_type",
            "_index": "test_index"
        }
    }
    document_body = {
        "database": "test_database",
        "cluster": "test_cluster",
        "schema_name": "test_schema",
        "table_name": "test_table",
        "table_key": "test_table_key",
        "table_last_updated_epoch": 123456789,
        "table_description": "test_table_description",
        "column_names": ["test_col1", "test_col2"],
        "column_descriptions": ["test_description1", "test_description2"],
        "total_usage": 100,
        "unique_usage": 10,
        "tag_names": ["test"]
    }

    parts = document.to_json().split("\n")

    # Two newline characters in the output give three parts after the split.
    self.assertEqual(len(parts), 3,
                     "Result from to_json() function doesn't have 2 newlines!")

    index_line, document_line = parts[0], parts[1]
    self.assertDictEqual(json.loads(index_line), index_action)
    self.assertDictEqual(json.loads(document_line), document_body)
def test_to_json(self):
    # type: () -> None
    """
    Verify the text produced by TableESDocument.to_json().

    The output is split on newlines and must yield exactly two parts;
    the first part must parse to the expected document dict.
    """
    constructor_args = {
        'database': 'test_database',
        'cluster': 'test_cluster',
        'schema': 'test_schema',
        'name': 'test_table',
        'key': 'test_table_key',
        'last_updated_timestamp': 123456789,
        'description': 'test_table_description',
        'column_names': ['test_col1', 'test_col2'],
        'column_descriptions': ['test_description1', 'test_description2'],
        'total_usage': 100,
        'unique_usage': 10,
        'tags': ['test'],
        'programmatic_descriptions': ['test'],
        'badges': ['badge1'],
        'schema_description': 'schema description',
    }
    document = TableESDocument(**constructor_args)

    # "display_name" is not passed to the constructor; the test expects it
    # to show up in the serialized output as "<schema>.<name>".
    document_body = {
        "database": "test_database",
        "cluster": "test_cluster",
        "schema": "test_schema",
        "name": "test_table",
        "display_name": "test_schema.test_table",
        "key": "test_table_key",
        "last_updated_timestamp": 123456789,
        "description": "test_table_description",
        "column_names": ["test_col1", "test_col2"],
        "column_descriptions": ["test_description1", "test_description2"],
        "total_usage": 100,
        "unique_usage": 10,
        "tags": ["test"],
        "programmatic_descriptions": ['test'],
        "badges": ["badge1"],
        'schema_description': 'schema description'
    }

    parts = document.to_json().split("\n")

    # A single newline character in the output gives two parts after the split.
    self.assertEqual(len(parts), 2,
                     "Result from to_json() function doesn't have a newline!")
    self.assertDictEqual(json.loads(parts[0]), document_body)
def test_loading_with_single_object(self):
    # type: () -> None
    """
    Load one TableESDocument through FSElasticsearchJSONLoader and hand the
    expected serialized line to self._check_results_helper (defined
    elsewhere in the test class).
    """
    loader = FSElasticsearchJSONLoader()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                         scope=loader.get_scope())
    loader.init(conf=scoped_conf)

    document = TableESDocument(
        database='test_database',
        cluster='test_cluster',
        schema_name='test_schema',
        table_name='test_table',
        table_key='test_table_key',
        table_last_updated_epoch=123456789,
        table_description='test_description',
        column_names=['test_col1', 'test_col2'],
        column_descriptions=['test_comment1', 'test_comment2'],
        total_usage=10,
        unique_usage=5,
        tag_names=['test_tag1', 'test_tag2'],
    )

    loader.load(document)
    # The loader is closed before the results are checked.
    loader.close()

    expected_line = (
        '{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
        '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
        '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
        '"table_last_updated_epoch": 123456789,'
        '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
        '"tag_names": ["test_tag1", "test_tag2"]}'
    )

    self._check_results_helper(expected=[expected_line])
def test_transform_success_case(self):
    # type: () -> None
    """
    Feed a Neo4jDataResult to ElasticsearchDocumentTransformer.transform()
    and check that the result is an ElasticsearchDocument whose attributes
    equal those of the expected TableESDocument.
    """

    def record_fields():
        # Returns a fresh dict (and fresh lists) on every call so the input
        # record and the expected document never share mutable objects.
        return {
            'database': "test_database",
            'cluster': "test_cluster",
            'schema_name': "test_schema_name",
            'table_name': "test_table_name",
            'table_key': "test_table_key",
            'table_last_updated_epoch': 123456789,
            'table_description': "test_table_description",
            'column_names': ["test_col1", "test_col2"],
            'column_descriptions': ["test_col_description1",
                                    "test_col_description2"],
            'total_usage': 10,
            'unique_usage': 5,
            'tag_names': ["test_tag1", "test_tag2"],
        }

    transformer = ElasticsearchDocumentTransformer()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                         scope=transformer.get_scope())
    transformer.init(conf=scoped_conf)

    record = Neo4jDataResult(**record_fields())
    actual = transformer.transform(record)

    expected = TableESDocument(elasticsearch_index='test_es_index',
                               elasticsearch_type='test_es_type',
                               **record_fields())

    self.assertIsInstance(actual, ElasticsearchDocument)
    # Compare attribute dictionaries rather than the objects themselves.
    self.assertDictEqual(vars(actual), vars(expected))
def test_loading_with_list_of_objects(self):
    # type: () -> None
    """
    Load the same TableESDocument five times through
    FSElasticsearchJSONLoader and hand five copies of the expected
    serialized line to self._check_results_helper (defined elsewhere in
    the test class).
    """
    repeat = 5

    loader = FSElasticsearchJSONLoader()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                         scope=loader.get_scope())
    loader.init(conf=scoped_conf)

    document = TableESDocument(
        database='test_database',
        cluster='test_cluster',
        schema='test_schema',
        name='test_table',
        key='test_table_key',
        last_updated_timestamp=123456789,
        description='test_description',
        column_names=['test_col1', 'test_col2'],
        column_descriptions=['test_comment1', 'test_comment2'],
        total_usage=10,
        unique_usage=5,
        tags=['test_tag1', 'test_tag2'],
        badges=['badge1'],
        schema_description='schema_description',
        programmatic_descriptions=['test'],
    )
    # The list holds the same document object repeated, not distinct copies.
    documents = [document] * repeat

    for item in documents:
        loader.load(item)
    loader.close()

    expected_line = (
        '{"key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
        '"schema": "test_schema", "database": "test_database", "cluster": "test_cluster", '
        '"column_names": ["test_col1", "test_col2"], "name": "test_table", '
        '"last_updated_timestamp": 123456789, "display_name": "test_schema.test_table", '
        '"description": "test_description", "unique_usage": 5, "total_usage": 10, '
        '"tags": ["test_tag1", "test_tag2"], "schema_description": "schema_description", '
        '"programmatic_descriptions":["test"], '
        '"badges": ["badge1"]}'
    )

    self._check_results_helper(expected=[expected_line] * repeat)
def test_loading_with_list_of_objects(self):
    # type: () -> None
    """
    Load the same TableESDocument five times through
    FSElasticsearchJSONLoader and hand the expected lines (an index-action
    line followed by a document line, repeated five times) to
    self._check_results_helper (defined elsewhere in the test class).
    """
    repeat = 5

    loader = FSElasticsearchJSONLoader()
    scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                         scope=loader.get_scope())
    loader.init(conf=scoped_conf)

    document = TableESDocument(
        elasticsearch_index='test_es_index',
        elasticsearch_type='test_es_type',
        database='test_database',
        cluster='test_cluster',
        schema_name='test_schema',
        table_name='test_table',
        table_key='test_table_key',
        table_last_updated_epoch=123456789,
        table_description='test_description',
        column_names=['test_col1', 'test_col2'],
        column_descriptions=['test_comment1', 'test_comment2'],
        total_usage=10,
        unique_usage=5,
        tag_names=['test_tag1', 'test_tag2'],
    )
    # The list holds the same document object repeated, not distinct copies.
    documents = [document] * repeat

    for item in documents:
        loader.load(item)
    loader.close()

    index_line = '{"index": {"_type": "test_es_type", "_index": "test_es_index"}}'
    document_line = (
        '{"table_key": "test_table_key", "column_descriptions": ["test_comment1", "test_comment2"], '
        '"schema_name": "test_schema", "database": "test_database", "cluster": "test_cluster", '
        '"column_names": ["test_col1", "test_col2"], "table_name": "test_table", '
        '"table_last_updated_epoch": 123456789,'
        '"table_description": "test_description", "unique_usage": 5, "total_usage": 10, '
        '"tag_names": ["test_tag1", "test_tag2"]}'
    )

    # Multiplying the pair yields index, document, index, document, ...
    self._check_results_helper(expected=[index_line, document_line] * repeat)
def transform(self, record):
    # type: (Neo4jDataResult) -> Optional[ElasticsearchDocument]
    """
    Build a TableESDocument from a Neo4jDataResult record.

    :param record: the record to convert; any falsy value is passed over
    :return: None when ``record`` is falsy, otherwise a TableESDocument
        populated with this transformer's ``elasticsearch_index`` and
        ``elasticsearch_type`` plus the table fields read from ``record``
    :raises Exception: when ``record`` is truthy but not a Neo4jDataResult
    """
    # The falsy check runs before the type check, so values such as None
    # return None rather than raising.
    if not record:
        return None

    if not isinstance(record, Neo4jDataResult):
        raise Exception("ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!")

    document_kwargs = {
        'elasticsearch_index': self.elasticsearch_index,
        'elasticsearch_type': self.elasticsearch_type,
    }

    # These attributes are copied from the record under the same keyword name.
    copied_fields = (
        'database',
        'cluster',
        'schema_name',
        'table_name',
        'table_key',
        'table_description',
        'table_last_updated_epoch',
        'column_names',
        'column_descriptions',
        'total_usage',
        'unique_usage',
        'tag_names',
    )
    for field_name in copied_fields:
        document_kwargs[field_name] = getattr(record, field_name)

    return TableESDocument(**document_kwargs)