def test_transform_without_model_class_conf(self):
     # type: () -> None
     """
     Test model_class conf is required

     Initialises the transformer with a config that only carries the index
     and doc_type keys and expects ``init`` to raise with a message asking
     for the ElasticsearchDocument model class.
     """
     config_dict = {
         'transformer.elasticsearch.index': self.elasticsearch_index,
         'transformer.elasticsearch.doc_type': self.elasticsearch_type
     }
     transformer = ElasticsearchDocumentTransformer()
     with self.assertRaises(Exception) as context:
         transformer.init(conf=Scoped.get_scoped_conf(
             conf=ConfigFactory.from_dict(config_dict),
             scope=transformer.get_scope()))
     # Match against the message text rather than the exception object:
     # exceptions are not containers on Python 3, so ``x in exception``
     # raises TypeError instead of checking the message.
     self.assertIn(
         "User needs to provide the ElasticsearchDocument model class",
         str(context.exception))
def create_es_publisher_sample_job():
    """
    Assemble the sample job that publishes search data to Elasticsearch.

    The task is wired with a Neo4jSearchDataExtractor, an
    ElasticsearchDocumentTransformer and an FSElasticsearchJSONLoader; the
    job adds an ElasticsearchPublisher on top of that task. The names
    ``es``, ``neo4j_endpoint``, ``neo4j_user`` and ``neo4j_password`` are
    not defined in this function and must be available in the enclosing
    module scope.

    :return: the configured DefaultJob; it is only built here, not launched
    """
    # The loader saves data to this location and the publisher reads it
    # back from here.
    # Alternative: os.path.join(BASE_DIR, "amundsen", "search_data.json")
    search_data_file = '/tmp/amundsen/search_data.json'

    sample_task = DefaultTask(
        loader=FSElasticsearchJSONLoader(),
        extractor=Neo4jSearchDataExtractor(),
        transformer=ElasticsearchDocumentTransformer(),
    )

    # Elasticsearch client instance
    es_client = es
    # Unique name of the new index in Elasticsearch
    new_index_name = "tables" + str(uuid.uuid4())
    # Related to mapping type from /databuilder/publisher/elasticsearch_publisher.py#L38
    new_index_doc_type = "table"
    # Alias for Elasticsearch used in amundsensearchlibrary/search_service/config.py as an index
    index_alias = "table_search_index"

    # One key builder per config scope; each fills the scope template with
    # the component's config-key constant.
    neo4j_key = "extractor.search_data.extractor.neo4j.{}".format
    loader_key = "loader.filesystem.elasticsearch.{}".format
    transformer_key = "transformer.elasticsearch.{}".format
    publisher_key = "publisher.elasticsearch.{}".format

    settings = {
        neo4j_key(Neo4jExtractor.GRAPH_URL_CONFIG_KEY): neo4j_endpoint,
        neo4j_key(Neo4jExtractor.MODEL_CLASS_CONFIG_KEY):
            "databuilder.models.neo4j_data.Neo4jDataResult",
        neo4j_key(Neo4jExtractor.NEO4J_AUTH_USER): neo4j_user,
        neo4j_key(Neo4jExtractor.NEO4J_AUTH_PW): neo4j_password,
        loader_key(FSElasticsearchJSONLoader.FILE_PATH_CONFIG_KEY): search_data_file,
        loader_key(FSElasticsearchJSONLoader.FILE_MODE_CONFIG_KEY): "w",
        transformer_key(ElasticsearchDocumentTransformer.ELASTICSEARCH_INDEX_CONFIG_KEY):
            new_index_name,
        transformer_key(ElasticsearchDocumentTransformer.ELASTICSEARCH_DOC_CONFIG_KEY):
            new_index_doc_type,
        publisher_key(ElasticsearchPublisher.FILE_PATH_CONFIG_KEY): search_data_file,
        publisher_key(ElasticsearchPublisher.FILE_MODE_CONFIG_KEY): "r",
        publisher_key(ElasticsearchPublisher.ELASTICSEARCH_CLIENT_CONFIG_KEY): es_client,
        publisher_key(ElasticsearchPublisher.ELASTICSEARCH_NEW_INDEX_CONFIG_KEY):
            new_index_name,
        publisher_key(ElasticsearchPublisher.ELASTICSEARCH_ALIAS_CONFIG_KEY): index_alias,
    }

    return DefaultJob(conf=ConfigFactory.from_dict(settings),
                      task=sample_task,
                      publisher=ElasticsearchPublisher())
 def test_transform_with_invalid_model_class_conf(self):
     # type: () -> None
     """
     Test non existing model_class conf will throw error

     Points the model_class key at a class name that does not exist and
     expects ``init`` to raise with a message naming the missing attribute.
     """
     config_dict = {
         'transformer.elasticsearch.index':
         self.elasticsearch_index,
         'transformer.elasticsearch.doc_type':
         self.elasticsearch_type,
         'transformer.elasticsearch.model_class':
         'databuilder.models.table_elasticsearch_document.NonExistingESDocument'
     }
     transformer = ElasticsearchDocumentTransformer()
     with self.assertRaises(Exception) as context:
         transformer.init(conf=Scoped.get_scoped_conf(
             conf=ConfigFactory.from_dict(config_dict),
             scope=transformer.get_scope()))
     # Match against the message text rather than the exception object:
     # exceptions are not containers on Python 3. Only the fragment shared
     # by the Python 2 ("'module' object has no attribute ...") and
     # Python 3 ("module '...' has no attribute ...") wordings is checked.
     self.assertIn(
         "has no attribute 'NonExistingESDocument'",
         str(context.exception))
    def test_empty_transform(self):
        # type: () -> None
        """
        Test that transforming ``None`` (no data) gives back ``None``
        """
        es_transformer = ElasticsearchDocumentTransformer()
        scoped_conf = Scoped.get_scoped_conf(
            conf=self.conf,
            scope=es_transformer.get_scope())
        es_transformer.init(conf=scoped_conf)

        # No record in, no document out.
        self.assertIsNone(es_transformer.transform(None))  # type: ignore
    def test_transform_with_dict_object(self):
        # type: () -> None
        """
        Test Transform functionality with Dict object

        A plain dict is not an accepted record type, so ``transform`` is
        expected to raise with a message naming 'Neo4jDataResult'.
        """
        transformer = ElasticsearchDocumentTransformer()
        transformer.init(conf=Scoped.get_scoped_conf(
            conf=self.conf, scope=transformer.get_scope()))

        data = dict(test_key="DOES_NOT_MATTER", test_key2="DOES_NOT_MATTER2")

        with self.assertRaises(Exception) as context:
            transformer.transform(data)  # type: ignore
        # Match against the message text rather than the exception object:
        # exceptions are not containers on Python 3, so ``x in exception``
        # raises TypeError instead of checking the message.
        self.assertIn(
            "ElasticsearchDocumentTransformer expects record of type 'Neo4jDataResult'!",
            str(context.exception))
    def test_transform_success_case(self):
        # type: () -> None
        """
        Test that a Neo4jDataResult record is transformed into the
        matching TableESDocument
        """
        def table_fields():
            # Fresh dict (and fresh lists) on every call so the input record
            # and the expected document never share mutable objects.
            return dict(database="test_database",
                        cluster="test_cluster",
                        schema_name="test_schema_name",
                        table_name="test_table_name",
                        table_key="test_table_key",
                        table_last_updated_epoch=123456789,
                        table_description="test_table_description",
                        column_names=["test_col1", "test_col2"],
                        column_descriptions=[
                            "test_col_description1",
                            "test_col_description2"
                        ],
                        total_usage=10,
                        unique_usage=5,
                        tag_names=["test_tag1", "test_tag2"])

        es_transformer = ElasticsearchDocumentTransformer()
        scoped_conf = Scoped.get_scoped_conf(
            conf=self.conf,
            scope=es_transformer.get_scope())
        es_transformer.init(conf=scoped_conf)

        record = Neo4jDataResult(**table_fields())
        actual_doc = es_transformer.transform(record)

        # Same table fields as the input, plus the index and type names.
        expected_doc = TableESDocument(elasticsearch_index='test_es_index',
                                       elasticsearch_type='test_es_type',
                                       **table_fields())

        self.assertIsInstance(actual_doc, ElasticsearchDocument)
        self.assertDictEqual(vars(actual_doc), vars(expected_doc))