def init(self, conf: ConfigTree) -> None:
        """
        Configure this search-data extractor and the Neo4jExtractor it wraps.

        The entity type is read from ``ENTITY_TYPE`` (falling back to
        'table') and stored lower-cased. The cypher query is taken from
        ``CYPHER_QUERY_CONFIG_KEY`` when that key is present in ``conf``;
        otherwise the per-entity default from ``DEFAULT_QUERY_BY_ENTITY`` is
        passed through ``_add_publish_tag_filter`` together with the
        ``JOB_PUBLISH_TAG`` value (empty string when unset).

        Note that ``conf`` itself is modified: the resolved query is written
        into it under the Neo4jExtractor scope before the wrapped extractor
        is initialized from that scope.

        :param conf: configuration for this extractor
        """
        self.conf = conf
        entity_type = conf.get_string(Neo4jSearchDataExtractor.ENTITY_TYPE,
                                      default='table')
        self.entity = entity_type.lower()

        query_key = Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY
        if query_key not in conf:
            # No explicit query configured: start from the default for this
            # entity. An entity without an entry makes this lookup fail.
            entity_query = Neo4jSearchDataExtractor.DEFAULT_QUERY_BY_ENTITY[
                self.entity]
            publish_tag = conf.get_string(JOB_PUBLISH_TAG, '')
            self.cypher_query = self._add_publish_tag_filter(
                publish_tag, cypher_query=entity_query)
        else:
            # A query supplied in the configuration is used as given.
            self.cypher_query = conf.get_string(query_key)

        delegate = Neo4jExtractor()
        self.neo4j_extractor = delegate
        # Hand the resolved query to the wrapped extractor through the shared
        # configuration, under that extractor's own scope.
        scoped_query_key = '.'.join(
            (delegate.get_scope(), Neo4jExtractor.CYPHER_QUERY_CONFIG_KEY))
        self.conf.put(scoped_query_key, self.cypher_query)
        # Initialize the wrapped extractor from its scoped view of the conf.
        delegate.init(
            Scoped.get_scoped_conf(self.conf, delegate.get_scope()))
    def test_extraction_with_multiple_query_result(self):
        # type: (Any) -> None
        """
        Check that extract() hands back several preset results one at a time,
        in order, and then returns None once they are exhausted.
        """
        expected_results = ['test_result1', 'test_result2', 'test_result3']

        # _get_driver is replaced by a mock for the duration of the test.
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            # Preset the results directly on the extractor.
            extractor.results = list(expected_results)

            for expected in expected_results:
                self.assertEqual(extractor.extract(), expected)

            # One more call after the last result must yield None.
            self.assertIsNone(extractor.extract())
    def init(self, conf):
        # type: (ConfigTree) -> None
        """
        Configure this search-data extractor and the Neo4jExtractor it wraps.

        The cypher query is taken from ``CYPHER_QUERY_CONFIG_KEY`` when that
        key is present in ``conf``; otherwise ``DEFAULT_NEO4J_CYPHER_QUERY``
        is passed through ``_add_publish_tag_filter`` together with the
        ``JOB_PUBLISH_TAG`` value (empty string when unset).

        Note that ``conf`` itself is modified: the resolved query is written
        into it under the Neo4jExtractor scope before the wrapped extractor
        is initialized from that scope.
        """
        self.conf = conf

        query_key = Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY
        if query_key not in conf:
            # No explicit query configured: fall back to the default query,
            # run through the publish-tag filter helper.
            publish_tag = conf.get_string(JOB_PUBLISH_TAG, '')
            self.cypher_query = self._add_publish_tag_filter(
                publish_tag,
                Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)
        else:
            # A query supplied in the configuration is used as given.
            self.cypher_query = conf.get_string(query_key)

        delegate = Neo4jExtractor()
        self.neo4j_extractor = delegate
        # Hand the resolved query to the wrapped extractor through the shared
        # configuration, under that extractor's own scope.
        scoped_query_key = '.'.join(
            (delegate.get_scope(), Neo4jExtractor.CYPHER_QUERY_CONFIG_KEY))
        self.conf.put(scoped_query_key, self.cypher_query)
        # Initialize the wrapped extractor from its scoped view of the conf.
        delegate.init(
            Scoped.get_scoped_conf(self.conf, delegate.get_scope()))
Exemple #4
0
    def text_extraction_with_empty_query_result(self):
        # type: (Any) -> None
        """
        Check that extract() returns None when the only preset result is an
        empty string.
        """
        # NOTE(review): the name starts with "text_", not "test_", so a
        # runner that collects methods by the "test" prefix will skip this
        # method. This looks like a typo; confirm the assertion below
        # actually holds before renaming.
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            # Preset a single empty-string result on the extractor.
            extractor.results = ['']
            self.assertIsNone(extractor.extract())
    def test_extraction_with_model_class(self):
        # type: (Any) -> None
        """
        Check that, with a model class configured, extract() returns an
        instance of that class whose attributes equal the preset result.
        """
        # (config key suffix, value) pairs; every key lives under the
        # 'extractor.neo4j.' prefix.
        settings = (
            (Neo4jExtractor.GRAPH_URL_CONFIG_KEY, 'TEST_GRAPH_URL'),
            (Neo4jExtractor.CYPHER_QUERY_CONFIG_KEY, 'TEST_QUERY'),
            (Neo4jExtractor.NEO4J_AUTH_USER, 'TEST_USER'),
            (Neo4jExtractor.NEO4J_AUTH_PW, 'TEST_PW'),
            (Neo4jExtractor.MODEL_CLASS_CONFIG_KEY,
             'databuilder.models.table_elasticsearch_document.TableESDocument'),
        )
        config_dict = {
            'extractor.neo4j.{}'.format(suffix): value
            for suffix, value in settings
        }

        # This test replaces the fixture configuration with its own.
        self.conf = ConfigFactory.from_dict(config_dict)

        # _get_driver is replaced by a mock for the duration of the test.
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            result_dict = {
                'database': 'test_database',
                'cluster': 'test_cluster',
                'schema': 'test_schema',
                'name': 'test_table_name',
                'display_name': 'test_schema.test_table_name',
                'key': 'test_table_key',
                'description': 'test_table_description',
                'last_updated_timestamp': 123456789,
                'column_names': ['test_col1', 'test_col2', 'test_col3'],
                'column_descriptions': [
                    'test_description1', 'test_description2', ''
                ],
                'total_usage': 100,
                'unique_usage': 5,
                'tags': ['hive'],
                'badges': ['badge1'],
                'schema_description': 'schema_description',
            }

            # Preset one result record on the extractor.
            extractor.results = [result_dict]
            result_obj = extractor.extract()

            # The record must come back as the model class, with instance
            # attributes matching the input record exactly.
            self.assertIsInstance(result_obj, TableESDocument)
            self.assertDictEqual(vars(result_obj), result_dict)
    def test_extraction_with_single_query_result(self: Any) -> None:
        """
        Check that extract() returns the single preset result once and then
        returns None on the following call.
        """
        # _get_driver is replaced by a mock for the duration of the test.
        with patch.object(Neo4jExtractor, '_get_driver'):
            extractor = Neo4jExtractor()
            scoped_conf = Scoped.get_scoped_conf(conf=self.conf,
                                                 scope=extractor.get_scope())
            extractor.init(scoped_conf)

            # Preset exactly one result on the extractor.
            extractor.results = ['test_result']

            first = extractor.extract()
            self.assertEqual(first, 'test_result')

            # Nothing is left after the first call, so the next one is None.
            second = extractor.extract()
            self.assertIsNone(second)