コード例 #1
0
def run_mssql_job():
    """
    Assemble (but do not launch) the job built around MSSQLMetadataExtractor.

    The extractor is paired with an FsNeo4jCSVLoader inside a DefaultTask and
    the job publishes through Neo4jCsvPublisher. Loader and publisher are
    configured with the same node / relationship folders located under
    ``/var/tmp/amundsen/table_metadata``.

    :return: the configured DefaultJob instance
    """
    # Only referenced by the WHERE_CLAUSE_SUFFIX_KEY entry, which is currently
    # commented out in the settings below.
    where_clause_suffix = textwrap.dedent("""
        ('dbo')
    """)

    tmp_folder = '/var/tmp/amundsen/table_metadata'
    node_files_folder = f'{tmp_folder}/nodes/'
    relationship_files_folder = f'{tmp_folder}/relationships/'

    settings = {
        # f'extractor.mssql_metadata.{MSSQLMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY}':
        #     where_clause_suffix,
        f'extractor.mssql_metadata.{MSSQLMetadataExtractor.USE_CATALOG_AS_CLUSTER_NAME}': True,
        f'extractor.mssql_metadata.extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}':
            connection_string(),
        # The loader writes into the same folders the publisher reads from.
        f'loader.filesystem_csv_neo4j.{FsNeo4jCSVLoader.NODE_DIR_PATH}': node_files_folder,
        f'loader.filesystem_csv_neo4j.{FsNeo4jCSVLoader.RELATION_DIR_PATH}': relationship_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.NODE_FILES_DIR}': node_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.RELATION_FILES_DIR}': relationship_files_folder,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_END_POINT_KEY}': neo4j_endpoint,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_USER}': neo4j_user,
        f'publisher.neo4j.{neo4j_csv_publisher.NEO4J_PASSWORD}': neo4j_password,
        # should use unique tag here like {ds}
        f'publisher.neo4j.{neo4j_csv_publisher.JOB_PUBLISH_TAG}': 'unique_tag',
    }
    job_config = ConfigFactory.from_dict(settings)

    metadata_task = DefaultTask(extractor=MSSQLMetadataExtractor(), loader=FsNeo4jCSVLoader())
    return DefaultJob(conf=job_config, task=metadata_task, publisher=Neo4jCsvPublisher())
    def test_extraction_with_single_result(self):
        # type: () -> None
        """
        Feed six mocked column rows of one table through the extractor.

        ``SQLAlchemyExtractor._get_connection`` is patched so the connection's
        ``execute`` returns the prepared rows. The first ``extract()`` must
        match the expected TableMetadata (compared via repr) and the second
        must return None.
        """
        # One tuple per mocked column: (name, description, type, sort order).
        # The same tuples drive both the mocked rows and the expected columns.
        column_specs = [
            ('col_id1', 'description of id1', 'bigint', 0),
            ('col_id2', 'description of id2', 'bigint', 1),
            ('is_active', None, 'boolean', 2),
            ('source', 'description of source', 'varchar', 3),
            ('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
            ('ds', None, 'varchar', 5),
        ]
        cluster_conf_key = 'extractor.mssql_metadata.{}'.format(
            MSSQLMetadataExtractor.CLUSTER_KEY)

        with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
            execute_mock = MagicMock()
            connection = MagicMock()
            connection.execute = execute_mock
            mock_connection.return_value = connection

            # Table-level fields merged into every column row by self._union.
            table = {
                'schema_name': 'test_schema',
                'name': 'test_table',
                'description': 'a table for testing',
                'cluster': self.conf[cluster_conf_key],
            }

            execute_mock.return_value = [
                self._union(
                    {
                        'col_name': col_name,
                        'col_type': col_type,
                        'col_description': col_description,
                        'col_sort_order': sort_order,
                    },
                    table)
                for col_name, col_description, col_type, sort_order in column_specs
            ]

            extractor = MSSQLMetadataExtractor()
            extractor.init(self.conf)
            actual = extractor.extract()

            expected_columns = [ColumnMetadata(*spec) for spec in column_specs]
            expected = TableMetadata(
                'mssql', 'MY_CLUSTER', 'test_schema', 'test_table',
                'a table for testing', expected_columns,
                False, ['test_schema'])

            self.assertEqual(repr(expected), repr(actual))
            # A second call is expected to yield nothing further.
            self.assertIsNone(extractor.extract())
コード例 #3
0
    def test_extraction_with_multiple_result(self) -> None:
        """
        Feed mocked column rows of three tables through the extractor.

        ``SQLAlchemyExtractor._get_connection`` is patched so the connection's
        ``execute`` returns the prepared rows. Successive ``extract()`` calls
        must match one expected TableMetadata per table (compared via repr),
        in the order the rows were supplied, followed by None twice.
        """
        cluster = self.conf[f'extractor.mssql_metadata.{MSSQLMetadataExtractor.CLUSTER_KEY}']

        # Each entry: (schema, table name, table description, columns), where
        # every column is (name, description, type, sort order).
        table_specs = [
            ('test_schema1', 'test_table1', 'test table 1', [
                ('col_id1', 'description of col_id1', 'bigint', 0),
                ('col_id2', 'description of col_id2', 'bigint', 1),
                ('is_active', None, 'boolean', 2),
                ('source', 'description of source', 'varchar', 3),
                ('etl_created_at', 'description of etl_created_at', 'timestamp', 4),
                ('ds', None, 'varchar', 5),
            ]),
            ('test_schema1', 'test_table2', 'test table 2', [
                ('col_name', 'description of col_name', 'varchar', 0),
                ('col_name2', 'description of col_name2', 'varchar', 1),
            ]),
            ('test_schema2', 'test_table3', 'test table 3', [
                ('col_id3', 'description of col_id3', 'varchar', 0),
                ('col_name3', 'description of col_name3', 'varchar', 1),
            ]),
        ]

        with patch.object(SQLAlchemyExtractor, '_get_connection') as mock_connection:
            execute_mock = MagicMock()
            connection = MagicMock()
            connection.execute = execute_mock
            mock_connection.return_value = connection

            # Table-level fields, one dict per table, shared by that table's rows.
            tables = [
                {'schema_name': schema,
                 'name': table_name,
                 'description': table_description,
                 'cluster': cluster}
                for schema, table_name, table_description, _ in table_specs
            ]

            rows = []
            for table, spec in zip(tables, table_specs):
                for col_name, col_description, col_type, sort_order in spec[3]:
                    column = {'col_name': col_name,
                              'col_type': col_type,
                              'col_description': col_description,
                              'col_sort_order': sort_order}
                    rows.append(self._union(column, table))
            execute_mock.return_value = rows

            extractor = MSSQLMetadataExtractor()
            extractor.init(self.conf)

            for schema, table_name, table_description, columns in table_specs:
                expected = TableMetadata(
                    'mssql', cluster, schema, table_name, table_description,
                    [ColumnMetadata(*column) for column in columns],
                    False, [schema])
                actual = repr(extractor.extract())
                self.assertEqual(repr(expected), actual)

            # After the three tables, further calls are expected to return None.
            self.assertIsNone(extractor.extract())
            self.assertIsNone(extractor.extract())