def test_load_no_table(self, mock_load_job_config):
        """load() raises NotFound when the client's table lookup fails.

        ``client.get_table`` is rigged to raise ``NotFound``; the test
        expects that same exception type to come out of ``load()``.
        """
        db = Mock(name='mock_db')
        resolver = MagicMock(name='mock_url_resolver')
        loader = BigQueryLoader(db=db,
                                url_resolver=resolver,
                                gcs_temp_base_loc=None)

        # Load plan targeting a delimited format with no hints.
        records_format = Mock(name='records_format',
                              spec=DelimitedRecordsFormat)
        records_format.format_type = 'delimited'
        records_format.hints = {}
        load_plan = Mock(name='mock_load_plan')
        load_plan.records_format = records_format

        # Source directory that reports a 'gs' scheme and a single URL.
        source_url = Mock(name='mock_url')
        directory = Mock(name='mock_directory')
        directory.scheme = 'gs'
        directory.manifest_entry_urls.return_value = [source_url]

        # The client is reached through the raw DB-API connection of the
        # engine; make its table lookup fail.
        raw_connection = db.engine.raw_connection.return_value
        client = raw_connection.connection._client
        client.get_table.side_effect = NotFound('missing table')

        with self.assertRaises(NotFound):
            loader.load(schema='my_project.my_dataset',
                        table='mytable',
                        load_plan=load_plan,
                        directory=directory)
    def test_load_with_job_failure(self, mock_load_job_config):
        """load() lets an error raised while waiting on the load job escape.

        The job returned by ``client.load_table_from_uri`` is rigged so that
        ``job.result()`` raises.  ``assertRaises(Exception)`` alone would be
        satisfied by *any* error (including one caused by a misconfigured
        mock earlier in ``load()``), so the test additionally verifies that
        the job was actually waited on.
        """
        mock_db = Mock(name='mock_db')
        mock_url_resolver = MagicMock(name='mock_url_resolver')
        mock_gcs_temp_base_loc = None
        big_query_loader = BigQueryLoader(
            db=mock_db,
            url_resolver=mock_url_resolver,
            gcs_temp_base_loc=mock_gcs_temp_base_loc)
        mock_schema = 'my_project.my_dataset'
        mock_table = 'mytable'
        mock_load_plan = Mock(name='mock_load_plan')
        mock_load_plan.records_format = Mock(name='records_format',
                                             spec=DelimitedRecordsFormat)
        mock_target_records_format = mock_load_plan.records_format
        mock_target_records_format.format_type = 'delimited'
        mock_target_records_format.hints = {}
        mock_directory = Mock(name='mock_directory')
        mock_directory.scheme = 'gs'
        mock_url = Mock(name='mock_url')
        mock_directory.manifest_entry_urls.return_value = [mock_url]

        mock_connection = mock_db.engine.raw_connection.return_value.connection
        mock_client = mock_connection._client
        # Waiting for the load job to finish is what fails in this scenario.
        mock_job = mock_client.load_table_from_uri.return_value
        mock_job.result.side_effect = Exception('some errors')

        mock_client.get_table.return_value.location = 'some-location'

        with self.assertRaises(Exception):
            big_query_loader.load(schema=mock_schema,
                                  table=mock_table,
                                  load_plan=mock_load_plan,
                                  directory=mock_directory)
        # Pin the failure to the job itself: if load() had blown up before
        # reaching job.result(), this assertion fails instead of the broad
        # assertRaises(Exception) silently passing.
        mock_job.result.assert_called_with()
 def test_best_scheme_to_load_from(self):
     """best_scheme_to_load_from() is expected to return 'gs'."""
     loader = BigQueryLoader(
         db=Mock(name='db'),
         url_resolver=Mock(name='url_resolver'),
         gcs_temp_base_loc=MagicMock(name='gcs_temp_base_loc'))
     scheme = loader.best_scheme_to_load_from()
     self.assertEqual('gs', scheme)
 def test_has_temporary_loadable_directory_loc_true(self):
     """A loader given a gcs_temp_base_loc reports a temporary location."""
     loader = BigQueryLoader(
         db=Mock(name='db'),
         url_resolver=Mock(name='url_resolver'),
         gcs_temp_base_loc=MagicMock(name='gcs_temp_base_loc'))
     has_temp_loc = loader.has_temporary_loadable_directory_loc()
     self.assertTrue(has_temp_loc)
 def test_temporary_gcs_directory_loc_none(self):
     """Without a gcs_temp_base_loc, entering the temporary GCS directory
     context raises NoTemporaryBucketConfiguration."""
     loader = BigQueryLoader(db=Mock(name='db'),
                             url_resolver=Mock(name='url_resolver'),
                             gcs_temp_base_loc=None)
     # The two context managers are entered in order, exactly as if nested.
     with self.assertRaises(NoTemporaryBucketConfiguration), \
             loader.temporary_gcs_directory_loc():
         pass
 def test_temporary_loadable_directory_scheme(self):
     """temporary_loadable_directory_scheme() is expected to return 'gs'."""
     loader = BigQueryLoader(
         db=Mock(name='db'),
         url_resolver=Mock(name='url_resolver'),
         gcs_temp_base_loc=MagicMock(name='gcs_temp_base_loc'))
     scheme = loader.temporary_loadable_directory_scheme()
     self.assertEqual('gs', scheme)
 def test_temporary_gcs_directory_loc(self):
     """The temporary GCS directory context yields whatever the base
     location's temporary_directory() context manager yields."""
     gcs_base = MagicMock(name='gcs_temp_base_loc')
     loader = BigQueryLoader(db=Mock(name='db'),
                             url_resolver=Mock(name='url_resolver'),
                             gcs_temp_base_loc=gcs_base)
     with loader.temporary_gcs_directory_loc() as loc:
         # Value produced by entering gcs_base.temporary_directory().
         temp_dir_cm = gcs_base.temporary_directory.return_value
         self.assertEqual(loc, temp_dir_cm.__enter__.return_value)
Example #8
0
 def test_known_supported_records_formats_for_load(self):
     """The loader advertises exactly two formats: a DelimitedRecordsFormat
     with the 'bigquery' variant, followed by a ParquetRecordsFormat."""
     loader = BigQueryLoader(db=Mock(name='db'),
                             url_resolver=Mock(name='url_resolver'))
     formats = loader.known_supported_records_formats_for_load()
     self.assertEqual(2, len(formats))

     # First entry: delimited, 'bigquery' variant.
     first = formats[0]
     self.assertEqual(type(first), DelimitedRecordsFormat)
     self.assertEqual('bigquery', first.variant)

     # Second entry: Parquet.
     second = formats[1]
     self.assertEqual(type(second), ParquetRecordsFormat)
    def test_load_with_fileobj_fallback(self, mock_load_job_config):
        """load() from a non-'gs' directory goes through load_table_from_file.

        The source directory reports an 's3' scheme and a single manifest
        entry.  The test expects the file object obtained by opening
        ``url_resolver.file_url(...)`` to be handed to
        ``client.load_table_from_file`` for table
        'my_project.my_dataset.mytable', the job to be waited on, and the
        job's ``output_rows`` to be returned.
        """
        mock_db = Mock(name='mock_db')
        mock_url_resolver = MagicMock(name='mock_url_resolver')
        mock_gcs_temp_base_loc = None
        big_query_loader = BigQueryLoader(
            db=mock_db,
            url_resolver=mock_url_resolver,
            gcs_temp_base_loc=mock_gcs_temp_base_loc)
        mock_schema = 'my_project.my_dataset'
        mock_table = 'mytable'
        mock_load_plan = Mock(name='mock_load_plan')
        mock_load_plan.records_format = Mock(name='records_format',
                                             spec=DelimitedRecordsFormat)
        mock_target_records_format = mock_load_plan.records_format
        mock_target_records_format.format_type = 'delimited'
        mock_target_records_format.hints = {}

        mock_connection = mock_db.engine.raw_connection.return_value.connection
        mock_client = mock_connection._client
        mock_job = mock_client.load_table_from_file.return_value
        mock_job.output_rows = 42

        # Only one source directory is configured: the non-GCS one this test
        # is about.  (A 'gs' directory that used to be built first was
        # overwritten before use and has been dropped.)
        mock_directory = Mock(name='directory')
        mock_directory.scheme = 's3'
        mock_file_url = MagicMock(name='file_url')
        mock_directory.manifest_entry_urls.return_value = [mock_file_url]
        # File object produced by `with url_resolver.file_url(...).open()`.
        mock_file_loc = mock_url_resolver.file_url.return_value
        mock_fileobj = mock_file_loc.open.return_value.__enter__.return_value

        out = big_query_loader.load(schema=mock_schema,
                                    table=mock_table,
                                    load_plan=mock_load_plan,
                                    directory=mock_directory)
        mock_client.load_table_from_file.\
            assert_called_with(mock_fileobj,
                               'my_project.my_dataset.mytable',
                               job_config=mock_load_job_config.return_value)
        mock_job.result.assert_called_with()

        self.assertEqual(out, mock_job.output_rows)
Example #10
0
 def test_can_load_this_format_false_parquet(self, mock_RecordsLoadPlan,
                                             mock_ProcessingInstructions,
                                             mock_load_job_config):
     """can_load_this_format() returns False for the Parquet-spec'd mock.

     Also checks that a ProcessingInstructions is built with no arguments
     and that a RecordsLoadPlan is built from it and the source format.
     """
     db = Mock(name='db')
     source_format = Mock(name='source_records_format',
                          spec=ParquetRecordsFormat)
     # NOTE(review): format_type is 'delimited' although the spec is
     # ParquetRecordsFormat - confirm this mismatch is intentional.
     source_format.format_type = 'delimited'
     instructions = mock_ProcessingInstructions.return_value
     plan = mock_RecordsLoadPlan.return_value
     plan.records_format = source_format
     resolver = Mock(name='url_resolver')
     source_format.hints = {}

     loader = BigQueryLoader(db=db, url_resolver=resolver)
     can_load = loader.can_load_this_format(source_format)

     mock_ProcessingInstructions.assert_called_with()
     mock_RecordsLoadPlan.assert_called_with(
         records_format=source_format,
         processing_instructions=instructions)
     self.assertEqual(False, can_load)
Example #11
0
    def test_load_with_bad_schema_name(self, mock_load_job_config):
        """load() raises ValueError for the schema name
        'my_project.my_dataset.something_invalid'."""
        db = Mock(name='mock_db')
        resolver = MagicMock(name='mock_url_resolver')
        loader = BigQueryLoader(db=db, url_resolver=resolver)

        # Presumably rejected for having too many dot-separated parts;
        # the parsing itself is not visible from this test.
        bad_schema = 'my_project.my_dataset.something_invalid'
        table = Mock(name='mock_table')

        records_format = Mock(name='source_records_format',
                              spec=DelimitedRecordsFormat)
        load_plan = Mock(name='mock_load_plan')
        load_plan.records_format = records_format
        records_format.hints = {}

        source_url = Mock(name='mock_url')
        directory = Mock(name='mock_directory')
        directory.manifest_entry_urls.return_value = [source_url]

        with self.assertRaises(ValueError):
            loader.load(schema=bad_schema,
                        table=table,
                        load_plan=load_plan,
                        directory=directory)
    def test_load_with_default_project(self, mock_load_job_config):
        """load() works with a dataset-only schema name (no project part).

        With schema 'my_dataset' and table 'my_table', the test expects the
        table to be looked up and loaded as 'my_dataset.my_table', the load
        to use the looked-up table's location, the job to be waited on, and
        the job's ``output_rows`` to be returned.
        """
        db = Mock(name='mock_db')
        resolver = MagicMock(name='mock_url_resolver')
        loader = BigQueryLoader(db=db,
                                url_resolver=resolver,
                                gcs_temp_base_loc=None)

        # Load plan targeting a delimited format with no hints.
        records_format = Mock(name='records_format',
                              spec=DelimitedRecordsFormat)
        load_plan = Mock(name='mock_load_plan')
        load_plan.records_format = records_format
        records_format.format_type = 'delimited'
        records_format.hints = {}

        # Source directory that reports a 'gs' scheme and a single URL.
        source_url = Mock(name='mock_url')
        directory = Mock(name='mock_directory')
        directory.scheme = 'gs'
        directory.manifest_entry_urls.return_value = [source_url]

        # Client reached through the engine's raw connection.
        raw_connection = db.engine.raw_connection.return_value
        client = raw_connection.connection._client
        client.get_table.return_value.location = 'some-location'
        job = client.load_table_from_uri.return_value
        job.output_rows = 42

        out = loader.load(schema='my_dataset',
                          table='my_table',
                          load_plan=load_plan,
                          directory=directory)

        client.get_table.assert_called_with('my_dataset.my_table')
        client.load_table_from_uri.assert_called_with(
            [source_url],
            'my_dataset.my_table',
            location='some-location',
            job_config=mock_load_job_config.return_value)
        job.result.assert_called_with()

        self.assertEqual(out, job.output_rows)