def test_write_table_sync_append(self, data_dir, credentials_type):
    """Two sync loads of the same CSV (second incremental) append rows.

    After a full load followed by an incremental load of identical data,
    the table must contain each original row twice.
    """
    dataset = os.environ.get('BIGQUERY_DATASET')
    bq_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    csv_file_path = data_dir + 'simple_csv/in/tables/table.csv'
    # First write replaces the table, second appends to it.
    bq_writer.write_table_sync(csv_file_path, dataset,
                               fixtures.get_table_configuration())
    bq_writer.write_table_sync(csv_file_path, dataset,
                               fixtures.get_table_configuration(),
                               incremental=True)
    query = 'SELECT * FROM %s.%s ORDER BY 1 ASC, 2 ASC' % (
        dataset, os.environ.get('BIGQUERY_TABLE'))
    verify_client = self.get_client('service_account_manage')
    row_data = list(verify_client.query(query))
    expected_rows = [('val1', 1), ('val1', 1), ('val2', 2), ('val2', 2)]
    assert len(row_data) == len(expected_rows)
    for row, (col1, col2) in zip(row_data, expected_rows):
        assert row.col1 == col1
        assert row.col2 == col2
def test_create_dataset_invalid_name(self, data_dir, credentials_type):
    """A dataset name containing a space must raise UserException.

    Uses pytest.raises instead of the try/pytest.fail/except pattern:
    it automatically fails the test when no exception is raised and
    narrows the assertion to the expected exception type.
    """
    my_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table_sync(
            data_dir + 'simple_csv/in/tables/table.csv',
            os.environ.get('BIGQUERY_DATASET') + ' INVALID',
            fixtures.get_table_configuration())
    assert 'Cannot create dataset' in str(excinfo.value)
def test_write_table_connection_error(self, data_dir, credentials_type):
    """A connection error during load is wrapped into a UserException.

    BUG FIX: the original had no pytest.fail() after the write call, so
    the test silently passed when no exception was raised at all.
    pytest.raises fails the test in that case and also asserts the
    exception type explicitly.
    """
    client = self.get_client(credentials_type=credentials_type)
    error_msg = "Some connection error!"
    # Simulate a network failure inside the BigQuery client.
    client.load_table_from_file = MagicMock(
        side_effect=requests.exceptions.ConnectionError(error_msg))
    my_writer = writer.Writer(client)
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table(data_dir + 'simple_csv/in/tables/table.csv',
                              os.environ.get('BIGQUERY_DATASET'),
                              fixtures.get_table_configuration())
    msg = 'Loading data into table {}.{} failed: {}'\
        .format(os.environ.get('BIGQUERY_DATASET'),
                os.environ.get('BIGQUERY_TABLE'),
                error_msg)
    assert msg in str(excinfo.value)
def test_write_table_sync_error_too_many_values(self, data_dir,
                                                credentials_type):
    """A CSV row with more columns than the schema raises UserException.

    Uses pytest.raises instead of try/pytest.fail/except: it fails the
    test automatically when no exception is raised.
    """
    my_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    # Create the table with the baseline two-column schema first.
    my_writer.write_table_sync(data_dir + 'simple_csv/in/tables/table.csv',
                               os.environ.get('BIGQUERY_DATASET'),
                               fixtures.get_table_configuration())
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table_sync(
            data_dir + 'simple_csv_with_extra_column/in/tables/table.csv',
            os.environ.get('BIGQUERY_DATASET'),
            fixtures.get_table_configuration_with_extra_column(),
            True)
    assert 'Too many values in row' in str(excinfo.value)
def test_write_table_sync_error_invalid_datatype(self, data_dir,
                                                 credentials_type):
    """Loading a string into an INT64 column raises UserException.

    Uses pytest.raises instead of try/pytest.fail/except: it fails the
    test automatically when no exception is raised.
    """
    my_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    # Create the table with the baseline schema first.
    my_writer.write_table_sync(data_dir + 'simple_csv/in/tables/table.csv',
                               os.environ.get('BIGQUERY_DATASET'),
                               fixtures.get_table_configuration())
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table_sync(
            data_dir + 'simple_csv_invalid_data_types/in/tables/table.csv',
            os.environ.get('BIGQUERY_DATASET'),
            fixtures.get_table_configuration_with_invalid_data_type(),
            True)
    assert 'Could not parse \'val1\' as INT64 for field col2' in str(
        excinfo.value)
def test_invalid_project(self, data_dir, credentials_type):
    """Writing into a nonexistent GCP project raises a UserException.

    Uses pytest.raises instead of try/pytest.fail/except, and drops the
    redundant parentheses around the if condition.
    """
    if credentials_type == 'service_account':
        credentials = self.get_service_account_user_credentials()
    else:
        credentials = self.get_oauth_credentials()
    bigquery_client = bigquery.Client('invalid-project', credentials)
    my_writer = writer.Writer(bigquery_client)
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table_sync(
            data_dir + 'simple_csv/in/tables/table.csv',
            os.environ.get('BIGQUERY_DATASET'),
            fixtures.get_table_configuration())
    assert 'Project invalid-project was not found.' in str(excinfo.value)
def test_write_table_sync_with_invalid_column_order(
        self, data_dir, credentials_type):
    """Column order differing from the existing table raises UserException.

    Uses pytest.raises instead of try/pytest.fail/except: it fails the
    test automatically when no exception is raised.
    """
    my_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    # Create the table with columns (col1, col2) first.
    my_writer.write_table_sync(data_dir + 'simple_csv/in/tables/table.csv',
                               os.environ.get('BIGQUERY_DATASET'),
                               fixtures.get_table_configuration())
    with pytest.raises(exceptions.UserException) as excinfo:
        my_writer.write_table_sync(
            '%ssimple_csv_invalid_column_order/in/tables/table.csv'
            % data_dir,
            os.environ.get('BIGQUERY_DATASET'),
            fixtures.get_table_configuration_with_invalid_column_order(),
            True)
    assert 'Column order mismatch. Actual configuration: '\
           'col2, col1. Expected BigQuery: col1, col2.'\
           in str(excinfo.value)
def test_write_table_sync_newlines(self, data_dir, credentials_type):
    """Cell values containing embedded newlines survive a sync load."""
    dataset = os.environ.get('BIGQUERY_DATASET')
    bq_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    bq_writer.write_table_sync(data_dir + 'newlines/in/tables/table.csv',
                               dataset,
                               fixtures.get_table_configuration())
    query = 'SELECT * FROM %s.%s ORDER BY 1 ASC' % (
        dataset, os.environ.get('BIGQUERY_TABLE'))
    verify_client = self.get_client('service_account_manage')
    row_data = list(verify_client.query(query))
    # The newline inside col1 of the first row must be preserved verbatim.
    expected_rows = [('val1\non new line', 1), ('val2', 2)]
    assert len(row_data) == len(expected_rows)
    for row, (col1, col2) in zip(row_data, expected_rows):
        assert row.col1 == col1
        assert row.col2 == col2
def test_invalid_token(self, data_dir):
    """Credentials without refresh fields fail with RefreshError.

    Uses pytest.raises instead of try/pytest.fail/except: it fails the
    test automatically when no exception is raised.
    """
    # Access token only — no refresh_token, so any token refresh attempt
    # by the client library must fail.
    invalid_credentials = Credentials(
        'access-token',
        token_uri='https://accounts.google.com/o/oauth2/token',
        client_id=os.environ.get('OAUTH_CLIENT_ID'),
        client_secret=os.environ.get('OAUTH_CLIENT_SECRET'))
    bigquery_client = bigquery.Client(self.get_project(),
                                      invalid_credentials)
    my_writer = writer.Writer(bigquery_client)
    with pytest.raises(RefreshError) as excinfo:
        my_writer.write_table(data_dir + 'simple_csv/in/tables/table.csv',
                              os.environ.get('BIGQUERY_DATASET'),
                              fixtures.get_table_configuration())
    assert str(excinfo.value) == 'The credentials do not contain the necessary' \
        ' fields need to refresh the access token.' \
        ' You must specify refresh_token, token_uri,' \
        ' client_id, and client_secret.'
def test_write_table_schema(self, data_dir, credentials_type):
    """A sync load creates the table with the configured column schema."""
    bq_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    bq_writer.write_table_sync(data_dir + 'simple_csv/in/tables/table.csv',
                               os.environ.get('BIGQUERY_DATASET'),
                               fixtures.get_table_configuration())
    verify_client = self.get_client('service_account_manage')
    dataset = verify_client.dataset(os.environ.get('BIGQUERY_DATASET'))
    table = verify_client.get_table(
        dataset.table(os.environ.get('BIGQUERY_TABLE')))
    # Both columns are plain nullable scalars with no nested fields.
    expected_fields = [('col1', 'STRING'), ('col2', 'INTEGER')]
    for field, (name, field_type) in zip(table.schema, expected_fields):
        assert field.name == name
        assert field.field_type == field_type
        assert field.fields == ()
        assert field.mode == 'NULLABLE'
def test_write_table_async(self, data_dir, credentials_type):
    """write_table returns a RUNNING job; dataset and table must exist."""
    bq_writer = writer.Writer(
        self.get_client(credentials_type=credentials_type))
    load_job = bq_writer.write_table(
        data_dir + 'simple_csv/in/tables/table.csv',
        os.environ.get('BIGQUERY_DATASET'),
        fixtures.get_table_configuration())
    # The async variant returns before the load finishes.
    assert load_job.state == 'RUNNING'
    verify_client = self.get_client('service_account_manage')
    dataset_ref = verify_client.dataset(os.environ.get('BIGQUERY_DATASET'))
    # Dataset and table creation happen synchronously even for async
    # loads, so neither lookup may raise NotFound.
    try:
        dataset = verify_client.get_dataset(dataset_ref)
    except exceptions.NotFound:
        pytest.fail('Must not raise an exception.')
    try:
        verify_client.get_table(
            dataset.table(os.environ.get('BIGQUERY_TABLE')))
    except exceptions.NotFound:
        pytest.fail('Must not raise an exception.')