def test_action_not_defined(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "invalid_action/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == "Action invalid not defined"

def test_missing_authorization(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "missing_authorization/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == "Authorization missing."

def test_empty_config(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "empty_config/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == "Configuration is empty."

def test_run_authorization_missing_user_exception(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + 'missing_authorization/'
    application = app.App()
    try:
        application.run()
        pytest.fail('Must raise exception')
    except UserException as err:
        assert str(err) == 'Authorization missing.'

def test_invalid_authorization(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "invalid_authorization/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == 'Cannot connect to BigQuery.' \
                           ' Please try reauthorizing.'

def test_empty_dataset(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "empty_dataset/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == \
            "Google BigQuery dataset not specified in the configuration."

def test_missing_input(self, data_dir):
    os.environ['KBC_DATADIR'] = data_dir + "missing_input/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == "Missing input mapping for " \
                           "table in.c-main.table1."

def test_service_account_missing_private_key(self, data_dir):
    os.environ['KBC_DATADIR'] = \
        data_dir + "service_account_missing_private_key/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == 'Service account private key missing.'

def test_invalid_dataset_name(self, data_dir, capsys, credentials_type):
    os.environ['KBC_DATADIR'] = '%ssample_populated/' % data_dir
    self.prepare(action='run', data_dir=data_dir,
                 credentials_type=credentials_type)
    application = app.App()
    with pytest.raises(UserException, match=r'Invalid dataset ID'):
        application.run()

def test_invalid_authorization_private_key(self, data_dir):
    os.environ['KBC_DATADIR'] = \
        data_dir + "invalid_authorization_private_key/"
    application = app.App()
    try:
        application.run()
        pytest.fail("Must raise exception.")
    except exceptions.UserException as err:
        assert str(err) == 'Cannot get credentials from service account ' \
                           '[email protected].' \
                           'gserviceaccount.com. Reason ' \
                           '"No key could be detected.".'

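# The try/except/pytest.fail pattern repeated in the tests above can be
# collapsed into pytest.raises, as test_invalid_dataset_name above already
# does. A hypothetical helper sketch (assert_user_exception is not part of
# the original suite; re.escape keeps the expected message literal so dots
# and quotes are not treated as regex metacharacters):
def assert_user_exception(self, data_dir, subdir, message):
    import re
    os.environ['KBC_DATADIR'] = data_dir + subdir
    with pytest.raises(exceptions.UserException,
                       match=re.escape(message)):
        app.App().run()

# Usage sketch:
#     self.assert_user_exception(
#         data_dir, "empty_config/", "Configuration is empty.")
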
def test_list(self, data_dir, capsys, credentials_type):
    client = self.get_client('service_account_manage')
    dataset_reference = bigquery.DatasetReference(
        self.get_project(), os.environ.get('BIGQUERY_DATASET'))
    dataset = bigquery.Dataset(dataset_reference)
    client.create_dataset(dataset)

    os.environ['KBC_DATADIR'] = data_dir + 'sample_populated/'
    self.prepare(action='list', data_dir=data_dir,
                 credentials_type=credentials_type)
    application = app.App()
    application.run()

    out, err = capsys.readouterr()
    assert err == ''
    data = json.loads(out)
    assert 'projects' in data.keys()
    assert self.get_project() in map(lambda project: project['id'],
                                     data['projects'])
    project = list(
        filter(lambda project: project['id'] == self.get_project(),
               data['projects']))[0]
    assert os.environ.get('BIGQUERY_DATASET') in map(
        lambda dataset: dataset['id'], project['datasets'])

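# test_list creates the BIGQUERY_DATASET dataset but never drops it, so a
# rerun against the same project can fail on create_dataset with a conflict.
# A minimal teardown sketch, assuming a google-cloud-bigquery version whose
# delete_dataset accepts delete_contents and not_found_ok; the fixture name
# and wiring are illustrative, not part of the original suite:
@pytest.fixture
def bigquery_dataset_cleanup(self):
    yield
    client = self.get_client('service_account_manage')
    dataset_reference = bigquery.DatasetReference(
        self.get_project(), os.environ.get('BIGQUERY_DATASET'))
    # drop contained tables too, and tolerate a missing dataset
    client.delete_dataset(dataset_reference, delete_contents=True,
                          not_found_ok=True)
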
def test_successful_run(self, data_dir, capsys, credentials_type):
    os.environ['KBC_DATADIR'] = '%ssample_populated/' % data_dir
    self.prepare(action='run', data_dir=data_dir,
                 credentials_type=credentials_type)

    # run app
    application = app.App()
    application.run()

    # assertions
    out, err = capsys.readouterr()
    assert err == ''
    assert out == 'Loading table in.c-bucket.table1 into BigQuery ' \
                  'as %s.table1\n' \
                  'Loading table in.c-bucket.table2 into BigQuery ' \
                  'as %s.table2\n' \
                  'BigQuery Writer finished\n' % (
                      os.environ.get('BIGQUERY_DATASET'),
                      os.environ.get('BIGQUERY_DATASET')
                  )

    client = self.get_client('service_account_manage')

    # check for only the testing dataset
    datasets = list(client.list_datasets())
    assert len(datasets) >= 1
    # todo find the required dataset
    matching_datasets = list(
        filter(
            lambda dataset: dataset.dataset_id == os.environ.get(
                'BIGQUERY_DATASET'),
            datasets))
    assert len(matching_datasets) == 1
    assert matching_datasets[0].dataset_id == \
        os.environ.get('BIGQUERY_DATASET')

    tables = list(client.list_tables(matching_datasets[0].reference))
    assert len(tables) == 2
    assert tables[0].reference.table_id == 'table1'
    assert tables[1].reference.table_id == 'table2'

    table_reference = matching_datasets[0].table('table1')
    table = client.get_table(table_reference)
    rcvd_schema = table.schema
    assert rcvd_schema[0].field_type == 'STRING'
    assert rcvd_schema[0].fields == ()
    assert rcvd_schema[0].mode == 'NULLABLE'
    assert rcvd_schema[0].name == 'string'
    assert rcvd_schema[1].field_type == 'INTEGER'
    assert rcvd_schema[1].fields == ()
    assert rcvd_schema[1].mode == 'NULLABLE'
    assert rcvd_schema[1].name == 'integer'
    assert rcvd_schema[2].field_type == 'FLOAT'
    assert rcvd_schema[2].fields == ()
    assert rcvd_schema[2].mode == 'NULLABLE'
    assert rcvd_schema[2].name == 'float'
    assert rcvd_schema[3].field_type == 'BOOLEAN'
    assert rcvd_schema[3].fields == ()
    assert rcvd_schema[3].mode == 'NULLABLE'
    assert rcvd_schema[3].name == 'boolean'
    assert rcvd_schema[4].field_type == 'TIMESTAMP'
    assert rcvd_schema[4].fields == ()
    assert rcvd_schema[4].mode == 'NULLABLE'
    assert rcvd_schema[4].name == 'timestamp'

    query = 'SELECT * FROM %s.%s ORDER BY 1 DESC' % (
        os.environ.get('BIGQUERY_DATASET'), 'table1')
    query_job = client.query(query)
    row_data = list(query_job)
    assert len(row_data) == 3
    assert row_data[0][0] == 'MyString'
    assert row_data[0][1] == 123456
    assert row_data[0][2] == 123.456
    assert row_data[0][3] is True
    assert row_data[0][4] == datetime(2014, 8, 19, 12, 41, 35, 220000,
                                      tzinfo=timezone.utc)
    assert row_data[1][0] == ''
    assert row_data[1][1] == 0
    assert row_data[1][2] == 0
    assert row_data[1][3] is False
    assert row_data[1][4] is None
    assert row_data[2][0] is None
    assert row_data[2][1] is None
    assert row_data[2][2] is None
    assert row_data[2][3] is None
    assert row_data[2][4] is None

    query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                     'table2')
    query_job = client.query(query)
    row_data = list(query_job)
    assert len(row_data) == 3

    # run app second time (increments)
    application = app.App()
    application.run()

    query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                     'table1')
    query_job = client.query(query)
    row_data = list(query_job)
    assert len(row_data) == 3

    query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                     'table2')
    query_job = client.query(query)
    row_data = list(query_job)
    assert len(row_data) == 6

    out, err = capsys.readouterr()
    assert err == ''
    assert out == 'Loading table in.c-bucket.table1 into BigQuery ' \
                  'as %s.table1\n' \
                  'Loading table in.c-bucket.table2 into BigQuery ' \
                  'as %s.table2\n' \
                  'BigQuery Writer finished\n' % (
                      os.environ.get('BIGQUERY_DATASET'),
                      os.environ.get('BIGQUERY_DATASET')
                  )

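
# These tests assume a data_dir fixture and, for the integration tests, a
# credentials_type parameter; neither is defined in this file. A minimal
# conftest.py sketch of what they might look like (the data/ location and
# the parameter values are assumptions, not taken from the original suite):
import os

import pytest


@pytest.fixture
def data_dir():
    # trailing separator matters: the tests concatenate subdirectories
    # directly, e.g. data_dir + "empty_config/"
    return os.path.join(os.path.dirname(__file__), 'data') + os.sep


@pytest.fixture(params=['service_account', 'oauth'])
def credentials_type(request):
    # hypothetical parametrization; the real suite may use other values
    return request.param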