Example #1
    def test_action_not_defined(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "invalid_action/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == "Action invalid not defined"
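
All of these snippets are methods of a pytest test class and rely on imports and a `data_dir` fixture defined elsewhere in the test module. A minimal sketch of that scaffolding, assuming a `bigquery_writer` package name and a conventional fixture layout (neither is shown on this page):

    import json
    import os
    from datetime import datetime, timezone

    import pytest
    from google.cloud import bigquery

    from bigquery_writer import app, exceptions
    from bigquery_writer.exceptions import UserException


    @pytest.fixture
    def data_dir():
        # Hypothetical layout: fixture directories live next to the tests.
        # The trailing separator matters because the tests concatenate
        # subdirectory names directly onto this path.
        return os.path.join(os.path.dirname(__file__), 'data') + os.sep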
Example #2
    def test_missing_authorization(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "missing_authorization/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == "Authorization missing."
Example #3
    def test_empty_config(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "empty_config/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == "Configuration is empty."
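
The try/pytest.fail/except pattern used above can be written more compactly with `pytest.raises`, which Example #9 below already does. An equivalent form of Example #3 (note that `match` performs a regular-expression search, so a plain substring of the message suffices):

    def test_empty_config(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "empty_config/"
        application = app.App()
        with pytest.raises(exceptions.UserException,
                           match='Configuration is empty'):
            application.run()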
Example #4
    def test_run_authorization_missing_user_exception(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + 'missing_authorization/'
        application = app.App()
        try:
            application.run()
            pytest.fail('Must raise exception')
        except UserException as err:
            assert str(err) == 'Authorization missing.'
Example #5
    def test_invalid_authorization(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "invalid_authorization/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == 'Cannot connect to BigQuery.' \
                               ' Please try reauthorizing.'
Example #6
    def test_empty_dataset(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "empty_dataset/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == \
                "Google BigQuery dataset not specified in the configuration."
Example #7
    def test_missing_input(self, data_dir):
        os.environ['KBC_DATADIR'] = data_dir + "missing_input/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == "Missing input mapping for " \
                "table in.c-main.table1."
Example #8
    def test_service_account_missing_private_key(self, data_dir):
        os.environ['KBC_DATADIR'] = \
            data_dir + "service_account_missing_private_key/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == 'Service account private key missing.'
Example #9
    def test_invalid_dataset_name(self, data_dir, capsys, credentials_type):
        os.environ['KBC_DATADIR'] = '%ssample_populated/' % data_dir
        self.prepare(action='run',
                     data_dir=data_dir,
                     credentials_type=credentials_type)
        application = app.App()

        with pytest.raises(UserException, match=r'Invalid dataset ID'):
            application.run()
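
Examples #9 and later call a `prepare()` helper that is not shown on this page. Judging from its arguments, it presumably rewrites the `config.json` inside `KBC_DATADIR` with the requested action and credentials before the app runs. A purely hypothetical sketch; the `get_credentials` helper and the config layout are assumptions:

    import json
    import os

    def prepare(self, action, data_dir, credentials_type):
        # Hypothetical: patch the sample config.json in KBC_DATADIR with
        # the requested action and credentials of the given type.
        config_path = os.path.join(os.environ['KBC_DATADIR'], 'config.json')
        with open(config_path) as config_file:
            config = json.load(config_file)
        config['action'] = action
        # get_credentials() is assumed; the real tests likely read the
        # service account key or OAuth token from the environment.
        config['authorization'] = self.get_credentials(credentials_type)
        with open(config_path, 'w') as config_file:
            json.dump(config, config_file)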
Example #10
    def test_invalid_authorization_private_key(self, data_dir):
        os.environ['KBC_DATADIR'] = \
            data_dir + "invalid_authorization_private_key/"
        application = app.App()
        try:
            application.run()
            pytest.fail("Must raise exception.")
        except exceptions.UserException as err:
            assert str(err) == 'Cannot get credentials from service account ' \
                               '[email protected].' \
                               'gserviceaccount.com. Reason ' \
                               '"No key could be detected.".'
Example #11
    def test_list(self, data_dir, capsys, credentials_type):
        client = self.get_client('service_account_manage')
        dataset_reference = bigquery.DatasetReference(
            self.get_project(), os.environ.get('BIGQUERY_DATASET'))
        dataset = bigquery.Dataset(dataset_reference)
        client.create_dataset(dataset)

        os.environ['KBC_DATADIR'] = data_dir + 'sample_populated/'
        self.prepare(action='list',
                     data_dir=data_dir,
                     credentials_type=credentials_type)
        application = app.App()
        application.run()
        out, err = capsys.readouterr()
        assert err == ''
        data = json.loads(out)
        assert 'projects' in data.keys()
        assert self.get_project() in map(lambda project: project['id'],
                                         data['projects'])
        project = list(
            filter(lambda project: project['id'] == self.get_project(),
                   data['projects']))[0]
        assert os.environ.get('BIGQUERY_DATASET') in map(
            lambda dataset: dataset['id'], project['datasets'])
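
`test_list` creates a dataset but no cleanup is visible in the snippet, so a fixture or teardown elsewhere presumably removes it. With the same google-cloud-bigquery client, such cleanup would look roughly like:

    client.delete_dataset(dataset_reference,
                          delete_contents=True,
                          not_found_ok=True)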
Example #12
    def test_successful_run(self, data_dir, capsys, credentials_type):
        os.environ['KBC_DATADIR'] = '%ssample_populated/' % data_dir
        self.prepare(action='run',
                     data_dir=data_dir,
                     credentials_type=credentials_type)
        # run app
        application = app.App()
        application.run()

        # assertions
        out, err = capsys.readouterr()
        assert err == ''
        assert out == 'Loading table in.c-bucket.table1 into BigQuery ' \
            'as %s.table1\n' \
            'Loading table in.c-bucket.table2 into BigQuery as %s.table2\n' \
            'BigQuery Writer finished\n' % (
                os.environ.get('BIGQUERY_DATASET'),
                os.environ.get('BIGQUERY_DATASET')
            )

        client = self.get_client('service_account_manage')

        # the project may contain datasets other than the testing one
        datasets = list(client.list_datasets())
        assert len(datasets) >= 1
        # find the testing dataset among them
        matching_datasets = list(
            filter(
                lambda dataset: dataset.dataset_id == os.environ.get(
                    'BIGQUERY_DATASET'), datasets))

        assert len(matching_datasets) == 1
        assert matching_datasets[0].dataset_id == \
            os.environ.get('BIGQUERY_DATASET')

        tables = list(client.list_tables(matching_datasets[0].reference))
        assert len(tables) == 2
        assert tables[0].reference.table_id == 'table1'
        assert tables[1].reference.table_id == 'table2'

        table_reference = matching_datasets[0].table('table1')
        table = client.get_table(table_reference)

        rcvd_schema = table.schema
        assert rcvd_schema[0].field_type == 'STRING'
        assert rcvd_schema[0].fields == ()
        assert rcvd_schema[0].mode == 'NULLABLE'
        assert rcvd_schema[0].name == 'string'
        assert rcvd_schema[1].field_type == 'INTEGER'
        assert rcvd_schema[1].fields == ()
        assert rcvd_schema[1].mode == 'NULLABLE'
        assert rcvd_schema[1].name == 'integer'
        assert rcvd_schema[2].field_type == 'FLOAT'
        assert rcvd_schema[2].fields == ()
        assert rcvd_schema[2].mode == 'NULLABLE'
        assert rcvd_schema[2].name == 'float'
        assert rcvd_schema[3].field_type == 'BOOLEAN'
        assert rcvd_schema[3].fields == ()
        assert rcvd_schema[3].mode == 'NULLABLE'
        assert rcvd_schema[3].name == 'boolean'
        assert rcvd_schema[4].field_type == 'TIMESTAMP'
        assert rcvd_schema[4].fields == ()
        assert rcvd_schema[4].mode == 'NULLABLE'
        assert rcvd_schema[4].name == 'timestamp'

        query = 'SELECT * FROM %s.%s ORDER BY 1 DESC' % (
            os.environ.get('BIGQUERY_DATASET'), 'table1')

        query_job = client.query(query)
        row_data = list(query_job)

        assert len(row_data) == 3
        assert row_data[0][0] == 'MyString'
        assert row_data[0][1] == 123456
        assert row_data[0][2] == 123.456
        assert row_data[0][3] is True
        assert row_data[0][4] == datetime(2014,
                                          8,
                                          19,
                                          12,
                                          41,
                                          35,
                                          220000,
                                          tzinfo=timezone.utc)
        assert row_data[1][0] == ''
        assert row_data[1][1] == 0
        assert row_data[1][2] == 0
        assert row_data[1][3] is False
        assert row_data[1][4] is None

        assert row_data[2][0] is None
        assert row_data[2][1] is None
        assert row_data[2][2] is None
        assert row_data[2][3] is None
        assert row_data[2][4] is None

        query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                         'table2')
        query_job = client.query(query)

        row_data = list(query_job)
        assert len(row_data) == 3

        # run the app a second time: table1 is fully reloaded (still 3
        # rows below), table2 is loaded incrementally (doubles to 6)
        application = app.App()
        application.run()

        query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                         'table1')
        query_job = client.query(query)
        row_data = list(query_job)
        assert len(row_data) == 3

        query = 'SELECT * FROM %s.%s' % (os.environ.get('BIGQUERY_DATASET'),
                                         'table2')
        query_job = client.query(query)
        row_data = list(query_job)
        assert len(row_data) == 6

        out, err = capsys.readouterr()
        assert err == ''
        assert out == 'Loading table in.c-bucket.table1 into BigQuery ' \
            'as %s.table1\n' \
            'Loading table in.c-bucket.table2 into BigQuery as %s.table2\n' \
            'BigQuery Writer finished\n' % (
                os.environ.get('BIGQUERY_DATASET'),
                os.environ.get('BIGQUERY_DATASET')
            )
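
The second run leaves table1 at 3 rows but doubles table2 to 6, which suggests table1 is configured for full load (replace) and table2 for incremental load (append) in the sample configuration. A quick standalone check of the append behavior, assuming the same client and dataset:

    query_job = client.query(
        'SELECT COUNT(*) AS cnt FROM %s.table2'
        % os.environ.get('BIGQUERY_DATASET'))
    assert list(query_job)[0].cnt == 6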