    def test_relaunch_failed_jobs_startsExtractJob(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        table = DUMMY_TABLE_NAME
        job = self.__create_extract_job_mock("prefixed-some-job-id", table,
                                             True)
        self.client_mock.list_jobs.return_value = [job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id", "valhalla")
        ems_bigquery_client.relaunch_failed_jobs("prefixed", MIN_CREATION_TIME)

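        # call_args_list[0][1] is the kwargs dict of the first extract_table call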
        arguments = self.client_mock.extract_table.call_args_list[0][1]
        self.assertEqual("prefixed-retry-1-", arguments["job_id_prefix"])
        self.assertEqual(arguments["destination_uris"], job.destination_uris)
        self.assertEqual(arguments["job_id_prefix"], "prefixed-retry-1-")
        self.assertEqual(arguments["location"], "valhalla")
        self.assertEqual(arguments["source"],
                         TableReference.from_string(table))

        self.assertEqual(arguments["job_config"].compression, 'NONE')
        self.assertEqual(arguments["job_config"].destination_format,
                         job.destination_format)
        self.assertEqual(arguments["job_config"].field_delimiter,
                         job.field_delimiter)
        self.assertEqual(arguments["job_config"].print_header,
                         job.print_header)
        self.assertEqual(arguments["job_config"].labels,
                         {"label1": "label1_value"})

    def test_get_job_list_returnWithEmptyIterator(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.client_mock.list_jobs.return_value = []

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        assert result == []

    def test_get_job_list_returnWithEmsLoadJobIterator(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        load_job_mock = Mock(LoadJob)
        load_job_mock.job_id = "123"
        load_job_mock.query = "SELECT 1"
        load_job_mock.state = "DONE"
        load_job_mock.write_disposition = None
        load_job_mock.create_disposition = None
        load_job_mock.error_result = None
        load_job_mock.source_uris = ["gs://some-bucket-id/some-blob-id"]
        destination = Mock(TableReference)
        destination.project = "some-other-project-id"
        destination.dataset_id = "some-destination-dataset"
        destination.table_id = "some-destination-table"
        load_job_mock.destination = destination
        expected_schema = {
            "fields": [{
                "description": None,
                "mode": "NULLABLE",
                "type": "STRING",
                "name": "fruit"
            }]
        }
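        # _parse_schema_resource is a google-cloud-bigquery internal helper that
        # turns a schema resource dict into a list of SchemaField objects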
        load_job_mock.schema = _parse_schema_resource(expected_schema)

        self.client_mock.list_jobs.return_value = [load_job_mock]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        self.assertEqual(1, len(result))
        result_job = result[0]
        self.assertIsInstance(result_job, EmsLoadJob)
        self.assertEqual(EmsJobState("DONE"), result_job.state)
        self.assertEqual("123", result_job.job_id)
        self.assertFalse(result_job.is_failed)
        self.assertIsInstance(result_job.load_config, EmsLoadJobConfig)
        self.assertEqual("some-other-project-id",
                         result_job.load_config.destination_project_id)
        self.assertEqual("some-destination-dataset",
                         result_job.load_config.destination_dataset)
        self.assertEqual("some-destination-table",
                         result_job.load_config.destination_table)
        self.assertEqual(expected_schema, result_job.load_config.schema)
        self.assertEqual("gs://some-bucket-id/some-blob-id",
                         result_job.load_config.source_uri_template)

    def test_get_job_list_returnsJobWithEmsQueryJobConfigWithoutDestination(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.query_job_mock.job_id = "123"
        self.query_job_mock.query = "SELECT 1"
        self.query_job_mock.state = "DONE"
        self.query_job_mock.destination = None
        self.client_mock.list_jobs.return_value = [self.query_job_mock]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        self.assertIsNone(result[0].query_config.destination_project_id)
        self.assertIsNone(result[0].query_config.destination_dataset)
        self.assertIsNone(result[0].query_config.destination_table)

    def test_relaunch_failed_jobs_startsQueryJob(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        job = self.__create_query_job_mock("prefixed-some-job-id", True)
        self.client_mock.list_jobs.return_value = [job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        ems_bigquery_client.relaunch_failed_jobs("prefixed", MIN_CREATION_TIME)

        arguments = self.client_mock.query.call_args_list[0][1]
        self.assertEqual("prefixed-retry-1-", arguments["job_id_prefix"])
        self.assertEqual(arguments["query"], "SIMPLE QUERY")
        self.assertEqual(arguments["job_config"].time_partitioning,
                         job.time_partitioning)
        self.assertEqual(arguments["job_config"].labels,
                         {"label1": "label1_value"})

    def test_get_jobs_with_prefix_returnsEmptyIfNoJobFoundWithTheGivenPrefix(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.client_mock.list_jobs.return_value = []

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        min_creation_time = datetime.now()
        query_jobs = ems_bigquery_client.get_jobs_with_prefix(
            "prefixed", min_creation_time, all_users=False)

        self.assertEqual(query_jobs, [])
        self.client_mock.list_jobs.assert_called_with(
            all_users=False,
            max_results=20,
            min_creation_time=min_creation_time,
            max_creation_time=None)

    def test_run_async_load_job_submitsLoadJobAndReturnsJobIdWithProperConfig(
            self, bigquery_module_patch: bigquery):
        project_id = "some-project-id"
        source_uri = "gs://some-source-uri/to_object"
        bigquery_module_patch.Client.return_value = self.client_mock
        input_json_schema = {
            "fields": [{
                "type": "STRING",
                "name": "f1"
            }, {
                "mode": "REQUIRED",
                "type": "INTEGER",
                "name": "f2"
            }]
        }
        load_job_config = EmsLoadJobConfig(
            destination_project_id="some-destination-project-id",
            destination_dataset="some-destination-dataset",
            destination_table="some-destination-table",
            schema=input_json_schema,
            source_uri_template=source_uri,
            labels={"label1": "label1_value"})
        self.load_job_mock = Mock(LoadJob)
        self.load_job_mock.job_id = self.JOB_ID
        self.client_mock.load_table_from_uri.return_value = self.load_job_mock

        ems_bigquery_client = EmsBigqueryClient(project_id)
        result_job_id = ems_bigquery_client.run_async_load_job(
            "prefix", load_job_config)

        arguments = self.client_mock.load_table_from_uri.call_args_list[0][1]
        self.assertEqual(arguments["source_uris"], source_uri)
        self.assertEqual(arguments["job_id_prefix"], "prefix")
        self.assertEqual(result_job_id, "some-job-id")
        job_config = arguments["job_config"]
        self.assertIsInstance(job_config, LoadJobConfig)
        self.assertEqual(job_config.create_disposition,
                         EmsCreateDisposition.CREATE_IF_NEEDED.value)
        self.assertEqual(job_config.write_disposition,
                         EmsWriteDisposition.WRITE_APPEND.value)
        self.assertEqual(job_config.labels, {"label1": "label1_value"})

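        # SchemaField's third positional argument is the mode ("NULLABLE" when omitted)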
        field1 = SchemaField("f1", "STRING")
        field2 = SchemaField("f2", "INTEGER", "REQUIRED")
        self.assertEqual(job_config.schema, [field1, field2])

    def test_wait_for_job_done_delegatesCallToOriginalJob(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.client_mock.get_job.return_value = self.query_job_mock
        self.query_job_mock.job_id = "1234"
        self.query_job_mock.priority = "INTERACTIVE"
        self.query_job_mock.state = "DONE"
        self.query_job_mock.result.return_value = []  # result rows don't matter here

        timeout = 123.0
        ems_bigquery_client = EmsBigqueryClient("some-project-id")

        job = ems_bigquery_client.wait_for_job_done("job_id", timeout)

        self.query_job_mock.result.assert_called_with(timeout=timeout)
        self.assertIsInstance(job, EmsQueryJob)
        self.assertEqual(job.job_id, "1234")
        self.assertEqual(job.state, EmsJobState.DONE)

    def test_get_job_list_returnsJobsWithCreatedTime(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock

        created1 = datetime.fromtimestamp(123456)
        created2 = datetime.fromtimestamp(234567)
        first_job = self.__create_query_job_mock(
            "prefixed-retry-2-some-job-id", True, created1)
        second_job = self.__create_extract_job_mock(
            "prefixed-retry-2-some-job-id", "p.d.table1", False, created2)
        self.client_mock.list_jobs.return_value = [first_job, second_job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")

        jobs = ems_bigquery_client.get_jobs_with_prefix("prefixed", created1)

        assert jobs[0].created == created1
        assert jobs[1].created == created2

    def test_get_job_list_returnsJobWithEmsQueryJobConfigWithDispositionsConvertedCorrectly(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.query_job_mock.job_id = "123"
        self.query_job_mock.query = "SELECT 1"
        self.query_job_mock.state = "DONE"
        self.query_job_mock.write_disposition = "WRITE_APPEND"
        self.query_job_mock.create_disposition = "CREATE_IF_NEEDED"
        self.client_mock.list_jobs.return_value = [self.query_job_mock]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        self.assertEqual(result[0].query_config.write_disposition,
                         EmsWriteDisposition.WRITE_APPEND)
        self.assertEqual(result[0].query_config.create_disposition,
                         EmsCreateDisposition.CREATE_IF_NEEDED)

    def __setup_client(self,
                       bigquery_module_patch,
                       return_value=None,
                       location=None):
        project_id = "some-project-id"
        bigquery_module_patch.Client.return_value = self.client_mock
        self.client_mock.project = "some-project-id"
        self.client_mock.query.return_value = self.query_job_mock
        self.query_job_mock.job_id = self.JOB_ID
        if location is not None:
            ems_bigquery_client = EmsBigqueryClient(project_id, location)
        else:
            ems_bigquery_client = EmsBigqueryClient(project_id)

        if return_value is not None:
            self.query_job_mock.result.return_value = return_value

        return ems_bigquery_client

    def test_relaunch_failed_jobs_canRetryMoreThanNineTimes(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        ninth_job = self.__create_query_job_mock(
            "prefixed-retry-9-some-job-id", True)
        tenth_job = self.__create_query_job_mock(
            "prefixed-retry-10-some-job-id", True)
        self.client_mock.list_jobs.return_value = [ninth_job, tenth_job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        ems_bigquery_client.relaunch_failed_jobs("prefixed",
                                                 MIN_CREATION_TIME,
                                                 max_attempts=12)

        arguments = self.client_mock.query.call_args_list[0][1]
        self.assertEqual("prefixed-retry-10-", arguments["job_id_prefix"])

        arguments = self.client_mock.query.call_args_list[1][1]
        self.assertEqual("prefixed-retry-11-", arguments["job_id_prefix"])

    def test_relaunch_failed_jobs_startsNewJobWithIncreasedRetryIndex(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        query_job = self.__create_query_job_mock(
            "prefixed-retry-1-some-random1", True)
        extract_job = self.__create_extract_job_mock(
            "prefixed-retry-1-some-random2", DUMMY_TABLE_NAME, True)
        self.client_mock.list_jobs.return_value = [query_job, extract_job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        ems_bigquery_client.relaunch_failed_jobs("prefixed", MIN_CREATION_TIME)

        self.assertEqual(
            "prefixed-retry-2-",
            self.client_mock.query.call_args_list[0][1]["job_id_prefix"])
        self.assertEqual(
            "prefixed-retry-2-",
            self.client_mock.extract_table.call_args_list[0][1]
            ["job_id_prefix"])

    def test_get_job_list_returnWithEmsQueryJobIterator(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.query_job_mock.job_id = "123"
        self.query_job_mock.query = "SELECT 1"
        self.query_job_mock.state = "DONE"
        self.query_job_mock.error_result = None
        self.client_mock.list_jobs.return_value = [self.query_job_mock]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        assert len(result) == 1
        assert isinstance(result[0], EmsQueryJob)
        assert result[0].state == EmsJobState("DONE")
        assert result[0].job_id == "123"
        assert result[0].query == "SELECT 1"
        assert result[0].is_failed is False
        assert isinstance(result[0].query_config, EmsQueryJobConfig)

    def test_run_async_extract_job_submitsExtractJobAndReturnsJobIdWithProperConfig(
            self, bigquery_module_patch: bigquery):
        project_id = "some-project-id"
        table = "some-project.some-dataset.some-table"
        destination_uris = [
            "gs://some-source-uri/to_object1",
            "gs://some-source-uri/to_object2"
        ]
        job_prefix = "some_job_prefix"
        bigquery_module_patch.Client.return_value = self.client_mock

        expected_job_id = self.JOB_ID
        self.extract_job_mock = Mock(ExtractJob)
        self.extract_job_mock.job_id = expected_job_id
        self.client_mock.extract_table.return_value = self.extract_job_mock
        ems_job_config = EmsExtractJobConfig(
            compression=Compression.GZIP,
            destination_format=DestinationFormat.CSV,
            field_delimiter="Deli mit R",
            print_header=True,
            labels={"label1": "label1_value"})

        ems_bigquery_client = EmsBigqueryClient(project_id, "Emelet")
        result_job_id = ems_bigquery_client.run_async_extract_job(
            job_id_prefix=job_prefix,
            table=table,
            destination_uris=destination_uris,
            job_config=ems_job_config)
        call_args_list = self.client_mock.extract_table.call_args_list
        args = call_args_list[0][1]

        assert args["location"] == "Emelet"
        assert args["source"] == TableReference.from_string(table_id=table)
        assert args["job_id_prefix"] == job_prefix
        assert args["destination_uris"] == destination_uris
        assert args["job_config"].compression == "GZIP"
        assert args["job_config"].destination_format == "CSV"
        assert args["job_config"].field_delimiter == "Deli mit R"
        assert args["job_config"].print_header == True
        assert args["job_config"].labels == {"label1": "label1_value"}
        assert result_job_id == expected_job_id

    def test_relaunch_failed_jobs_raisesExceptionIfRetryCountExceedsTheGivenLimit(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        first_job = self.__create_query_job_mock(
            "prefixed-retry-2-some-job-id", True)
        self.client_mock.list_jobs.return_value = [first_job]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")

        self.assertRaises(RetryLimitExceededError,
                          ems_bigquery_client.relaunch_failed_jobs, "prefixed",
                          MIN_CREATION_TIME)

    def test_get_jobs_for_prefix_returnsFilteredJobs_ifJobFoundWithSpecificJobIdPrefix(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        failed_prefixed_query_job_mock = self.__create_query_job_mock(
            "prefixed-some-job-id1", True)
        succeeded_prefixed_query_job_mock = self.__create_query_job_mock(
            "prefixed-some-job-id2", False)
        succeeded_non_prefixed_query_job_mock = self.__create_query_job_mock(
            "some-job-id", False)

        self.client_mock.list_jobs.return_value = [
            failed_prefixed_query_job_mock, succeeded_prefixed_query_job_mock,
            succeeded_non_prefixed_query_job_mock
        ]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        jobs = ems_bigquery_client.get_jobs_with_prefix(
            "prefixed", datetime.now())
        job_ids = [job.job_id for job in jobs]

        self.assertEqual(set(job_ids),
                         {"prefixed-some-job-id1", "prefixed-some-job-id2"})

    def test_relaunch_failed_jobs_startsNewJobForAllFailedJobs(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        first_job = self.__create_query_job_mock("prefixed-query", True)
        second_job = self.__create_query_job_mock("prefixed-done", False)
        third_job = self.__create_extract_job_mock("prefixed-extract",
                                                   DUMMY_TABLE_NAME, True)
        self.client_mock.list_jobs.return_value = [
            first_job, second_job, third_job
        ]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        ems_bigquery_client.relaunch_failed_jobs("prefixed", MIN_CREATION_TIME)

        self.client_mock.query.assert_called_once()
        self.client_mock.extract_table.assert_called_once()
        self.assertEqual(
            "prefixed-retry-1-",
            self.client_mock.query.call_args_list[0][1]["job_id_prefix"])
        self.assertEqual(
            "prefixed-retry-1-",
            self.client_mock.extract_table.call_args_list[0][1]
            ["job_id_prefix"])

    def test_get_job_list_returnsJobWithEmsQueryJobConfigWithSetDestination(
            self, bigquery_module_patch: bigquery):
        bigquery_module_patch.Client.return_value = self.client_mock
        self.query_job_mock.job_id = "123"
        self.query_job_mock.query = "SELECT 1"
        self.query_job_mock.state = "DONE"
        destination = Mock(TableReference)
        self.query_job_mock.destination = destination
        destination.project = "some-other-project-id"
        destination.dataset_id = "some-destination-dataset"
        destination.table_id = "some-destination-table"
        self.client_mock.list_jobs.return_value = [self.query_job_mock]

        ems_bigquery_client = EmsBigqueryClient("some-project-id")
        job_list_iterable = ems_bigquery_client.get_job_list()

        result = list(job_list_iterable)
        self.assertEqual(result[0].query_config.destination_project_id,
                         "some-other-project-id")
        self.assertEqual(result[0].query_config.destination_dataset,
                         "some-destination-dataset")
        self.assertEqual(result[0].query_config.destination_table,
                         "some-destination-table")


class ItEmsBigqueryClient(TestCase):
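    # 24 hours in milliseconds, used as the default table expiration of the test dataset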
    ONE_DAY_IN_MS = 3600000 * 24
    GCP_BIGQUERY_CLIENT = None
    DATASET = None
    DUMMY_QUERY = "SELECT 1 AS data"
    BAD_QUERY = "VERY BAD QUERY"
    INSERT_TEMPLATE = "INSERT INTO `{}` (int_data, str_data) VALUES (1, 'hello')"
    SELECT_TEMPLATE = "SELECT * FROM `{}`"
    DUMMY_SELECT_TO_TABLE = "SELECT 1 AS int_data, 'hello' AS str_data"
    TEST_BUCKET_NAME = GCP_PROJECT_ID + "-gcp-toolkit-it"

    @classmethod
    def setUpClass(cls):
        cls.GCP_BIGQUERY_CLIENT = bigquery.Client(GCP_PROJECT_ID,
                                                  location="EU")
        cls.DATASET = cls.__dataset()
        cls.__create_dataset_if_not_exists(cls.DATASET)

    @classmethod
    def __create_dataset_if_not_exists(cls, dataset: Dataset):
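        # create_dataset raises Conflict if the dataset already exists; that's fine here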
        try:
            cls.GCP_BIGQUERY_CLIENT.create_dataset(dataset)
        except Conflict:
            pass

    def setUp(self):
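        # millisecond-resolution timestamp keeps the per-run table name unique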
        table_name = "test_table_" + str(
            int(datetime.datetime.utcnow().timestamp() * 1000))
        self.test_table = self.__create_test_table(table_name,
                                                   self.DATASET.reference)
        self.client = EmsBigqueryClient(GCP_PROJECT_ID)
        self.storage_client = storage.Client()

    def tearDown(self):
        try:
            bucket = self.storage_client.get_bucket(self.TEST_BUCKET_NAME)
            bucket.delete(True)
        except NotFound:
            pass

    def __create_test_table(self, table_name, dataset_ref):
        table_schema = [
            SchemaField("int_data", "INT64"),
            SchemaField("str_data", "STRING")
        ]
        table_reference = TableReference(dataset_ref, table_name)
        test_table = Table(table_reference, table_schema)
        test_table.time_partitioning = TimePartitioning("DAY")
        self.__delete_if_exists(test_table)
        self.GCP_BIGQUERY_CLIENT.create_table(test_table)
        return test_table

    def __get_test_bucket(self, bucket_name):

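        # get_bucket raises NotFound for a missing bucket; fall back to creating it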
        try:
            bucket = self.storage_client.get_bucket(bucket_name)
        except NotFound:
            bucket = self.storage_client.bucket(bucket_name)
            bucket.location = "europe-west1"
            bucket.storage_class = "REGIONAL"
            bucket.create()
        return bucket

    def __delete_if_exists(self, table):
        try:
            self.GCP_BIGQUERY_CLIENT.delete_table(table)
        except NotFound:
            pass

    def test_run_sync_query_dummyQuery(self):
        result = self.client.run_sync_query(self.DUMMY_QUERY)

        rows = list(result)
        assert len(rows) == 1
        assert {"data": 1} == rows[0]

    def test_run_sync_query_nonExistingDataset(self):
        with self.assertRaises(EmsApiError) as context:
            self.client.run_sync_query(
                "SELECT * FROM `non_existing_dataset.whatever`")

        error_message = context.exception.args[0].lower()
        assert "not found" in error_message
        assert GCP_PROJECT_ID in error_message
        assert "non_existing_dataset" in error_message

    def test_run_sync_query_onExistingData(self):
        query = self.INSERT_TEMPLATE.format(self.__get_table_path())
        self.client.run_sync_query(query)

        query_result = self.client.run_sync_query(
            self.SELECT_TEMPLATE.format(self.__get_table_path()))

        assert [{"int_data": 1, "str_data": "hello"}] == list(query_result)

    def test_run_sync_query_withDestinationSet(self):
        ems_query_job_config = EmsQueryJobConfig(
            destination_dataset=ItEmsBigqueryClient.DATASET.dataset_id,
            destination_table=self.test_table.table_id)
        query_with_destination_result = list(
            self.client.run_sync_query(
                self.DUMMY_SELECT_TO_TABLE,
                ems_query_job_config=ems_query_job_config))
        query_result = list(
            self.client.run_sync_query(
                self.SELECT_TEMPLATE.format(self.__get_table_path())))

        assert [{"int_data": 1, "str_data": "hello"}] == query_result
        assert query_with_destination_result == query_result

    def test_run_async_query_submitsJob(self):
        job_id = self.client.run_async_query(self.DUMMY_QUERY)

        job = self.GCP_BIGQUERY_CLIENT.get_job(job_id)

        assert job.state is not None

    def test_run_get_job_list_returnsQueryJob(self):
        unique_id = self.client.run_async_query(self.DUMMY_QUERY)
        jobs_iterator = self.client.get_job_list()
        found = unique_id in [job.job_id for job in jobs_iterator]
        assert found

    def test_run_get_job_list_returns2JobsIfMaxResultSetTo2(self):
        for _ in range(2):
            self.client.run_async_query(self.DUMMY_QUERY)
        jobs_iterator = self.client.get_job_list(max_result=2)
        assert 2 == len(list(jobs_iterator))

    def test_get_jobs_with_prefix(self):
        job_prefix = "testprefix" + uuid.uuid4().hex
        id1 = self.client.run_async_query(self.DUMMY_QUERY,
                                          job_id_prefix=job_prefix)
        id2 = self.client.run_async_query(self.BAD_QUERY,
                                          job_id_prefix=job_prefix)
        id3 = self.client.run_async_query(self.DUMMY_QUERY,
                                          job_id_prefix="unique_prefix")

        self.__wait_for_job_submitted(id1)
        self.__wait_for_job_submitted(id2)
        self.__wait_for_job_submitted(id3)

        min_creation_time = datetime.datetime.utcnow() - datetime.timedelta(
            minutes=1)
        jobs = self.client.get_jobs_with_prefix(job_prefix, min_creation_time)
        job_ids = [job.job_id for job in jobs]

        expected_ids = [id1, id2]
        self.assertSetEqual(set(expected_ids), set(job_ids))

    def test_relaunch_failed_query_jobs(self):
        job_prefix = "testprefix" + uuid.uuid4().hex
        id1 = self.client.run_async_query(self.DUMMY_QUERY,
                                          job_id_prefix=job_prefix)
        id2 = self.client.run_async_query(self.BAD_QUERY,
                                          job_id_prefix=job_prefix)
        id3 = self.client.run_async_query(self.BAD_QUERY,
                                          job_id_prefix="unique_prefix")

        self.__wait_for_job_submitted(id1)
        self.__wait_for_job_submitted(id2)
        self.__wait_for_job_submitted(id3)

        min_creation_time = datetime.datetime.utcnow() - datetime.timedelta(
            minutes=1)
        job_ids = self.client.relaunch_failed_jobs(job_prefix,
                                                   min_creation_time)

        self.assertEqual(len(job_ids), 1)
        self.assertRegex(job_ids[0], job_prefix + "-retry-1-.*")

    def test_relaunch_failed_extract_jobs(self):
        min_creation_time = datetime.datetime.utcnow()
        job_prefix = "testprefix" + uuid.uuid4().hex
        bucket = self.__get_test_bucket(self.TEST_BUCKET_NAME)
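        # derive the blob name from the test start time so repeated runs don't collide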
        blob_name = f'exported_{int(min_creation_time.timestamp())}.csv'
        good_bucket = f"gs://{self.TEST_BUCKET_NAME}/{blob_name}"
        wrong_bucket = f"gs://ems_not_a_god_bucket_it_test/wrong_blob.vsct"
        id1 = self.client.run_async_extract_job(job_prefix,
                                                self.__get_table_path(),
                                                [wrong_bucket],
                                                EmsExtractJobConfig())
        id2 = self.client.run_async_extract_job(job_prefix,
                                                self.__get_table_path(),
                                                [good_bucket],
                                                EmsExtractJobConfig())
        id3 = self.client.run_async_extract_job("unique_prefix",
                                                self.__get_table_path(),
                                                [wrong_bucket],
                                                EmsExtractJobConfig())

        self.__wait_for_job_submitted(id1)
        self.__wait_for_job_submitted(id2)
        self.__wait_for_job_submitted(id3)

        job_ids = self.client.relaunch_failed_jobs(job_prefix,
                                                   min_creation_time)

        bucket.delete_blob(blob_name)
        self.assertEqual(len(job_ids), 1)
        self.assertRegex(job_ids[0], job_prefix + "-retry-1-.*")

    def test_get_job_list_returnsLoadJob(self):
        config = EmsLoadJobConfig(
            {"fields": [{
                "name": "some_name",
                "type": "STRING"
            }]},
            "gs://some-non-existing-bucket-id/blob-id",
            destination_project_id=GCP_PROJECT_ID,
            destination_dataset="it_test_dataset",
            destination_table="some_table")
        min_creation_time = datetime.datetime.utcnow()
        unique_id = self.client.run_async_load_job("load_job_test", config)
        self.__wait_for_job_done(unique_id)
        jobs_iterator = self.client.get_jobs_with_prefix(
            "load_job_test", min_creation_time)
        found = unique_id in [job.job_id for job in jobs_iterator]

        self.assertTrue(found)

    def test_get_job_list_returnsExtractJob(self):
        min_creation_time = datetime.datetime.utcnow()
        destination_uris = ["gs://some-non-existing-bucket-id/destination1"]
        table_path = self.__get_table_path()
        unique_id = self.client.run_async_extract_job(
            "extract_job_test",
            table_path,
            destination_uris,
            job_config=EmsExtractJobConfig())
        self.__wait_for_job_done(unique_id)
        jobs_iterator = self.client.get_jobs_with_prefix(
            "extract_job_test", min_creation_time)

        job: EmsExtractJob = next(j for j in jobs_iterator
                                  if j.job_id == unique_id)

        self.assertEqual(job.table, table_path)
        self.assertEqual(job.destination_uris, destination_uris)
        self.assertIsInstance(job.state, EmsJobState)
        self.assertEqual(job.state.value, "DONE")
        self.assertTrue(job.is_failed)

    def test_run_async_extract_job_shouldSaveToBucket(self):
        query = self.INSERT_TEMPLATE.format(self.__get_table_path())
        self.client.run_sync_query(query)
        min_creation_time = datetime.datetime.utcnow()

        bucket_name = self.TEST_BUCKET_NAME
        bucket = self.__get_test_bucket(bucket_name)
        blob_name = f'exported_{int(min_creation_time.timestamp())}.csv'

        job = self.__run_async_extract_job(min_creation_time, bucket_name,
                                           blob_name, False)

        blob = bucket.blob(blob_name)
        self.assertFalse(job.is_failed)
        self.assertTrue(blob.exists())
        self.assertEqual(blob.download_as_string(), b'1,hello\n')

        bucket.delete_blob(blob_name)

    def test_run_async_extract_job_shouldSaveToBucketWithHeader(self):
        query = self.INSERT_TEMPLATE.format(self.__get_table_path())
        self.client.run_sync_query(query)
        min_creation_time = datetime.datetime.utcnow()

        bucket_name = self.TEST_BUCKET_NAME
        bucket = self.__get_test_bucket(bucket_name)
        blob_name = f'exported_{int(min_creation_time.timestamp())}.csv'

        job = self.__run_async_extract_job(min_creation_time, bucket_name,
                                           blob_name, True)

        blob = bucket.blob(blob_name)
        self.assertFalse(job.is_failed)
        self.assertTrue(blob.exists())
        self.assertEqual(blob.download_as_string(),
                         b'int_data,str_data\n1,hello\n')

        bucket.delete_blob(blob_name)

    def __run_async_extract_job(self, min_creation_time, bucket_name,
                                blob_name, print_header):
        table_path = self.__get_table_path()
        job_id_prefix = "extract_job_test"
        unique_id = self.client.run_async_extract_job(
            job_id_prefix, table_path, [f'gs://{bucket_name}/{blob_name}'],
            EmsExtractJobConfig(compression=Compression.NONE,
                                destination_format=DestinationFormat.CSV,
                                field_delimiter=",",
                                print_header=print_header))
        self.__wait_for_job_done(unique_id)
        jobs_iterator = self.client.get_jobs_with_prefix(
            job_id_prefix, min_creation_time)
        job: EmsExtractJob = next(j for j in jobs_iterator
                                  if j.job_id == unique_id)
        return job

    def test_get_job_list_returnsAllKindOfJobs(self):
        load_config = EmsLoadJobConfig(
            {"fields": [{
                "name": "some_name",
                "type": "STRING"
            }]},
            "gs://some-non-existing-bucket-id/blob-id",
            destination_project_id=GCP_PROJECT_ID,
            destination_dataset="it_test_dataset",
            destination_table="some_table")
        destination_uris = ["gs://some-non-existing-bucket-id/destination1"]

        min_creation_time = datetime.datetime.utcnow()
        id_for_query_job = self.client.run_async_query(self.DUMMY_QUERY,
                                                       job_id_prefix="it_job")
        id_for_load_job = self.client.run_async_load_job(
            job_id_prefix="it_job", config=load_config)
        id_for_extract_job = self.client.run_async_extract_job(
            job_id_prefix="it_job",
            table=self.__get_table_path(),
            destination_uris=destination_uris,
            job_config=EmsExtractJobConfig(
                compression=Compression.NONE,
                destination_format=DestinationFormat.CSV,
                field_delimiter=",",
                print_header=False))

        self.__wait_for_job_done(id_for_query_job)
        self.__wait_for_job_done(id_for_load_job)
        self.__wait_for_job_done(id_for_extract_job)
        jobs_iterator = self.client.get_jobs_with_prefix(
            "it_job", min_creation_time)
        job_types = [type(j) for j in jobs_iterator]

        self.assertEqual(3, len(job_types))
        self.assertIn(EmsQueryJob, job_types)
        self.assertIn(EmsLoadJob, job_types)
        self.assertIn(EmsExtractJob, job_types)

    def test_run_async_load_job_loadsFileFromBucketToNewBigqueryTable(self):
        bucket_name = "it_test_ems_gcp_toolkit"
        bucket = self.__get_test_bucket(bucket_name)
        blob_name = "sample_fruit_test.csv"
        blob = bucket.blob(blob_name)
        random_quantity = random.randint(10000, 99000)
        blob.upload_from_string(
            f"apple,{random_quantity},True,1970-01-01T12:00:00.000Z\n")
        source_uri = f"gs://{bucket_name}/{blob_name}"
        config = EmsLoadJobConfig(
            source_uri_template=source_uri,
            destination_project_id=GCP_PROJECT_ID,
            destination_dataset=self.DATASET.dataset_id,
            destination_table="load_job_test",
            schema={
                "fields": [{
                    "type": "STRING",
                    "name": "fruit"
                }, {
                    "type": "INT64",
                    "name": "quantity"
                }, {
                    "type": "BOOL",
                    "name": "is_delicious"
                }, {
                    "type": "TIMESTAMP",
                    "name": "best_before"
                }]
            },
            write_disposition=EmsWriteDisposition.WRITE_TRUNCATE)

        load_job_id = self.client.run_async_load_job("it_test", config)
        self.__wait_for_job_done(load_job_id)

        query = f"""
        SELECT * from `{config.destination_project_id}.{config.destination_dataset}.{config.destination_table}`
        """

        result = self.client.run_sync_query(query=query)
        expected = [{
            "fruit": "apple",
            "quantity": random_quantity,
            "is_delicious": True,
            "best_before": datetime.datetime(
                1970, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc)
        }]
        self.assertEqual(expected, list(result))

    def test_run_async_load_job_whenLoadingFileWithHeader_headerIsSkipped(self):
        bucket_name = "it_test_ems_gcp_toolkit"
        bucket = self.__get_test_bucket(bucket_name)
        blob_name = "sample_test_with_header.csv"
        blob = bucket.blob(blob_name)
        blob.upload_from_string(f"HEADER\nROW\n")
        source_uri = f"gs://{bucket_name}/{blob_name}"
        config = EmsLoadJobConfig(
            source_uri_template=source_uri,
            destination_project_id=GCP_PROJECT_ID,
            destination_dataset=self.DATASET.dataset_id,
            destination_table="load_job_test_skip_header",
            schema={"fields": [{
                "type": "STRING",
                "name": "COLUMN"
            }]},
            write_disposition=EmsWriteDisposition.WRITE_TRUNCATE,
            skip_leading_rows=1)

        load_job_id = self.client.run_async_load_job("it_test", config)
        self.__wait_for_job_done(load_job_id)

        query = f"""
        SELECT * from `{config.destination_project_id}.{config.destination_dataset}.{config.destination_table}`
        """

        result = self.client.run_sync_query(query=query)
        expected = [{"COLUMN": "ROW"}]
        self.assertEqual(expected, list(result))

    def test_dataset_exists_WhenDatasetNotExists(self):
        dataset_id = self.__generate_test_name("dataset")

        self.assertFalse(self.client.dataset_exists(dataset_id))

    def test_dataset_exists_WhenDatasetExists(self):
        dataset_id = self.__generate_test_name("dataset")
        self.client.create_dataset_if_not_exists(dataset_id)

        self.assertTrue(self.client.dataset_exists(dataset_id))

        self.client.delete_dataset_if_exists(dataset_id)

    def test_create_dataset_if_not_exists_CreatesDataset(self):
        dataset_id = self.__generate_test_name("dataset")

        self.client.create_dataset_if_not_exists(dataset_id)

        self.assertTrue(self.client.dataset_exists(dataset_id))

        self.client.delete_dataset_if_exists(dataset_id)

    def test_create_dataset_if_not_exists_DoesNotRaiseExceptionWhenAlreadyExists(
            self):
        dataset_id = self.__generate_test_name("dataset")

        self.client.create_dataset_if_not_exists(dataset_id)
        try:
            self.client.create_dataset_if_not_exists(dataset_id)
        except Conflict:
            self.fail(
                "create_dataset_if_not_exists raised AlreadyExists error")

        self.client.delete_dataset_if_exists(dataset_id)

    def test_delete_dataset_if_exists_WhenItIsEmpty(self):
        dataset_id = self.__generate_test_name("dataset")
        self.client.create_dataset_if_not_exists(dataset_id)

        self.client.delete_dataset_if_exists(dataset_id)

        self.assertFalse(self.client.dataset_exists(dataset_id))

    def test_delete_dataset_if_exists_WhenItIsNotEmpty(self):
        dataset_id = self.__generate_test_name("dataset")
        table_name = self.__generate_test_name("table")
        self.client.create_dataset_if_not_exists(dataset_id)
        self.__create_test_table(
            table_name,
            Dataset(DatasetReference(GCP_PROJECT_ID, dataset_id)).reference)

        self.client.delete_dataset_if_exists(dataset_id, delete_contents=True)

        self.assertFalse(self.client.dataset_exists(dataset_id))

    def test_delete_dataset_DoesNotRaiseExceptionWhenNotExists(self):
        dataset_id = self.__generate_test_name("dataset")

        try:
            self.client.delete_dataset_if_exists(dataset_id)
        except NotFound:
            self.fail("delete_dataset_if_exists raised NotFound error")

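    # tenacity retries get_job until the submitted job becomes visible server-side,
    # giving up after 10 seconds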
    @retry(stop=(stop_after_delay(10)))
    def __wait_for_job_submitted(self, job_id):
        self.GCP_BIGQUERY_CLIENT.get_job(job_id)

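    # poll the job state until it reaches DONE (retry_if_result retries while the
    # lambda returns True), giving up after 10 seconds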
    @retry(
        stop=(stop_after_delay(10)),
        retry=(
            retry_if_result(lambda result: result != EmsJobState.DONE.value)))
    def __wait_for_job_done(self, job_id):
        return self.GCP_BIGQUERY_CLIENT.get_job(job_id).state

    def __get_table_path(self):
        return "{}.{}.{}".format(GCP_PROJECT_ID, self.DATASET.dataset_id,
                                 self.test_table.table_id)

    @classmethod
    def __dataset(cls):
        dataset = Dataset(DatasetReference(GCP_PROJECT_ID, "it_test_dataset"))
        dataset.default_table_expiration_ms = cls.ONE_DAY_IN_MS
        return dataset

    @staticmethod
    def __generate_test_name(context: str):
        return "test_" + context + "_" + str(int(time.time()))