def test_download_table_as_df(self, mocker):
    """Verify download_table_as_df extracts to staging, then reads it back.

    The BigQuery client and ``gcs_to_df`` are both replaced with mocks, so
    the test only checks how they are called: one CSV extract job aimed at
    the expected staging URI, followed by one ``gcs_to_df`` call on that
    same URI.
    """
    # NOTE(review): the "temp_0" file name presumably follows from the
    # clock being frozen by _stop_time -- confirm against that helper.
    self._stop_time(mocker)
    gcs_to_df_mock = mocker.patch(
        "feast.sdk.utils.bq_util.gcs_to_df", return_value=None)

    full_table_id = "project_id.dataset_id.table_id"
    staging_dir = "gs://temp/"
    expected_uri = os.path.join(staging_dir, "temp_0")

    downloader = TableDownloader()
    downloader._bq = _Mock_BQ_Client()
    extract_mock = mocker.patch.object(
        downloader._bq, "extract_table", return_value=_Job())

    downloader.download_table_as_df(
        full_table_id, staging_location=staging_dir)

    # Exactly one extract job, so call_args is that single call.
    assert extract_mock.call_count == 1
    positional, keyword = extract_mock.call_args
    expected_table = Table.from_string(full_table_id)
    assert positional[0].full_table_id == expected_table.full_table_id
    assert positional[1] == expected_uri
    assert keyword["job_config"].destination_format == "CSV"

    gcs_to_df_mock.assert_called_once_with(expected_uri)
def _test_download_file(self, mocker, type):
    """Shared check for download_table_as_file with the given file type.

    Args:
        mocker: pytest-mock fixture used to patch the client methods.
        type: file type forwarded as ``file_type``; ``str(type)`` must
            equal the extract job's destination format.

    The BigQuery and GCS clients are swapped for mocks. The test asserts
    that one extract job targets the expected staging URI and that the
    staged blob is downloaded to the requested local path.
    """
    full_table_id = "project_id.dataset_id.table_id"
    staging_dir = "gs://temp/"
    local_target = "/tmp/myfile.csv"

    downloader = TableDownloader()

    # Blob whose download call is recorded instead of executed.
    blob = _Blob()
    download_mock = mocker.patch.object(blob, "download_to_filename")

    downloader._bq = _Mock_BQ_Client()
    extract_mock = mocker.patch.object(
        downloader._bq, "extract_table", return_value=_Job())

    downloader._gcs = _Mock_GCS_Client()
    mocker.patch.object(
        downloader._gcs, "get_bucket", return_value=_Bucket(blob))

    downloader.download_table_as_file(
        full_table_id,
        local_target,
        staging_location=staging_dir,
        file_type=type)

    expected_uri = os.path.join(staging_dir, "temp_0")

    # Exactly one extract job, so call_args is that single call.
    assert extract_mock.call_count == 1
    positional, keyword = extract_mock.call_args
    expected_table = Table.from_string(full_table_id)
    assert positional[0].full_table_id == expected_table.full_table_id
    assert positional[1] == expected_uri
    assert keyword["job_config"].destination_format == str(type)

    download_mock.assert_called_once_with(local_target)
def __init__(self, core_url=None, serving_url=None, verbose=False):
    """Set up a Feast client for the given core and serving endpoints.

    A URL that is not passed explicitly is read from the environment:
    ``core_url`` from the variable named by ``FEAST_CORE_URL_ENV_KEY``
    and ``serving_url`` from the one named by
    ``FEAST_SERVING_URL_ENV_KEY``. Channels and service stubs all start
    out as ``None``; nothing is connected by the constructor itself.

    Args:
        core_url (str, optional): feast's grpc endpoint URL
            (e.g.: "my.feast.com:8433")
        serving_url (str, optional): feast serving's grpc endpoint URL
            (e.g.: "my.feast.com:8433")
        verbose (bool, optional): stored on the client as ``_verbose``.
            Defaults to False.
    """
    # An explicit argument wins; the environment is consulted only when
    # the caller passed None.
    self._core_url = (
        core_url if core_url is not None
        else os.getenv(FEAST_CORE_URL_ENV_KEY))
    self._serving_url = (
        serving_url if serving_url is not None
        else os.getenv(FEAST_SERVING_URL_ENV_KEY))

    self._verbose = verbose

    # gRPC channels (name-mangled, private to this class).
    self.__core_channel = None
    self.__serving_channel = None

    # Service stubs, unset until something assigns them.
    self._core_service_stub = None
    self._job_service_stub = None
    self._dataset_service_stub = None
    self._serving_service_stub = None

    self._table_downloader = TableDownloader()
def _test_download_file(self, mocker, type):
    """Shared check for download_table_as_file with the given file type.

    Args:
        mocker: pytest-mock fixture used to patch the collaborators.
        type: file type forwarded as ``file_type``; ``str(type)`` must
            equal the extract job's destination format.

    ``gcs_folder_to_file`` and the BigQuery client are mocked. The test
    asserts that one extract job writes to a ``shard_*`` pattern inside
    the expected staging folder, and that the folder is then handed to
    ``gcs_folder_to_file`` together with the local destination path.
    """
    folder_to_file_mock = mocker.patch(
        "feast.sdk.utils.bq_util.gcs_folder_to_file", return_value=None)

    table_ref = "project_id.dataset_id.table_id"
    staging_root = "gs://temp"
    local_target = "/tmp/myfile.csv"
    expected_folder = os.path.join(staging_root, "temp_0")
    expected_pattern = os.path.join(expected_folder, "shard_*")

    downloader = TableDownloader()
    downloader._bqclient = _Mock_BQ_Client()
    extract_mock = mocker.patch.object(
        downloader._bqclient, "extract_table", return_value=_Job())

    downloader.download_table_as_file(
        table_ref,
        local_target,
        staging_location=staging_root,
        file_type=type)

    # Exactly one extract job, so call_args is that single call.
    assert extract_mock.call_count == 1
    positional, keyword = extract_mock.call_args
    expected_table = Table.from_string(table_ref)
    assert positional[0].full_table_id == expected_table.full_table_id
    assert positional[1] == expected_pattern
    assert keyword["job_config"].destination_format == str(type)

    folder_to_file_mock.assert_called_once_with(
        expected_folder, local_target)
def test_download_invalid_staging_url(self):
    """Both download methods must reject a non-GCS staging location.

    A local directory is passed as the staging location to
    ``download_table_as_file`` and ``download_table_as_df``; each call is
    expected to raise ``ValueError`` with the same message.
    """
    expected_message = "staging_uri must be a directory in GCS"
    local_staging = "/local/directory"
    full_table_id = "project_id.dataset_id.table_id"

    downloader = TableDownloader()

    with pytest.raises(ValueError, match=expected_message):
        downloader.download_table_as_file(
            full_table_id, "/tmp/dst", local_staging, FileType.CSV)

    with pytest.raises(ValueError, match=expected_message):
        downloader.download_table_as_df(full_table_id, local_staging)
def test_download_dataset_as_file(self, client, mocker):
    """Verify download_dataset delegates to the table downloader.

    The client's table downloader has ``download_table_as_file`` mocked
    to return the destination path. The test checks that
    ``download_dataset`` returns that value and forwards the dataset's
    table id, the destination, the staging location and the file type
    positionally.
    """
    local_target = "/tmp/dest_file"
    table_ref = "project.dataset.table"
    staging_uri = "gs://gcs_bucket/"

    downloader = TableDownloader()
    download_mock = mocker.patch.object(
        downloader, "download_table_as_file", return_value=local_target)
    client._table_downloader = downloader

    dataset_info = DatasetInfo("mydataset", table_ref)
    returned_path = client.download_dataset(
        dataset_info,
        local_target,
        staging_location=staging_uri,
        file_type=FileType.CSV)

    assert returned_path == local_target
    download_mock.assert_called_once_with(
        table_ref, local_target, staging_uri, FileType.CSV)