def test_from_csv_staging_location_not_specified(self):
    """Check that ``Importer.from_csv`` rejects a missing or non-GCS staging location.

    Two calls are made against the same local CSV file:

    * without ``staging_location`` -- must raise ``ValueError`` matching
      "Specify staging_location for importing local file/dataframe";
    * with ``staging_location="/home"`` -- must raise ``ValueError``
      matching "Staging location must be in GCS".
    """
    feature_columns = [
        "avg_distance_completed",
        "avg_customer_distance_completed",
    ]
    csv_path = "tests/data/driver_features.csv"

    # Keep only the call under test inside each ``raises`` block so that an
    # unexpected ValueError from anywhere else cannot satisfy the assertion.
    with pytest.raises(
            ValueError,
            match="Specify staging_location for importing local file/dataframe"):
        Importer.from_csv(path=csv_path,
                          entity="driver",
                          granularity=Granularity.DAY,
                          owner="*****@*****.**",
                          feature_columns=feature_columns,
                          timestamp_column="ts")

    with pytest.raises(ValueError, match="Staging location must be in GCS"):
        Importer.from_csv(path=csv_path,
                          entity="driver",
                          granularity=Granularity.DAY,
                          owner="*****@*****.**",
                          staging_location="/home",
                          feature_columns=feature_columns,
                          timestamp_column="ts")
def test_from_csv_id_column_not_specified(self):
    """Expect a ``ValueError`` when ``from_csv`` is called without ``id_column``.

    The error message must match "Column with name driver is not found".
    """
    source_csv = "tests/data/driver_features.csv"
    wanted_features = [
        "avg_distance_completed",
        "avg_customer_distance_completed",
    ]

    expected_error = pytest.raises(
        ValueError, match="Column with name driver is not found")
    with expected_error:
        Importer.from_csv(
            path=source_csv,
            entity="driver",
            granularity=Granularity.DAY,
            owner="*****@*****.**",
            staging_location="gs://test-bucket",
            feature_columns=wanted_features,
            timestamp_column="ts",
        )
def test_from_csv_staging_location_not_valid(self):
    """Check that a non-GCS ``staging_location`` is rejected by ``from_csv``.

    Passing the local path ``/home`` must raise ``ValueError`` matching
    "Staging location must be in GCS".
    """
    feature_columns = [
        "avg_distance_completed",
        "avg_customer_distance_completed",
    ]
    csv_path = "tests/data/driver_features.csv"

    # Only the call under test lives inside the ``raises`` block; the
    # exception info object is not inspected, so it is not bound to a name.
    with pytest.raises(ValueError, match="Staging location must be in GCS"):
        Importer.from_csv(path=csv_path,
                          entity="driver",
                          owner="*****@*****.**",
                          staging_location="/home",
                          feature_columns=feature_columns,
                          timestamp_column="ts")
def test_from_csv_timestamp_column_not_specified(self):
    """Build an importer from CSV without ``timestamp_column`` and validate it.

    The same staging location, id column and feature columns given to
    ``Importer.from_csv`` are handed to ``_validate_csv_importer``.
    """
    source_csv = "tests/data/driver_features.csv"
    entity = "driver"
    day = Granularity.DAY
    owner_email = "*****@*****.**"

    # Keyword arguments shared by the factory call and the validation helper.
    shared = {
        "staging_location": "gs://test-bucket",
        "id_column": "driver_id",
        "feature_columns": [
            "avg_distance_completed",
            "avg_customer_distance_completed",
            "avg_distance_cancelled",
        ],
    }

    importer = Importer.from_csv(
        path=source_csv,
        entity=entity,
        granularity=day,
        owner=owner_email,
        **shared
    )

    self._validate_csv_importer(
        importer, source_csv, entity, day, owner_email, **shared)
def test_stage_df_without_timestamp(self, mocker):
    """Stage an importer that was created without a timestamp column.

    ``feast.sdk.importer.df_to_gcs`` is patched to return ``True`` before the
    importer is built. The test makes no explicit assertions; it passes as
    long as ``stage`` does not raise.
    """
    # Patch first, exactly as before, so the stub is in place for every
    # subsequent call.
    mocker.patch("feast.sdk.importer.df_to_gcs", return_value=True)

    from_csv_kwargs = {
        "path": "tests/data/driver_features.csv",
        "entity": "driver",
        "owner": "*****@*****.**",
        "staging_location": "gs://test-bucket",
        "id_column": "driver_id",
        "feature_columns": [
            "avg_distance_completed",
            "avg_customer_distance_completed",
            "avg_distance_cancelled",
        ],
    }
    csv_importer = Importer.from_csv(**from_csv_kwargs)
    csv_importer.stage(None)
def test_from_csv_feature_columns_not_specified(self):
    """Build an importer from CSV without ``feature_columns`` and validate it.

    The staging location, id column and timestamp column given to
    ``Importer.from_csv`` are forwarded unchanged to
    ``_validate_csv_importer``.
    """
    source_csv = "tests/data/driver_features.csv"
    entity = "driver"
    day = Granularity.DAY
    owner_email = "*****@*****.**"

    # Keyword arguments shared by the factory call and the validation helper.
    shared = {
        "staging_location": "gs://test-bucket",
        "id_column": "driver_id",
        "timestamp_column": "ts",
    }

    importer = Importer.from_csv(
        path=source_csv,
        entity=entity,
        granularity=day,
        owner=owner_email,
        **shared
    )

    self._validate_csv_importer(
        importer, source_csv, entity, day, owner_email, **shared)
def test_from_csv(self):
    """Build an importer from CSV with every option supplied and validate it.

    The entity, owner, staging location, id column, feature columns and
    timestamp column given to ``Importer.from_csv`` are passed positionally,
    in that order, to ``_validate_csv_importer``.
    """
    source_csv = "tests/data/driver_features.csv"
    entity = "driver"
    owner_email = "*****@*****.**"
    gcs_staging = "gs://test-bucket"
    id_col = "driver_id"
    ts_col = "ts"
    wanted_features = [
        "avg_distance_completed",
        "avg_customer_distance_completed",
    ]

    importer = Importer.from_csv(
        path=source_csv,
        entity=entity,
        owner=owner_email,
        staging_location=gcs_staging,
        id_column=id_col,
        feature_columns=wanted_features,
        timestamp_column=ts_col,
    )

    self._validate_csv_importer(
        importer,
        source_csv,
        entity,
        owner_email,
        gcs_staging,
        id_col,
        wanted_features,
        ts_col,
    )