Exemple #1
0
    def test_create_dataset_with_name_prefix(self, client, mocker):
        """Check that ``name_prefix`` is forwarded in the CreateDataset request.

        The dataset service stub is replaced with one whose ``CreateDataset``
        call is patched to return a canned response, so no real RPC is made.
        """
        expected_name = "dataset_name"
        expected_table = "project.dataset.table"
        start_date, end_date = "2018-01-02", "2018-12-31"
        limit = 100
        name_prefix = "feast"
        fs = FeatureSet("myentity",
                        ["myentity.feature1", "myentity.feature2"])

        # Canned response the patched stub hands back to the client.
        canned_response = DatasetServiceTypes.CreateDatasetResponse(
            datasetInfo=DatasetInfo_pb(name=expected_name,
                                       tableUrl=expected_table))
        stub = training.DatasetServiceStub(grpc.insecure_channel(""))
        mocker.patch.object(stub,
                            "CreateDataset",
                            return_value=canned_response)
        client._dataset_service_stub = stub

        ds = client.create_dataset(fs,
                                   start_date,
                                   end_date,
                                   limit=limit,
                                   name_prefix=name_prefix)

        # The returned object mirrors the canned response ...
        assert ds.name == expected_name
        assert ds.full_table_id == expected_table

        # ... and the service saw exactly one request carrying the prefix.
        expected_request = DatasetServiceTypes.CreateDatasetRequest(
            featureSet=fs.proto,
            startDate=_timestamp_from_datetime(_parse_date(start_date)),
            endDate=_timestamp_from_datetime(_parse_date(end_date)),
            limit=limit,
            namePrefix=name_prefix)
        stub.CreateDataset.assert_called_once_with(expected_request)
Exemple #2
0
    def create_dataset(self,
                       feature_set,
                       start_date,
                       end_date,
                       limit=None,
                       name_prefix=None):
        """
        Request a training dataset for a feature set from the dataset service.

        The arguments are validated, packed into a ``CreateDatasetRequest``
        and sent through the dataset service stub. The name and table URL
        reported in the response are wrapped in a ``DatasetInfo``.

        Args:
            feature_set (feast.sdk.resources.feature_set.FeatureSet):
                feature set representing the data wanted
            start_date (str): starting date of the training data in ISO 8601
                format (e.g.: "2018-12-31")
            end_date (str): end date of the training data in ISO 8601 format
                (e.g.: "2018-12-31")
            limit (int, optional): (default: None) maximum number of rows
                returned
            name_prefix (str, optional): (default: None) name prefix sent to
                the service as ``namePrefix``

        Returns:
            DatasetInfo: built from the ``name`` and ``tableUrl`` of the
            dataset info in the service response
        """
        self._check_create_dataset_args(feature_set, start_date, end_date,
                                        limit)

        # Build the request fields in the same order the request lists them.
        feature_set_proto = feature_set.proto
        start_timestamp = _timestamp_from_datetime(_parse_date(start_date))
        end_timestamp = _timestamp_from_datetime(_parse_date(end_date))
        request = DatasetServiceTypes.CreateDatasetRequest(
            featureSet=feature_set_proto,
            startDate=start_timestamp,
            endDate=end_timestamp,
            limit=limit,
            namePrefix=name_prefix,
        )

        if self.verbose:
            progress = ("creating training dataset for features: " +
                        str(feature_set.features))
            print(progress)

        # Make sure the core connection is set up before using the stub.
        self._connect_core()
        response = self._dataset_service_stub.CreateDataset(request)
        info = response.datasetInfo

        if self.verbose:
            print("created dataset {}: {}".format(info.name, info.tableUrl))
        return DatasetInfo(info.name, info.tableUrl)