def compute_data_from_recipe(
    self,
    num_steps: int,
    constant: Optional[float] = None,
    one_to_zero: float = 0.1,
    zero_to_one: float = 0.1,
    scale_features: float = 200,
) -> TrainDatasets:
    """Assemble a generation recipe from the flags on ``self`` and run it.

    The target series starts from a constant level; depending on the
    configured flags it gains Gaussian noise, a linear trend, promotion
    effects, and holiday effects. Each effect indicator is also published
    as a dynamic real feature.
    """
    steps = []

    # Base level for the target; optional effects are summed on top.
    target = Constant(constant)
    if self.is_noise:
        target += RandomGaussian()  # default stddev = 1.0
    if self.is_trend:
        target += LinearTrend()

    if self.is_promotions:
        # Promotion indicator follows a two-state Markov chain; it is
        # exposed as a dynamic feature and scaled into the target.
        steps.append(
            ("binary_causal", BinaryMarkovChain(one_to_zero, zero_to_one))
        )
        steps.append(
            (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_causal"]))
        )
        target += scale_features * Lag("binary_causal", lag=0)

    if self.holidays:
        # Materialize the period index so holiday membership can be tested.
        dates = list(
            pd.period_range(self.start, periods=num_steps, freq=self.freq)
        )
        steps.append(
            ("binary_holidays", BinaryHolidays(dates, self.holidays))
        )
        steps.append(
            (FieldName.FEAT_DYNAMIC_REAL, Stack(["binary_holidays"]))
        )
        target += scale_features * Lag("binary_holidays", lag=0)

    steps.append((FieldName.TARGET, target))

    dataset = RecipeDataset(
        recipe=steps,
        metadata=self.metadata,
        max_train_length=num_steps - self.prediction_length,
        prediction_length=self.prediction_length,
        # One series at a time so each call can use a different constant
        # value per time series.
        num_timeseries=1,
    )
    return dataset.generate()
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate a small default synthetic dataset and its info.

    Targets are a linear trend plus Gaussian noise; every series carries a
    random categorical static feature and a per-category static real value.
    Returns ``(dataset_info, train, test)``.
    """
    steps = [
        (FieldName.TARGET, LinearTrend() + RandomGaussian()),
        (FieldName.FEAT_STATIC_CAT, RandomCat([10])),
        (
            FieldName.FEAT_STATIC_REAL,
            ForEachCat(RandomGaussian(1, (10,)), FieldName.FEAT_STATIC_CAT)
            + RandomGaussian(0.1, (10,)),
        ),
    ]

    meta = MetaData(
        freq="D",
        feat_static_real=[
            BasicFeatureInfo(name=FieldName.FEAT_STATIC_REAL)
        ],
        feat_static_cat=[
            CategoricalFeatureInfo(
                name=FieldName.FEAT_STATIC_CAT, cardinality=10
            )
        ],
        feat_dynamic_real=[
            BasicFeatureInfo(name=FieldName.FEAT_DYNAMIC_REAL)
        ],
    )

    def _trim_length(x, **kwargs):
        # Geometrically distributed trim, capped at the full train length.
        return np.minimum(
            int(np.random.geometric(1 / (kwargs["train_length"] / 2))),
            kwargs["train_length"],
        )

    dataset = RecipeDataset(
        recipe=steps,
        metadata=meta,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=_trim_length,
    )
    generated = dataset.generate()
    assert generated.test is not None
    info = dataset.dataset_info(generated.train, generated.test)
    return info, generated.train, generated.test
def compute_data_from_recipe(
    self,
    num_steps: int,
    constant: Optional[float] = None,
    one_to_zero: float = 0.1,
    zero_to_one: float = 0.1,
    scale_features: float = 200,
) -> TrainDatasets:
    """Build a synthetic dataset whose target is a constant level plus the
    noise/trend/promotion/holiday effects selected by the flags on ``self``.
    """
    steps = []

    # Target starts at a constant level; selected effects are added on top.
    target = Constant(constant)
    if self.is_noise:
        target += RandomGaussian()  # default stddev = 1.0
    if self.is_trend:
        target += LinearTrend()

    if self.is_promotions:
        # Promotion indicator from a two-state Markov chain, also exposed
        # as a dynamic feature and scaled into the target.
        steps.append(
            ('binary_causal', BinaryMarkovChain(one_to_zero, zero_to_one))
        )
        steps.append(('feat_dynamic_real', Stack(['binary_causal'])))
        target += scale_features * Lag('binary_causal', lag=0)

    if self.holidays:
        # Build the date axis by stepping one unit at a time from the
        # configured start date.
        timestamp = self.init_date()
        dates = []
        for _ in range(num_steps):
            dates.append(timestamp)
            timestamp += 1
        steps.append(('binary_holidays', Binary(dates, self.holidays)))
        steps.append(('feat_dynamic_real', Stack(['binary_holidays'])))
        target += scale_features * Lag('binary_holidays', lag=0)

    steps.append(('target', target))

    dataset = RecipeDataset(
        recipe=steps,
        metadata=self.metadata,
        max_train_length=num_steps - self.prediction_length,
        prediction_length=self.prediction_length,
        # One series at a time so each call can use a different constant
        # value per time series.
        num_timeseries=1,
    )
    return dataset.generate()
def default_synthetic() -> Tuple[DatasetInfo, Dataset, Dataset]:
    """Generate a small default synthetic dataset and its info.

    Targets are a linear trend plus Gaussian noise; every series carries a
    random categorical static feature and a per-category static real value.
    Returns ``(dataset_info, train, test)``.
    """
    steps = [
        ('target', LinearTrend() + RandomGaussian()),
        ('feat_static_cat', RandomCat([10])),
        (
            'feat_static_real',
            ForEachCat(RandomGaussian(1, 10), 'feat_static_cat')
            + RandomGaussian(0.1, 10),
        ),
    ]

    meta = MetaData(
        time_granularity='D',
        feat_static_real=[BasicFeatureInfo(name='feat_static_real')],
        feat_static_cat=[
            CategoricalFeatureInfo(name='feat_static_cat', cardinality=10)
        ],
        feat_dynamic_real=[BasicFeatureInfo(name='feat_dynamic_real')],
    )

    def _trim_length(x, **kwargs):
        # Geometrically distributed trim, capped at the full train length.
        return np.minimum(
            int(np.random.geometric(1 / (kwargs['train_length'] / 2))),
            kwargs['train_length'],
        )

    dataset = RecipeDataset(
        recipe=steps,
        metadata=meta,
        max_train_length=20,
        prediction_length=10,
        num_timeseries=10,
        trim_length_fun=_trim_length,
    )
    generated = dataset.generate()
    assert generated.test is not None
    info = dataset.dataset_info(generated.train, generated.test)
    return info, generated.train, generated.test
Env, ) BASE_RECIPE = [("foo", ConstantVec(1.0)), ("cat", RandomCat([10]))] @pytest.mark.parametrize( "func", [ Debug(), RandomGaussian(), RandomBinary(), RandomSymmetricDirichlet(), BinaryMarkovChain(0.1, 0.1), Constant(1), LinearTrend(), RandomCat([10]), Lag("foo", 1), ForEachCat(RandomGaussian()), Eval("np.random.rand(length)"), SmoothSeasonality(Constant(12), Constant(0)), Add(["foo", "foo"]), Mul(["foo", "foo"]), NanWhere("foo", "foo"), Stack([Ref("foo"), Ref("foo")]), RandomGaussian() + RandomGaussian(), RandomGaussian() * RandomGaussian(), RandomGaussian() / RandomGaussian(), ], ) def test_call_and_repr(func) -> None:
take_as_list, ) BASE_RECIPE = [('foo', ConstantVec(1.0)), ('cat', RandomCat([10]))] @pytest.mark.parametrize( "func", [ Debug(), RandomGaussian(), RandomBinary(), RandomSymmetricDirichlet(), BinaryMarkovChain(0.1, 0.1), Constant(1), LinearTrend(), RandomCat([10]), Lag("foo", 1), ForEachCat(RandomGaussian()), Expr("np.random.rand(length)"), SmoothSeasonality(Constant(12), Constant(0)), Add(['foo', 'foo']), Mul(['foo', 'foo']), NanWhere('foo', 'foo'), NanWhereNot('foo', 'foo'), Stack(['foo', 'foo']), RandomGaussian() + RandomGaussian(), RandomGaussian() * RandomGaussian(), RandomGaussian() / RandomGaussian(), ], )