def __init__(
    self,
    prediction_net: BlockType,
    batch_size: int,
    prediction_length: int,
    freq: str,
    ctx: mx.Context,
    input_transform: Transformation,
    lead_time: int = 0,
    forecast_generator: ForecastGenerator = SampleForecastGenerator(),
    output_transform: Optional[
        Callable[[DataEntry, np.ndarray], np.ndarray]
    ] = None,
    dtype: Type = np.float32,
) -> None:
    """
    Initialize the predictor by deriving the network's input field names
    and forwarding all remaining configuration to the parent class.
    """
    # The parent needs the list of hybrid_forward input names, which is a
    # property of the *class* of the prediction network.
    forwarded = dict(
        prediction_net=prediction_net,
        batch_size=batch_size,
        prediction_length=prediction_length,
        freq=freq,
        ctx=ctx,
        input_transform=input_transform,
        lead_time=lead_time,
        forecast_generator=forecast_generator,
        output_transform=output_transform,
        dtype=dtype,
    )
    super().__init__(
        input_names=get_hybrid_forward_input_names(type(prediction_net)),
        **forwarded,
    )
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    """
    Build the ``ValidationDataLoader`` for this estimator.

    Parameters
    ----------
    data
        Validation dataset to iterate over.

    Returns
    -------
    DataLoader
        Loader yielding batches restricted to the fields consumed by
        ``DeepFactorTrainingNetwork.hybrid_forward``.
    """
    input_names = get_hybrid_forward_input_names(DeepFactorTrainingNetwork)
    # Consistency fix: the sibling loader factories in this file bound the
    # number of non-yielding transformation passes to the dataset length;
    # without this guard the instance splitter can raise spuriously on
    # short validation sets.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("validation")
    return ValidationDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
    )
def create_validation_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    """
    Build the ``ValidationDataLoader`` for this estimator, yielding
    batches restricted to the input fields of ``CanonicalTrainingNetwork``.
    """
    field_names = get_hybrid_forward_input_names(CanonicalTrainingNetwork)
    # Bound the number of non-yielding transformation passes by the
    # dataset length so the splitter does not bail out on small datasets.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        splitter = self._create_instance_splitter("validation")
    transform = splitter + SelectFields(field_names)
    return ValidationDataLoader(
        dataset=data,
        transform=transform,
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
    )
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    """
    Build the ``TrainDataLoader`` for this estimator.

    Parameters
    ----------
    data
        Training dataset to iterate over.

    Returns
    -------
    DataLoader
        Loader yielding batches restricted to the fields consumed by
        ``LSTNetTrain.hybrid_forward``, decoded into the trainer context.
    """
    input_names = get_hybrid_forward_input_names(LSTNetTrain)
    # Consistency fix: the sibling loader factories in this file bound the
    # number of non-yielding transformation passes to the dataset length;
    # without this guard the instance splitter can raise spuriously on
    # short training sets.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        instance_splitter = self._create_instance_splitter("training")
    return TrainDataLoader(
        dataset=data,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )
def create_training_data_loader(
    self,
    data: Dataset,
    **kwargs,
) -> DataLoader:
    """
    Build the ``TrainDataLoader`` for this estimator, yielding batches
    restricted to the input fields of ``DeepStateTrainingNetwork`` and
    decoded into the trainer's MXNet context.
    """
    field_names = get_hybrid_forward_input_names(DeepStateTrainingNetwork)
    # Bound the number of non-yielding transformation passes by the
    # dataset length so the splitter does not bail out on small datasets.
    with env._let(max_idle_transforms=maybe_len(data) or 0):
        splitter = self._create_instance_splitter("training")
    transform = splitter + SelectFields(field_names)
    ctx = self.trainer.ctx
    return TrainDataLoader(
        dataset=data,
        transform=transform,
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=ctx, dtype=self.dtype),
        decode_fn=partial(as_in_context, ctx=ctx),
        **kwargs,
    )
def test_distribution():
    """
    Makes sure additional tensors can be accessed and have expected shapes
    """
    pred_length = ds_info.prediction_length
    estimator = DeepAREstimator(
        freq=freq,
        prediction_length=pred_length,
        trainer=Trainer(epochs=2, num_batches_per_epoch=1),
        distr_output=StudentTOutput(),
    )
    train_output = estimator.train_model(train_ds, test_ds)

    # todo adapt loader to anomaly detection use-case
    batch_size = 2
    num_samples = 3

    loader = TrainDataLoader(
        dataset=train_ds,
        transform=train_output.transformation,
        batch_size=batch_size,
        num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
        stack_fn=partial(batchify, ctx=mx.cpu()),
    )

    # Each training window covers context + prediction range.
    seq_len = 2 * ds_info.prediction_length

    for batch in islice(loader, 1):
        field_names = get_hybrid_forward_input_names(
            train_output.trained_net
        )
        distr = train_output.trained_net.distribution(
            *[batch[name] for name in field_names]
        )

        assert distr.sample(num_samples).shape == (
            num_samples,
            batch_size,
            seq_len,
        )
dataset=dataset.train,
transform=train_output.transformation,
batch_size=batch_size,
num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
ctx=mx.cpu(),
)

# Consume exactly one batch; after the loop `data_entry` holds that batch.
for data_entry in islice(training_data_loader, 1):
    pass

# we now call the train model to get the predicted distribution on each window
# this allows us to investigate where are the biggest anomalies
context_length = train_output.trained_net.context_length
prediction_length = train_output.trained_net.prediction_length

input_names = get_hybrid_forward_input_names(train_output.trained_net)

distr = train_output.trained_net.distribution(
    *[data_entry[k] for k in input_names]
)

# gets all information into numpy array for further plotting
samples = distr.sample(num_samples).asnumpy()
# NOTE(review): this percentile computation is repeated verbatim below —
# the first result is never used; consider removing one of the two.
percentiles = np.percentile(samples, axis=0, q=[10.0, 90.0])
# Concatenate past and future targets along the time axis, then keep only
# the last (context + prediction) steps to align with the distribution.
target = mx.ndarray.concat(
    data_entry["past_target"], data_entry["future_target"], dim=1
)
target = target[:, -(context_length + prediction_length):]

# Negative log-likelihood per time step; high values flag anomalies.
nll = -distr.log_prob(target).asnumpy()

target = target.asnumpy()
mean = samples.mean(axis=0)
percentiles = np.percentile(samples, axis=0, q=[10.0, 90.0])
def train_model(
    self,
    training_data: Dataset,
    validation_data: Optional[Dataset] = None,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> TrainOutput:
    """
    Train the estimator's network on ``training_data`` (optionally
    monitoring ``validation_data``) and return the transformation, the
    trained network, and a ready-to-use predictor.

    Parameters
    ----------
    training_data
        Dataset to train on.
    validation_data
        Optional dataset used for validation during training.
    num_workers
        Number of worker processes for the data loaders.
    num_prefetch
        Number of batches to prefetch.
    shuffle_buffer_length
        Size of the shuffle buffer for the training loader.

    Returns
    -------
    TrainOutput
        Bundle of transformation, trained network, and predictor.
    """
    transformation = self.create_transformation()

    # The training network must be created inside the same MXNet context
    # that the trainer will run in.
    with self.trainer.ctx:
        net = self.create_training_network()

    field_names = get_hybrid_forward_input_names(net)
    # Both loaders share the same transformation chain and stacking
    # function, so build each once.
    chain = transformation + SelectFields(field_names)
    stack = partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype)

    train_iter = TrainDataLoader(
        dataset=training_data,
        transform=chain,
        batch_size=self.batch_size,
        stack_fn=stack,
        num_workers=num_workers,
        num_prefetch=num_prefetch,
        shuffle_buffer_length=shuffle_buffer_length,
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )

    validation_iter = None
    if validation_data is not None:
        validation_iter = ValidationDataLoader(
            dataset=validation_data,
            transform=chain,
            batch_size=self.batch_size,
            stack_fn=stack,
            num_workers=num_workers,
            num_prefetch=num_prefetch,
            **kwargs,
        )

    self.trainer(
        net=net,
        train_iter=train_iter,
        validation_iter=validation_iter,
    )

    # The prediction network must likewise live in the context that was
    # used during training.
    with self.trainer.ctx:
        return TrainOutput(
            transformation=transformation,
            trained_net=net,
            predictor=self.create_predictor(transformation, net),
        )
def train(
    self,
    training_data: Dataset,
    validation_data: Optional[Dataset] = None,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> Predictor:
    """
    Train a WaveNet network on binned targets and return a predictor.

    Targets are discretized into ``self.num_bins`` bins; the bin range
    extends to negative values only when the training data contains them.

    Parameters
    ----------
    training_data
        Dataset to train on.
    validation_data
        Optional dataset used for validation during training.
    num_workers
        Number of worker processes for the data loaders.
    num_prefetch
        Number of batches to prefetch.
    shuffle_buffer_length
        Size of the shuffle buffer for the training loader.

    Returns
    -------
    Predictor
        Predictor wrapping the trained network.
    """
    has_negative_data = any(np.any(d["target"] < 0) for d in training_data)
    low = -10.0 if has_negative_data else 0
    high = 10.0
    bin_centers = np.linspace(low, high, self.num_bins)
    # Outer edges are effectively +/- infinity so every value falls in a bin.
    bin_edges = np.concatenate(
        [[-1e20], (bin_centers[1:] + bin_centers[:-1]) / 2.0, [1e20]]
    )

    logging.info(
        f"using training windows of length = {self.train_window_length}"
    )

    transformation = self.create_transformation(
        bin_edges, pred_length=self.train_window_length
    )

    # ensure that the training network is created within the same MXNet
    # context as the one that will be used during training
    with self.trainer.ctx:
        params = self._get_wavenet_args(bin_centers)
        params.update(pred_length=self.train_window_length)
        trained_net = WaveNet(**params)

    input_names = get_hybrid_forward_input_names(trained_net)

    training_data_loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype),
        num_workers=num_workers,
        num_prefetch=num_prefetch,
        shuffle_buffer_length=shuffle_buffer_length,
        **kwargs,
    )

    validation_data_loader = None
    if validation_data is not None:
        validation_data_loader = ValidationDataLoader(
            dataset=validation_data,
            # Fix: restrict validation batches to the network's input
            # fields, matching the training loader above — otherwise the
            # batches carry fields hybrid_forward does not accept.
            transform=transformation + SelectFields(input_names),
            batch_size=self.batch_size,
            stack_fn=partial(
                batchify, ctx=self.trainer.ctx, dtype=self.dtype
            ),
            num_workers=num_workers,
            num_prefetch=num_prefetch,
            **kwargs,
        )

    self.trainer(
        net=trained_net,
        train_iter=training_data_loader,
        validation_iter=validation_data_loader,
    )

    # ensure that the prediction network is created within the same MXNet
    # context as the one that was used during training
    with self.trainer.ctx:
        return self.create_predictor(
            transformation, trained_net, bin_centers
        )