def train_model(
    self,
    training_data: Dataset,
    validation_data: Optional[Dataset] = None,
    num_workers: Optional[int] = None,
    num_prefetch: Optional[int] = None,
    shuffle_buffer_length: Optional[int] = None,
    **kwargs,
) -> TrainOutput:
    """
    Train the network on ``training_data`` (optionally validating on
    ``validation_data``) and return the fitted transformation, the trained
    network and a predictor wrapping both.
    """
    transformation = self.create_transformation()

    # Both loaders batchify onto the trainer's context with the estimator
    # dtype, so the stacking function can be shared.
    stack_fn = partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype)

    train_loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        stack_fn=stack_fn,
        num_workers=num_workers,
        num_prefetch=num_prefetch,
        shuffle_buffer_length=shuffle_buffer_length,
        decode_fn=partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )

    val_loader = (
        ValidationDataLoader(
            dataset=validation_data,
            transform=transformation,
            batch_size=self.trainer.batch_size,
            stack_fn=stack_fn,
            num_workers=num_workers,
            num_prefetch=num_prefetch,
            **kwargs,
        )
        if validation_data is not None
        else None
    )

    # Create the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        trained_net = self.create_training_network()

    self.trainer(
        net=trained_net,
        input_names=get_hybrid_forward_input_names(trained_net),
        train_iter=train_loader,
        validation_iter=val_loader,
    )

    # The prediction network must likewise be created within the context
    # that was used during training.
    with self.trainer.ctx:
        return TrainOutput(
            transformation=transformation,
            trained_net=trained_net,
            predictor=self.create_predictor(transformation, trained_net),
        )
def __init__(
    self,
    prediction_net: BlockType,
    batch_size: int,
    prediction_length: int,
    freq: str,
    ctx: mx.Context,
    input_transform: Transformation,
    lead_time: int = 0,
    forecast_generator: Optional[ForecastGenerator] = None,
    output_transform: Optional[
        Callable[[DataEntry, np.ndarray], np.ndarray]
    ] = None,
    dtype: DType = np.float32,
) -> None:
    """
    Construct the predictor, deriving the network input names from
    ``prediction_net`` and forwarding everything else to the base class.

    Parameters are passed through unchanged; ``forecast_generator``
    defaults to a fresh ``SampleForecastGenerator``.

    Fix: the original used ``SampleForecastGenerator()`` as a default
    argument, which is evaluated once at function-definition time and
    shared by every instance (mutable-default anti-pattern). A ``None``
    sentinel keeps the behavior but gives each instance its own generator.
    """
    super().__init__(
        input_names=get_hybrid_forward_input_names(prediction_net),
        prediction_net=prediction_net,
        batch_size=batch_size,
        prediction_length=prediction_length,
        freq=freq,
        ctx=ctx,
        input_transform=input_transform,
        lead_time=lead_time,
        forecast_generator=(
            forecast_generator
            if forecast_generator is not None
            else SampleForecastGenerator()
        ),
        output_transform=output_transform,
        dtype=dtype,
    )
def train_model(
    self, training_data: Dataset
) -> Tuple[Transformation, HybridBlock]:
    """
    Fit the transformation on ``training_data``, train the network and
    return the (transformation, trained network) pair.
    """
    transformation = self.create_transformation()
    transformation.estimate(iter(training_data))

    data_loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        ctx=self.trainer.ctx,
        float_type=self.float_type,
    )

    # Build the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        net = self.create_training_network()

    self.trainer(
        net=net,
        input_names=get_hybrid_forward_input_names(net),
        train_iter=data_loader,
    )

    return transformation, net
def __init__(
    self,
    prediction_net: BlockType,
    batch_size: int,
    prediction_length: int,
    freq: str,
    ctx: mx.Context,
    input_transform: Transformation,
    output_transform: Optional[
        Callable[[DataEntry, np.ndarray], np.ndarray]
    ] = None,
    float_type: DType = np.float32,
    forecast_cls_name: str = "SampleForecast",
    forecast_kwargs: Optional[Dict] = None,
) -> None:
    """
    Pass every argument through to the base predictor, deriving the
    network's input names from ``prediction_net``.
    """
    # Collect the keyword arguments first so the delegation reads as a
    # single unit.
    base_kwargs = dict(
        input_names=get_hybrid_forward_input_names(prediction_net),
        prediction_net=prediction_net,
        batch_size=batch_size,
        prediction_length=prediction_length,
        freq=freq,
        ctx=ctx,
        input_transform=input_transform,
        output_transform=output_transform,
        float_type=float_type,
        forecast_cls_name=forecast_cls_name,
        forecast_kwargs=forecast_kwargs,
    )
    super().__init__(**base_kwargs)
def train_model(self, training_data: Dataset) -> TrainOutput:
    """
    Fit the transformation on ``training_data``, train the network and
    return a ``TrainOutput`` bundling transformation, network and
    predictor.
    """
    transformation = self.create_transformation()
    transformation.estimate(iter(training_data))

    loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        ctx=self.trainer.ctx,
        dtype=self.dtype,
    )

    # Create the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        net = self.create_training_network()

    self.trainer(
        net=net,
        input_names=get_hybrid_forward_input_names(net),
        train_iter=loader,
    )

    # The prediction network must be created within the context that was
    # used during training.
    with self.trainer.ctx:
        return TrainOutput(
            transformation=transformation,
            trained_net=net,
            predictor=self.create_predictor(transformation, net),
        )
def train(
    self, training_data: Dataset, validation_data: Optional[Dataset] = None
) -> Predictor:
    """
    Train a WaveNet on ``training_data`` (optionally validating on
    ``validation_data``) and return a predictor.

    Targets are discretized into ``self.num_bins`` bins; the bin range
    extends to negative values only when the data contains them.

    Fix: the training loader now receives ``dtype=self.dtype`` like the
    validation loader did, so both loaders produce batches of the same
    dtype (previously only the validation loader was given the estimator
    dtype — an inconsistency).
    """
    has_negative_data = any(np.any(d["target"] < 0) for d in training_data)
    low = -10.0 if has_negative_data else 0
    high = 10.0
    bin_centers = np.linspace(low, high, self.num_bins)
    # Outermost edges are effectively -inf/+inf so every target value
    # falls into some bin.
    bin_edges = np.concatenate(
        [[-1e20], (bin_centers[1:] + bin_centers[:-1]) / 2.0, [1e20]]
    )

    logging.info(
        f"using training windows of length = {self.train_window_length}"
    )

    transformation = self.create_transformation(
        bin_edges, pred_length=self.train_window_length
    )
    transformation.estimate(iter(training_data))

    training_data_loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        ctx=self.trainer.ctx,
        dtype=self.dtype,
    )

    validation_data_loader = None
    if validation_data is not None:
        validation_data_loader = ValidationDataLoader(
            dataset=validation_data,
            transform=transformation,
            batch_size=self.trainer.batch_size,
            ctx=self.trainer.ctx,
            dtype=self.dtype,
        )

    # Create the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        params = self._get_wavenet_args(bin_centers)
        # Override the prediction length for training so the loss covers
        # the full training window.
        params.update(pred_length=self.train_window_length)
        trained_net = WaveNet(**params)

    self.trainer(
        net=trained_net,
        input_names=get_hybrid_forward_input_names(trained_net),
        train_iter=training_data_loader,
        validation_iter=validation_data_loader,
    )

    # The prediction network must be created within the context that was
    # used during training.
    with self.trainer.ctx:
        return self.create_predictor(
            transformation, trained_net, bin_centers
        )
def train(self, training_data: Dataset) -> Predictor:
    """
    Train a WaveNet on ``training_data`` and return a predictor.

    Targets are discretized into ``self.num_bins`` bins; the bin range
    extends to negative values only when the data contains them.
    """
    has_negative_data = any(np.any(d["target"] < 0) for d in training_data)
    mean_length = int(np.mean([len(d["target"]) for d in training_data]))

    low = -10.0 if has_negative_data else 0
    high = 10.0
    bin_centers = np.linspace(low, high, self.num_bins)
    # Outermost edges are effectively -inf/+inf so every target value
    # falls into some bin.
    bin_edges = np.concatenate(
        [[-1e20], (bin_centers[1:] + bin_centers[:-1]) / 2.0, [1e20]]
    )

    # Here we override the prediction length for training. This computes
    # the loss over longer windows and makes the convolutions more
    # efficient, since calculations are reused.
    pred_length = min(mean_length, self.train_window_length)
    logging.info(f"mean series length = {mean_length}")
    logging.info(f"using training windows of length = {pred_length}")

    transformation = self.create_transformation(
        bin_edges, pred_length=pred_length
    )
    transformation.estimate(iter(training_data))

    loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        ctx=self.trainer.ctx,
    )

    # Create the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        net_args = self._get_wavenet_args(bin_centers)
        net_args.update(pred_length=pred_length)
        trained_net = WaveNet(**net_args)

    self.trainer(
        net=trained_net,
        input_names=get_hybrid_forward_input_names(trained_net),
        train_iter=loader,
    )

    # The prediction network must be created within the context that was
    # used during training.
    with self.trainer.ctx:
        return self.create_predictor(
            transformation, trained_net, bin_centers
        )
def test_distribution():
    """
    Makes sure additional tensors can be accessed and have expected shapes
    """
    prediction_length = ds_info.prediction_length
    estimator = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
        trainer=Trainer(epochs=2, num_batches_per_epoch=1),
        distr_output=StudentTOutput(),
    )

    train_output = estimator.train_model(train_ds, test_ds)

    # todo adapt loader to anomaly detection use-case
    batch_size = 2
    num_samples = 3

    loader = TrainDataLoader(
        dataset=train_ds,
        transform=train_output.transformation,
        batch_size=batch_size,
        num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
        ctx=mx.cpu(),
    )

    # Training windows span context + prediction range.
    expected_shape = (
        num_samples,
        batch_size,
        2 * ds_info.prediction_length,
    )

    for batch in islice(loader, 1):
        names = get_hybrid_forward_input_names(train_output.trained_net)
        distr = train_output.trained_net.distribution(
            *(batch[name] for name in names)
        )
        assert distr.sample(num_samples).shape == expected_shape
def test_shape():
    """
    Makes sure additional tensors can be accessed and have expected shapes
    """
    prediction_length = ds_info.prediction_length
    estimator = DeepAREstimator(
        freq=freq,
        prediction_length=prediction_length,
        trainer=Trainer(epochs=1, num_batches_per_epoch=1),
        distr_output=StudentTOutput(),
    )
    training_transformation, trained_net = estimator.train_model(train_ds)

    # todo adapt loader to anomaly detection use-case
    batch_size = 2
    loader = TrainDataLoader(
        dataset=train_ds,
        transform=training_transformation,
        batch_size=batch_size,
        num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
        ctx=mx.cpu(),
    )

    # Training windows span context + prediction range.
    expected = (batch_size, 2 * ds_info.prediction_length)

    for batch in islice(loader, 1):
        names = get_hybrid_forward_input_names(trained_net)
        loss, likelihoods, *distr_args = trained_net(
            *(batch[name] for name in names)
        )
        distr = StudentT(*distr_args)

        assert likelihoods.shape == expected
        assert distr.mu.shape == expected
        assert distr.sigma.shape == expected
        assert distr.nu.shape == expected
def train(self, training_data: Dataset) -> Predictor:
    """
    Train a WaveNet on ``training_data`` and return a predictor.

    Targets are discretized into ``self.num_bins`` bins; the bin range
    extends to negative values only when the data contains them.
    """
    has_negative_data = any(np.any(d["target"] < 0) for d in training_data)

    low = -10.0 if has_negative_data else 0
    high = 10.0
    bin_centers = np.linspace(low, high, self.num_bins)
    # Outermost edges are effectively -inf/+inf so every target value
    # falls into some bin.
    bin_edges = np.concatenate(
        [[-1e20], (bin_centers[1:] + bin_centers[:-1]) / 2.0, [1e20]]
    )

    transformation = self.create_transformation(bin_edges)
    transformation.estimate(iter(training_data))

    loader = TrainDataLoader(
        dataset=training_data,
        transform=transformation,
        batch_size=self.trainer.batch_size,
        num_batches_per_epoch=self.trainer.num_batches_per_epoch,
        ctx=self.trainer.ctx,
    )

    # Create the training network within the same MXNet context that will
    # be used during training.
    with self.trainer.ctx:
        trained_net = WaveNet(**self._get_wavenet_args(bin_centers))

    self.trainer(
        net=trained_net,
        input_names=get_hybrid_forward_input_names(trained_net),
        train_iter=loader,
    )

    # The prediction network must be created within the context that was
    # used during training.
    with self.trainer.ctx:
        return self.create_predictor(
            transformation, trained_net, bin_centers
        )
dataset=dataset.train,
transform=train_output.transformation,
batch_size=batch_size,
num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
ctx=mx.cpu(),
)

# Pull exactly one batch from the loader; the loop body is intentionally
# empty — `data_entry` retains the (only) batch after the loop.
for data_entry in islice(training_data_loader, 1):
    pass

# we now call the train model to get the predicted distribution on each window
# this allows us to investigate where are the biggest anomalies
context_length = train_output.trained_net.context_length
prediction_length = train_output.trained_net.prediction_length

input_names = get_hybrid_forward_input_names(train_output.trained_net)

# Predicted distribution of the trained network for this batch.
distr = train_output.trained_net.distribution(
    *[data_entry[k] for k in input_names]
)

# gets all information into numpy array for further plotting
samples = distr.sample(num_samples).asnumpy()
percentiles = np.percentile(samples, axis=0, q=[10.0, 90.0])
# Full observed window: past followed by future targets, trimmed to the
# context + prediction span the network saw.
target = mx.ndarray.concat(
    data_entry['past_target'], data_entry['future_target'], dim=1
)
target = target[:, -(context_length + prediction_length):]
# Negative log-likelihood of the observed target under the distribution.
nll = -distr.log_prob(target).asnumpy()
target = target.asnumpy()
mean = samples.mean(axis=0)
# NOTE(review): this repeats the `percentiles` computation above with the
# same arguments — one of the two looks redundant; confirm and remove.
percentiles = np.percentile(samples, axis=0, q=[10.0, 90.0])
import mxnet as mx
import mxnet.gluon.nn as nn
# fix: numpy was never imported although np.prod is used below.
import numpy as np

from gluonts.trainer import learning_rate_scheduler as lrs
from gluonts.support.util import get_hybrid_forward_input_names

from estimator import net

batch_size = 32


def loss_value(loss) -> float:
    """Return the value of the metric's first (name, value) pair."""
    return loss.get_name_value()[0][1]


# Flag toggled elsewhere to stop training early.
halt = False

input_names = get_hybrid_forward_input_names(net)


def count_model_params(self, net: nn.HybridBlock) -> int:
    """Return the total number of scalar parameters in ``net``.

    NOTE(review): the ``self`` parameter looks like a leftover from a
    method definition — this is a module-level function. Kept for
    call-site compatibility; callers must pass a dummy first argument.
    """
    params = net.collect_params()
    num_params = 0
    for p in params:
        v = params[p]
        # v.shape is the parameter tensor's shape; prod gives its size.
        num_params += np.prod(v.shape)
    return num_params


# Halve the learning rate when the objective stops improving for 10
# evaluations, but never go below 5e-5.
lr_scheduler = lrs.MetricAttentiveScheduler(
    objective="min",
    patience=10,
    decay_factor=0.5,
    min_lr=5e-5,
)