def process_dynamic_cat(self, F, feature: Tensor) -> Tensor:
    return self.embed_dynamic(feature.astype(self.dtype))

def process_static_cat(self, F, feature: Tensor) -> Tensor:
    feature = self.embed_static(feature.astype(self.dtype))
    return F.tile(feature.expand_dims(axis=1), reps=(1, self.T, 1))

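# A minimal sketch (assumption: plain imperative MXNet, hypothetical shapes
# and a stand-in _T for self.T) of the static-feature tiling performed by
# process_static_cat above: expand_dims + tile broadcasts one embedding
# vector per series across all T time steps, matching the time axis of the
# dynamic features.
import mxnet as mx

_T = 4
_static_emb = mx.nd.array([[1.0, 2.0], [3.0, 4.0]])  # (batch=2, embed_dim=2)
_tiled = mx.nd.tile(_static_emb.expand_dims(axis=1), reps=(1, _T, 1))
assert _tiled.shape == (2, 4, 2)  # the same embedding at every time step
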
def hybrid_forward(
    self,
    F,
    data: Tensor,
    observed_indicator: Tensor,
    scale: Optional[Tensor],
    rep_params: List[Tensor],
    **kwargs,
) -> Tuple[Tensor, Tensor, List[Tensor]]:
    data_np = data.asnumpy()
    observed_indicator_np = observed_indicator.astype("int32").asnumpy()

    if scale is None:
        # Even though local binning implicitly scales the data, we still
        # return the scale as an input to the model.
        scale = F.expand_dims(
            F.sum(data * observed_indicator, axis=-1)
            / F.sum(observed_indicator, axis=-1),
            -1,
        )

        bin_centers_hyb = np.ones((len(data), self.num_bins)) * (-1)
        bin_edges_hyb = np.ones((len(data), self.num_bins + 1)) * (-1)

        # Every time series needs to be binned individually.
        for i in range(len(data_np)):
            # Identify observed data points.
            data_loc = data_np[i]
            observed_indicator_loc = observed_indicator_np[i]
            data_obs_loc = data_loc[observed_indicator_loc == 1]

            if data_obs_loc.size > 0:
                # Calculate time series specific bin centers and edges.
                if self.is_quantile:
                    bin_centers_loc = np.quantile(
                        data_obs_loc, np.linspace(0, 1, self.num_bins)
                    )
                else:
                    bin_centers_loc = np.linspace(
                        np.min(data_obs_loc),
                        np.max(data_obs_loc),
                        self.num_bins,
                    )
                bin_centers_hyb[i] = ensure_binning_monotonicity(
                    bin_centers_loc
                )
                bin_edges_hyb[i] = bin_edges_from_bin_centers(
                    bin_centers_hyb[i]
                )

                # Bin the time series.
                data_obs_loc_binned = np.digitize(
                    data_obs_loc, bins=bin_edges_hyb[i], right=False
                )
            else:
                data_obs_loc_binned = []

            # Write the binned time series back into the data array.
            data_loc[observed_indicator_loc == 1] = data_obs_loc_binned
            data_np[i] = data_loc
    else:
        bin_centers_hyb = rep_params[0].asnumpy()
        bin_edges_hyb = rep_params[1].asnumpy()

        # Integer division: the batch is assumed to be an exact multiple of
        # the stored bin parameters (np.repeat requires integer repeats).
        bin_edges_hyb = np.repeat(
            bin_edges_hyb,
            len(data_np) // len(bin_edges_hyb),
            axis=0,
        )
        bin_centers_hyb = np.repeat(
            bin_centers_hyb,
            len(data_np) // len(bin_centers_hyb),
            axis=0,
        )

        for i in range(len(data_np)):
            data_loc = data_np[i]
            observed_indicator_loc = observed_indicator_np[i]
            data_obs_loc = data_loc[observed_indicator_loc == 1]

            # Bin the time series based on previously computed bin edges.
            data_obs_loc_binned = np.digitize(
                data_obs_loc, bins=bin_edges_hyb[i], right=False
            )
            data_loc[observed_indicator_loc == 1] = data_obs_loc_binned
            data_np[i] = data_loc

    bin_centers_hyb = F.array(bin_centers_hyb)
    bin_edges_hyb = F.array(bin_edges_hyb)
    data = mx.nd.array(data_np)

    return data, scale, [bin_centers_hyb, bin_edges_hyb]

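# A minimal NumPy sketch of the per-series quantile binning above, on a toy
# series. The helper _bin_edges_sketch is a hypothetical stand-in for what
# bin_edges_from_bin_centers is expected to compute: midpoints between
# consecutive centers, with open outer edges so np.digitize covers the whole
# real line.
import numpy as np

def _bin_edges_sketch(centers: np.ndarray) -> np.ndarray:
    mid = (centers[1:] + centers[:-1]) / 2.0
    return np.concatenate(([-np.inf], mid, [np.inf]))

_series = np.array([0.1, 0.5, 0.4, 2.0, 1.5])
_centers = np.quantile(_series, np.linspace(0, 1, 4))  # 4 quantile bin centers
_binned = np.digitize(_series, bins=_bin_edges_sketch(_centers), right=False)
# _binned holds bin ids in 1..4; bin id b maps back to _centers[b - 1].
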
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    past_is_pad: Tensor,
    past_time_feat: Tensor,
    future_time_feat: Tensor,
    scale: Tensor,
) -> Tensor:
    embedded_cat = self.feature_embedder(feat_static_cat)
    static_feat = F.concat(embedded_cat, F.log(scale + 1.0), dim=1)
    past_target = past_target.astype("int32")

    def blow_up(u):
        """
        Expand to (batch_size x num_samples)
        """
        return F.repeat(u, repeats=self.num_samples, axis=0)

    def is_last_layer(i):
        return i + 1 == len(self.dilations)

    queues = []

    full_time_features = F.concat(past_time_feat, future_time_feat, dim=-1)
    future_observed_values = F.slice_axis(
        future_time_feat, begin=0, end=1, axis=1
    ).ones_like()
    full_observed = F.concat(
        F.expand_dims(past_observed_values, axis=1),
        future_observed_values,
        dim=-1,
    )
    repeated_static_feat = F.repeat(
        F.expand_dims(static_feat, axis=-1),
        repeats=self.pred_length + self.receptive_field,
        axis=-1,
    )
    full_features = F.concat(
        full_time_features, full_observed, repeated_static_feat, dim=1
    )

    feature_slice = F.slice_axis(
        full_features,
        begin=-self.pred_length - self.receptive_field + 1,
        end=None,
        axis=-1,
    )

    tmp = F.slice_axis(
        past_target, begin=-self.receptive_field, end=None, axis=-1
    )
    o = self.target_embed(tmp).swapaxes(1, 2)
    o = F.concat(
        o,
        F.slice_axis(
            feature_slice, begin=-self.receptive_field, end=None, axis=-1
        ),
        dim=1,
    )
    o = self.conv_project(o)

    for i, d in enumerate(self.dilations):
        sz = 1 if d == 2 ** (self.dilation_depth - 1) else d * 2
        _, o = self.residuals[i](o)
        if not is_last_layer(i):
            o_chunk = F.slice_axis(o, begin=-sz - 1, end=-1, axis=-1)
        else:
            o_chunk = o
        queues.append(blow_up(o_chunk))

    res = F.slice_axis(past_target, begin=-2, end=None, axis=-1)
    res = blow_up(res)

    for n in range(self.pred_length):
        queues_next = []
        o = self.target_embed(
            F.slice_axis(res, begin=-2, end=None, axis=-1)
        ).swapaxes(1, 2)
        b = F.slice_axis(
            full_features,
            begin=self.receptive_field + n - 1,
            end=self.receptive_field + n + 1,
            axis=-1,
        )
        b = blow_up(b)
        o = F.concat(o, b, dim=1)
        o = self.conv_project(o)

        skip_outs = []
        for i, d in enumerate(self.dilations):
            skip, o = self.residuals[i](o)
            skip_outs.append(skip)
            if not is_last_layer(i):
                q = queues[i]
                o = F.concat(q, o, num_args=2, dim=-1)
                queues_next.append(
                    F.slice_axis(o, begin=1, end=None, axis=-1)
                )
        queues = queues_next

        y = sum(skip_outs)
        y = self.output_act(y)
        y = self.conv1(y)
        y = self.output_act(y)
        unnormalized_outputs = self.conv2(y)

        if self.temperature > 0:
            probs = F.softmax(
                unnormalized_outputs / self.temperature, axis=1
            )
            y = F.sample_multinomial(probs.swapaxes(1, 2))
        else:
            y = F.argmax(unnormalized_outputs, axis=1)
        y = y.astype("int32")
        res = F.concat(res, y, num_args=2, dim=-1)

    samples = F.slice_axis(res, begin=-self.pred_length, end=None, axis=-1)
    samples = samples.reshape(
        shape=(-1, self.num_samples, self.pred_length)
    )
    samples = self.post_transform(samples)
    samples = F.broadcast_mul(scale.expand_dims(axis=1), samples)

    return samples

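# A minimal sketch (plain imperative MXNet, hypothetical sizes) of the
# blow_up / reshape round trip used above: each batch entry is repeated
# num_samples times along axis 0, decoded as an independent sample path, and
# folded back into (batch_size, num_samples, pred_length) at the end.
import mxnet as mx

_num_samples, _pred_length = 3, 2
_x = mx.nd.array([[10.0, 11.0], [20.0, 21.0]])  # (batch=2, pred_length)
_blown = mx.nd.repeat(_x, repeats=_num_samples, axis=0)  # (6, pred_length)
_folded = _blown.reshape(shape=(-1, _num_samples, _pred_length))
assert _folded.shape == (2, 3, 2)  # _folded[b, s] is sample path s of entry b
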
def hybrid_forward(
    self,
    F,
    feat_static_cat: Tensor,
    past_target: Tensor,
    past_observed_values: Tensor,
    past_time_feat: Tensor,
    future_time_feat: Tensor,
    scale: Tensor,
) -> Tensor:
    """
    Generates prediction samples from the trained WaveNet model.

    Parameters
    ----------
    F
    feat_static_cat
        Static categorical features: (batch_size, num_cat_features)
    past_target
        Past target: (batch_size, receptive_field)
    past_observed_values
        Observed value indicator for the past target:
        (batch_size, receptive_field)
    past_time_feat
        Past time features: (batch_size, num_time_features, receptive_field)
    future_time_feat
        Future time features: (batch_size, num_time_features, pred_length)
    scale
        Scale of the time series: (batch_size, 1)

    Returns
    -------
    Tensor
        Prediction samples with shape (batch_size, num_samples, pred_length)
    """

    def blow_up(u):
        """
        Expand to (batch_size x num_samples)
        """
        return F.repeat(u, repeats=self.num_samples, axis=0)

    past_target = past_target.astype("int32")
    full_features = self.get_full_features(
        F,
        feat_static_cat=feat_static_cat,
        past_observed_values=past_observed_values,
        past_time_feat=past_time_feat,
        future_time_feat=future_time_feat,
        future_observed_values=None,
        scale=scale,
    )

    # To compute queues for the first step, we need features from
    # -self.pred_length - self.receptive_field + 1 to -self.pred_length + 1.
    features_end_ix = (
        -self.pred_length + 1 if self.pred_length > 1 else None
    )
    queues = self.get_initial_conv_queues(
        F,
        past_target=F.slice_axis(
            past_target, begin=-self.receptive_field, end=None, axis=-1
        ),
        features=F.slice_axis(
            full_features,
            begin=-self.pred_length - self.receptive_field + 1,
            end=features_end_ix,
            axis=-1,
        ),
    )
    queues = [blow_up(queue) for queue in queues]

    res = F.slice_axis(past_target, begin=-2, end=None, axis=-1)
    res = blow_up(res)

    for n in range(self.pred_length):
        # Generate one-step-ahead predictions. The input consists of the
        # target and features corresponding to the last two time steps.
        current_target = F.slice_axis(res, begin=-2, end=None, axis=-1)
        current_features = F.slice_axis(
            full_features,
            begin=self.receptive_field + n - 1,
            end=self.receptive_field + n + 1,
            axis=-1,
        )
        embedding = self.target_feature_embedding(
            F,
            target=current_target,
            features=blow_up(current_features),
        )

        # (batch_size, 1, num_bins) where 1 corresponds to the time axis.
        unnormalized_outputs, queues = self.base_net(
            F, embedding, one_step_prediction=True, queues=queues
        )

        if self.temperature > 0:
            # (batch_size, 1, num_bins) where 1 corresponds to the time axis.
            probs = F.softmax(
                unnormalized_outputs / self.temperature, axis=-1
            )
            # (batch_size, 1)
            y = F.sample_multinomial(probs)
        else:
            # (batch_size, 1)
            y = F.argmax(unnormalized_outputs, axis=-1)
        y = y.astype("int32")
        res = F.concat(res, y, num_args=2, dim=-1)

    samples = F.slice_axis(res, begin=-self.pred_length, end=None, axis=-1)
    samples = samples.reshape(
        shape=(-1, self.num_samples, self.pred_length)
    )
    samples = self.post_transform(samples)
    samples = F.broadcast_mul(scale.expand_dims(axis=1), samples)

    return samples

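# A minimal sketch (plain imperative MXNet, hypothetical logits) of the
# temperature-controlled sampling step above: temperature > 0 draws from a
# softened categorical over the bins, temperature == 0 degenerates to greedy
# argmax decoding.
import mxnet as mx

_logits = mx.nd.array([[[2.0, 1.0, 0.1]]])  # (batch=1, 1, num_bins=3)
_temperature = 0.5
if _temperature > 0:
    _probs = mx.nd.softmax(_logits / _temperature, axis=-1)
    _y = mx.nd.sample_multinomial(_probs)  # stochastic bin id, (batch, 1)
else:
    _y = mx.nd.argmax(_logits, axis=-1)  # greedy bin id, (batch, 1)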