Example #1
    def _empirical_cdf_inverse_transform(
        batch_target_sorted: Tensor,
        batch_predictions: Tensor,
        slopes: Tensor,
        intercepts: Tensor,
    ) -> np.ndarray:
        """
        Apply the inverse transformation of the empirical CDF.

        Parameters
        ----------
        batch_target_sorted
            Sorted targets of the input batch.
        batch_predictions
            Predictions of the underlying probability distribution.
        slopes
            Slopes of the piece-wise linear function.
        intercepts
            Intercepts of the piece-wise linear function.

        Returns
        -------
        outputs
            Inverse transformed outputs.

        """
        # Convert MXNet tensors to NumPy for the element-wise inversion below.
        slopes = slopes.asnumpy()
        intercepts = intercepts.asnumpy()

        batch_target_sorted = batch_target_sorted.asnumpy()
        batch_size, num_timesteps, target_dim = batch_target_sorted.shape

        # Map each predicted CDF level to the index of its linear segment.
        indices = np.floor(batch_predictions * num_timesteps)
        # indices = indices - 1
        # Clip into the valid index range [0, num_timesteps - 1].
        indices = np.clip(indices, 0, num_timesteps - 1)
        indices = indices.astype(int)

        slopes_at_idx = np.take_along_axis(slopes, indices, axis=1)
        intercepts_at_idx = np.take_along_axis(intercepts, indices, axis=1)

        # Invert the piecewise-linear CDF on each segment; where the slope is
        # zero (flat segment), fall back to the sorted target value itself.
        transformed = np.where(
            slopes_at_idx != 0.0,
            (batch_predictions - intercepts_at_idx) / slopes_at_idx,
            np.take_along_axis(batch_target_sorted, indices, axis=1),
        )
        return transformed
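
The inversion above works segment by segment from precomputed slopes and intercepts of the piecewise-linear empirical CDF. As a rough, one-dimensional illustration of the same idea (a hypothetical helper, not part of the code above), np.interp can invert an empirical CDF built directly from a sorted sample:

import numpy as np


def inverse_empirical_cdf_1d(
    sorted_sample: np.ndarray, levels: np.ndarray
) -> np.ndarray:
    # Hypothetical 1-D sketch for illustration only: the empirical CDF reaches
    # level i/n at sorted_sample[i - 1], and np.interp performs the
    # piecewise-linear inversion between those knots.
    n = len(sorted_sample)
    knots = np.arange(1, n + 1) / n
    return np.interp(levels, knots, sorted_sample)


sample = np.sort(np.random.default_rng(0).normal(size=100))
print(inverse_empirical_cdf_1d(sample, np.array([0.1, 0.5, 0.9])))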
Example #2
from typing import List, Optional, Tuple, Union

import mxnet as mx
import numpy as np

# Type alias used by the annotations below (an NDArray eagerly, a Symbol in
# hybrid mode); the original module imports an equivalent Tensor type.
Tensor = Union[mx.nd.NDArray, mx.sym.Symbol]


def plot_samples(s: Tensor, bins: int = 100) -> None:
    from matplotlib import pyplot as plt

    s = s.asnumpy()
    plt.hist(s, bins=bins)
    plt.show()
    # Method of a local-binning representation class (class definition not shown here).
    def hybrid_forward(
        self,
        F,
        data: Tensor,
        observed_indicator: Tensor,
        scale: Optional[Tensor],
        rep_params: List[Tensor],
        **kwargs,
    ) -> Tuple[Tensor, Tensor, List[Tensor]]:
        data_np = data.asnumpy()
        observed_indicator_np = observed_indicator.astype("int32").asnumpy()

        if scale is None:
            # Even though local binning implicitly scales the data, we still
            # return the scale as an input to the model.
            scale = F.expand_dims(
                F.sum(data * observed_indicator, axis=-1) /
                F.sum(observed_indicator, axis=-1),
                -1,
            )

            # Per-series bin centers/edges; rows stay at -1 for series with no
            # observed values.
            bin_centers_hyb = np.ones((len(data), self.num_bins)) * (-1)
            bin_edges_hyb = np.ones((len(data), self.num_bins + 1)) * (-1)

            # Every time series needs to be binned individually
            for i in range(len(data_np)):
                # Identify observed data points.
                data_loc = data_np[i]
                observed_indicator_loc = observed_indicator_np[i]
                data_obs_loc = data_loc[observed_indicator_loc == 1]

                if data_obs_loc.size > 0:
                    # Calculate time series specific bin centers and edges.
                    if self.is_quantile:
                        bin_centers_loc = np.quantile(
                            data_obs_loc, np.linspace(0, 1, self.num_bins))
                    else:
                        bin_centers_loc = np.linspace(
                            np.min(data_obs_loc),
                            np.max(data_obs_loc),
                            self.num_bins,
                        )
                    bin_centers_hyb[i] = ensure_binning_monotonicity(
                        bin_centers_loc)
                    bin_edges_hyb[i] = bin_edges_from_bin_centers(
                        bin_centers_hyb[i])

                    # Bin the time series.
                    data_obs_loc_binned = np.digitize(data_obs_loc,
                                                      bins=bin_edges_hyb[i],
                                                      right=False)
                else:
                    data_obs_loc_binned = []

                # Write the binned time series back into the data array.
                data_loc[observed_indicator_loc == 1] = data_obs_loc_binned
                data_np[i] = data_loc

        else:
            bin_centers_hyb = rep_params[0].asnumpy()
            bin_edges_hyb = rep_params[1].asnumpy()

            # Repeat the stored bin centers/edges so that there is one row per
            # series in the batch (integer repeat count).
            bin_edges_hyb = np.repeat(
                bin_edges_hyb,
                len(data_np) // len(bin_edges_hyb),
                axis=0,
            )
            bin_centers_hyb = np.repeat(
                bin_centers_hyb,
                len(data_np) // len(bin_centers_hyb),
                axis=0,
            )

            for i in range(len(data_np)):
                data_loc = data_np[i]
                observed_indicator_loc = observed_indicator_np[i]
                data_obs_loc = data_loc[observed_indicator_loc == 1]

                # Bin the time series based on previously computed bin edges.
                data_obs_loc_binned = np.digitize(data_obs_loc,
                                                  bins=bin_edges_hyb[i],
                                                  right=False)

                data_loc[observed_indicator_loc == 1] = data_obs_loc_binned
                data_np[i] = data_loc

        # Convert the per-series bin centers/edges back to tensors; they are
        # returned as representation parameters alongside the binned data.
        bin_centers_hyb = F.array(bin_centers_hyb)
        bin_edges_hyb = F.array(bin_edges_hyb)

        data = mx.nd.array(data_np)

        return data, scale, [bin_centers_hyb, bin_edges_hyb]
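
A minimal, self-contained sketch of the per-series quantile-binning branch above (the case where scale is None and is_quantile is True): the midpoint edge construction below is an assumption standing in for bin_edges_from_bin_centers, and ensure_binning_monotonicity is omitted because np.quantile already returns non-decreasing centers.

import numpy as np

num_bins = 4
series = np.array([0.1, 0.4, 0.35, 0.8, 0.7, 0.05, np.nan, 0.6])
observed = ~np.isnan(series)  # stand-in for the observed-values indicator
obs = series[observed]

# Bin centers at evenly spaced quantile levels of the observed values.
centers = np.quantile(obs, np.linspace(0, 1, num_bins))
# Assumed edge construction: -inf, midpoints between consecutive centers, +inf
# (num_bins + 1 edges, matching the shapes used above).
edges = np.concatenate(([-np.inf], (centers[1:] + centers[:-1]) / 2, [np.inf]))

# Replace observed values by their bin indices, as the loop above does per series.
binned = series.copy()
binned[observed] = np.digitize(obs, bins=edges, right=False)
print(centers)
print(edges)
print(binned)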