Example #1
0
    def cdf(self, x: Tensor):
        """
        Evaluate the empirical CDF of the stored samples at ``x``.

        Each row of the transposed ``self.sorted_samples`` is paired with the
        entry of ``x`` at the same position, so every dimension of the target
        is handled independently of the others.

        NOTE(review): assumes the first axis of ``self.sorted_samples`` indexes
        the samples (in ascending order) and that ``x`` has one leading entry
        per target dimension -- confirm against the callers.
        """
        # Insertion position of every query inside its own sorted column.
        per_dim_columns = self.sorted_samples.T.asnumpy()
        queries = x.asnumpy()
        positions = []
        for column, query in zip(per_dim_columns, queries):
            positions.append(np.searchsorted(column, query))

        # Evenly spaced levels 1/n, 2/n, ..., 1 -- one level per sample.
        num_samples = len(self.samples)
        levels = self.F.linspace(
            start=1 / num_samples,
            stop=1,
            endpoint=True,
            num=num_samples,
        )

        # Read off the level stored at each insertion position.
        return levels.take(indices=mx.nd.array(positions), axis=0)
    def cdf(self, x: Tensor):
        """
        Evaluate the empirical CDF of the stored samples at ``x``.

        The grid of CDF levels is also stored on the instance as
        ``self.CDF_sorted``. The insertion positions are found with
        ``torch.searchsorted`` after a round trip through NumPy.

        Note: computes CDF on each dimension of the target independently.

        NOTE(review): assumes the first axis of ``self.sorted_samples`` indexes
        the samples (in ascending order) -- confirm against the callers.
        """
        # Evenly spaced levels 1/n, 2/n, ..., 1 -- one level per sample.
        num_samples = self.samples.shape[0]
        self.CDF_sorted = self.F.linspace(
            start=1 / num_samples,
            stop=1,
            endpoint=True,
            num=num_samples,
        )

        # Replace this when mxnet is updated to a version where `searchsorted` is available!
        import torch

        # Swap the first and last axes so the searched values lie along the
        # last axis, which is the one `torch.searchsorted` works on.
        samples_t = torch.tensor(self.sorted_samples.asnumpy())
        samples_t = samples_t.transpose(0, -1)

        # Give every query its own trailing axis of length one.
        queries_t = torch.tensor(x.asnumpy())
        queries_t = queries_t.unsqueeze(dim=-1)

        positions = torch.searchsorted(samples_t, queries_t)
        positions = positions.squeeze().numpy()

        # Read off the level stored at each insertion position.
        return self.CDF_sorted.take(indices=mx.nd.array(positions), axis=0)
Example #3
0
    def hybrid_forward(
        self,
        F,
        data: Tensor,
        observed_indicator: Tensor,
        scale: Optional[Tensor],
        rep_params: List[Tensor],
        **kwargs,
    ) -> Tuple[Tensor, Tensor, List[Tensor]]:
        """
        Replace the observed values of ``data`` by the index of their bin.

        The binning itself runs in NumPy, one row of ``data`` at a time. When
        ``scale`` is ``None`` the bin centers and edges are derived from the
        observed values of each row; otherwise the centers and edges passed in
        ``rep_params`` are reused.

        Parameters
        ----------
        F
            Array API used to compute the scale and to wrap the bin arrays.
        data
            Values to bin; every row is binned on its own.
        observed_indicator
            Entries equal to 1 (after a cast to ``int32``) mark the values
            that are binned. All other positions of ``data`` are left as is.
        scale
            If ``None``, it is computed as the sum of ``data *
            observed_indicator`` divided by the sum of ``observed_indicator``
            along the last axis, with a trailing axis appended. Otherwise it
            is returned unchanged.
        rep_params
            Only read when ``scale`` is given: element 0 holds the bin
            centers and element 1 the bin edges.

        Returns
        -------
        Tuple
            The binned data, the scale, and the list
            ``[bin_centers, bin_edges]``.
        """
        values = data.asnumpy()
        observed = observed_indicator.astype("int32").asnumpy()

        if scale is not None:
            # Bins were computed earlier; reuse them.
            centers = rep_params[0].asnumpy()
            edges = rep_params[1].asnumpy()

            # Repeat every stored row so that there is one row of bins per
            # row of data.
            # NOTE(review): presumably the data holds several consecutive rows
            # per original series here -- confirm against the caller.
            edges = np.repeat(edges, len(values) // len(edges), axis=0)
            centers = np.repeat(centers, len(values) // len(centers), axis=0)

            for i in range(len(values)):
                row = values[i]
                is_observed = observed[i] == 1

                # Bin the time series based on previously computed bin edges.
                row[is_observed] = np.digitize(
                    row[is_observed], bins=edges[i], right=False
                )
                values[i] = row
        else:
            # Even though local binning implicitly scales the data, we still return the scale as an input to the model.
            observed_total = F.sum(data * observed_indicator, axis=-1)
            observed_count = F.sum(observed_indicator, axis=-1)
            scale = F.expand_dims(observed_total / observed_count, -1)

            # Rows without any observed value keep the placeholder -1.
            centers = np.full((len(data), self.num_bins), -1.0)
            edges = np.full((len(data), self.num_bins + 1), -1.0)

            # Every time series needs to be binned individually
            for i in range(len(values)):
                row = values[i]
                is_observed = observed[i] == 1
                seen = row[is_observed]

                if seen.size == 0:
                    # Nothing observed: nothing to write back for this row.
                    binned = []
                else:
                    # Bin centers specific to this time series: quantiles of
                    # the observed values, or an even grid from min to max.
                    if self.is_quantile:
                        raw_centers = np.quantile(
                            seen, np.linspace(0, 1, self.num_bins)
                        )
                    else:
                        raw_centers = np.linspace(
                            np.min(seen), np.max(seen), self.num_bins
                        )
                    centers[i] = ensure_binning_monotonicity(raw_centers)
                    edges[i] = bin_edges_from_bin_centers(centers[i])

                    binned = np.digitize(seen, bins=edges[i], right=False)

                # Write the binned time series back into the data array.
                row[is_observed] = binned
                values[i] = row

        bin_centers_out = F.array(centers)
        bin_edges_out = F.array(edges)
        binned_data = mx.nd.array(values)

        return binned_data, scale, [bin_centers_out, bin_edges_out]
Example #4
0
def plot_samples(s: Tensor, bins: int = 100) -> None:
    """
    Show a histogram of the values in ``s``.

    Parameters
    ----------
    s
        Tensor holding the values to plot; it is converted to a NumPy array
        first.
    bins
        Passed on to matplotlib's ``hist`` as its ``bins`` argument.
    """
    # Imported here so that matplotlib is only needed when plotting.
    import matplotlib.pyplot as plt

    values = s.asnumpy()
    plt.hist(values, bins=bins)
    plt.show()