Ejemplo n.º 1
0
    def sample_interventional_perfect(self, interventions: PerfectIntervention, nsamples: int = 1) -> np.array:
        """
        Return `nsamples` samples from the graph under a perfect intervention

        Parameters
        ----------
        interventions:
            Mapping from intervened node to its interventional distribution. Only `.get(node)`,
            `node in interventions` and `dist.sample(nsamples)` are used here. An intervened node takes
            its value directly from that distribution and ignores its parents.
        nsamples:
            Number of samples (rows) to draw.

        Returns
        -------
        (nsamples x nnodes) matrix of samples. Column `i` holds the node with index `i` in
        `self._node2ix` (assumed to agree with the order of `self._node_list`).
        """
        nnodes = len(self._nodes)
        samples = np.zeros((nsamples, nnodes))
        noise = np.zeros((nsamples, nnodes))

        # Draw every node's exogenous term first: either from the interventional distribution,
        # or from the node's own Gaussian noise (`self._variances` holds variances, hence the sqrt).
        for ix, (node, mean, var) in enumerate(zip(self._node_list, self._means, self._variances)):
            interventional_dist = interventions.get(node)
            if interventional_dist is not None:
                noise[:, ix] = interventional_dist.sample(nsamples)
            else:
                noise[:, ix] = np.random.normal(loc=mean, scale=var ** .5, size=nsamples)

        # Propagate in topological order so that parent columns are filled before their children.
        for node in self.topological_sort():
            ix = self._node2ix[node]
            parents = self._parents[node]
            if node not in interventions and len(parents) != 0:
                parent_ixs = [self._node2ix[p] for p in parents]
                parent_vals = samples[:, parent_ixs]
                # The weight matrix is addressed by matrix index on both axes; using the node
                # label for the column breaks whenever labels are not exactly 0..n-1 in order.
                samples[:, ix] = np.sum(parent_vals * self._weight_mat[parent_ixs, ix], axis=1) + noise[:, ix]
            else:
                samples[:, ix] = noise[:, ix]

        return samples
Ejemplo n.º 2
0
    def logpdf(self, samples: np.array, interventions: PerfectIntervention = None, exclude_intervention_prob=True) -> np.array:
        """
        Return the log-density of each row of `samples`, optionally under a perfect intervention

        The density factorizes over nodes: each non-intervened node contributes a Gaussian term for
        its residual after subtracting the weighted sum of its parents' values.

        Parameters
        ----------
        samples:
            (nsamples x nnodes) matrix; column `i` is read as the node with index `i` in `self._node2ix`.
        interventions:
            Mapping from intervened node to its interventional distribution, or None for the
            observational density. Only `.get(node)` is used on the mapping.
        exclude_intervention_prob:
            If True, intervened nodes contribute nothing. If False, each intervened node contributes
            its interventional log-density (`logpdf` for a `GaussIntervention`, otherwise the log of `pdf`).

        Returns
        -------
        Length-nsamples vector of log-densities.
        """
        # TODO this is about 10x slower than using multivariate_normal.logpdf with the covariance matrix
        # TODO can I speed this up? where is the time spent?

        log_probs = np.zeros(samples.shape[0])

        for node in self.topological_sort():
            node_ix = self._node2ix[node]
            iv = interventions.get(node) if interventions is not None else None

            if iv is not None:
                # A perfect intervention cuts the node off from its parents, so its only possible
                # contribution is the interventional density itself.
                if not exclude_intervention_prob:
                    if isinstance(iv, GaussIntervention):
                        log_probs += iv.logpdf(samples[:, node_ix])
                    else:
                        log_probs += np.log(iv.pdf(samples[:, node_ix]))
                continue

            parent_ixs = [self._node2ix[p] for p in self._parents[node]]
            if parent_ixs:
                # The weight matrix is addressed by matrix index on both axes, not by node label.
                correction = (samples[:, parent_ixs] * self._weight_mat[parent_ixs, node_ix]).sum(axis=1)
            else:
                correction = 0

            # The residual is the node's noise term; `self._means` is used as the noise location so
            # the density matches what `sample_interventional_perfect` draws (no-op when means are 0).
            log_probs += norm.logpdf(
                samples[:, node_ix] - correction,
                loc=self._means[node_ix],
                scale=self._variances[node_ix] ** .5,
            )

        return log_probs