Example #1
    def get_parameters(self, parameter, features, actions, reinitialize):
        """ Computes the distribution parameters for the given observations

        Args:
            parameter (np.array): parameter vector of the distribution
            features (np.array): observation features
            actions (np.array): observation actions
            reinitialize (bool): whether to re-bucketize on new features

        Returns:
            tuple: per-sample parameters of shape (n_samples, n_bins), and
                the bucket index of each action
        """
        if not self.feature_map.anchor_points_initialized:
            self.feature_map.initialize_anchor_points(features, actions)
            self.bins = self.feature_map.action_anchor_points
            self.inds = np.digitize(actions, self.bins, right=True)
            bucketized_actions = self.bins[self.inds]
            # One-hot action features: one column per anchor point
            fm_a = np.concatenate(
                [self.indicator(bucketized_actions - b)[:, np.newaxis]
                 for b in self.bins], axis=1)
            fm_c = self.feature_map.contextual_feature_map(features)
            self.fm_c_shape = fm_c.shape[1]
            # Per-sample outer product of contextual and action features
            self.representation = np.einsum('ij, ik -> ikj', fm_c, fm_a)

        if reinitialize:
            self.inds = np.digitize(actions, self.bins, right=True)
            # Fold the topmost bucket into the one below it
            self.inds[self.inds == np.max(self.inds)] = np.max(self.inds) - 1
            bucketized_actions = self.bins[self.inds]
            fm_a = np.concatenate(
                [self.indicator(bucketized_actions - b)[:, np.newaxis]
                 for b in self.bins], axis=1)
            fm_c = self.feature_map.contextual_feature_map(features)
            self.representation = np.einsum('ij, ik -> ikj', fm_c, fm_a)

        # Split the parameter vector into intercept and weights
        itcp, param = parameter[0], parameter[1:]
        param = param.reshape(self.bins.shape[0], self.fm_c_shape)
        return np.einsum('ikl, kl -> ik', self.representation, param) + itcp, self.inds
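For reference, here is a minimal standalone sketch of the bucketization pattern this method relies on (all names, shapes, and anchor points below are hypothetical): digitize continuous actions onto anchor points, one-hot encode the resulting bucket, and cross it with context features via einsum.

import numpy as np

rng = np.random.default_rng(0)
actions = rng.uniform(0.0, 10.0, size=5)   # continuous actions
bins = np.linspace(0.0, 10.0, 4)           # action anchor points
inds = np.digitize(actions, bins, right=True)
inds[inds == len(bins)] = len(bins) - 1    # fold the overflow bucket back in
bucketized = bins[inds]

# One-hot action features: 1 where the bucketized action equals an anchor
fm_a = (bucketized[:, None] == bins[None, :]).astype(float)
fm_c = rng.normal(size=(5, 3))             # stand-in contextual features
representation = np.einsum('ij, ik -> ikj', fm_c, fm_a)
print(representation.shape)                # (5, 4, 3)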
Example #2
def assign_to_modal_uparams(this_uparam, modal_uparam):
    """Match each modal parameter value to the nearest element of
    this_uparam, bucketing with the midpoints between modal values."""
    try:
        mid_pts = 0.5 * (modal_uparam[1:] + modal_uparam[:-1])
        bins = np.concatenate(((-np.inf, ), mid_pts, (np.inf, )))
        inds_in_modal = np.digitize(this_uparam, bins) - 1
        numerical = True
    except TypeError:
        # Arithmetic fails for non-numerical (e.g. string) parameters
        print('non-numerical parameter')
        numerical = False
    if numerical:
        uinds = np.unique(inds_in_modal)
        inds_in_this = np.zeros((0, ), dtype='int')
        for uind in uinds:
            # Of all values that fell into this bin, keep only the closest
            candidates = np.where(inds_in_modal == uind)[0]
            dist_from_modal = np.abs(this_uparam[candidates] -
                                     modal_uparam[uind])
            to_keep = candidates[np.argmin(dist_from_modal)]
            inds_in_this = np.concatenate((inds_in_this, (to_keep, )))
        inds_in_modal = inds_in_modal[inds_in_this]
        bool_in_this = np.zeros((len(this_uparam), ), dtype='bool')
        bool_in_modal = np.zeros((len(modal_uparam), ), dtype='bool')
        bool_in_this[inds_in_this] = True
        bool_in_modal[inds_in_modal] = True
    else:
        assert (np.all(this_uparam == modal_uparam))
        bool_in_this, bool_in_modal = [
            np.ones(this_uparam.shape, dtype='bool') for iparam in range(2)
        ]
    return bool_in_this, bool_in_modal
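A small usage sketch with made-up inputs: each modal value claims the single closest element of this_uparam that fell into its midpoint bin.

import numpy as np

this_uparam = np.array([0.1, 0.9, 1.1, 2.2])
modal_uparam = np.array([0.0, 1.0, 2.0])
bool_in_this, bool_in_modal = assign_to_modal_uparams(this_uparam, modal_uparam)
print(bool_in_this)    # [ True  True False  True] -- 1.1 loses its bin to 0.9
print(bool_in_modal)   # [ True  True  True]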
Example #3
def fit_gaussian(S, Cin, param_means, param_sigmas):
    '''
    generates predictions using parameters sampled from the distribution, then
    outputs a Gaussian fitted to the predictions
    '''

    output_samples = sample_output(S, Cin, param_means, param_sigmas) * 100

    # generate approximate gaussian
    bins = np.arange(1000)

    indices = np.digitize(output_samples, bins)

    # histogram each output dimension over the integer bins
    count1 = np.bincount(indices[:, 0], minlength=1001)[1:]
    count2 = np.bincount(indices[:, 1], minlength=1001)[1:]

    # fit gaussians
    '''
    popt1 = curve_fit(gaussian, bins, count1)
    popt2 = curve_fit(gaussian, bins, count2)


    print('popt1', popt1)
    print('popt2', popt2)
    '''
    plt.plot(bins, count1)
    plt.plot(bins, count2)

    plt.show()
    sys.exit()
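The commented-out block above suggests the intended fit; a self-contained sketch of that histogram-then-fit pattern could look like this (the gaussian helper and all data here are hypothetical):

import numpy as np
from scipy.optimize import curve_fit

def gaussian(x, a, mu, sigma):
    return a * np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))

samples = np.random.normal(500.0, 30.0, size=10000)
bins = np.arange(1000)
# Count how many samples fall into each unit-width bin
counts = np.bincount(np.digitize(samples, bins), minlength=1001)[1:]
popt, _ = curve_fit(gaussian, bins, counts, p0=(counts.max(), 500.0, 30.0))
print(popt)  # fitted amplitude, mean, and standard deviation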
Example #4
    def __init__(self,
                 counts,
                 lengths,
                 ploidy,
                 multiscale_factor=1,
                 constraint_lambdas=None,
                 constraint_params=None):

        self.lengths = np.array(lengths)
        self.lengths_lowres = decrease_lengths_res(lengths, multiscale_factor)
        self.ploidy = ploidy
        self.multiscale_factor = multiscale_factor
        if constraint_lambdas is None:
            self.lambdas = {}
        else:
            self.lambdas = constraint_lambdas
        if constraint_params is None:
            self.params = {}
        else:
            self.params = constraint_params
        torm = find_beads_to_remove(counts=counts,
                                    nbeads=self.lengths_lowres.sum() * ploidy)
        self.torm_3d = np.repeat(torm.reshape(-1, 1), 3, axis=1)

        self.row, self.col = _constraint_dis_indices(
            counts=counts,
            n=self.lengths_lowres.sum(),
            lengths=self.lengths_lowres,
            ploidy=ploidy)
        # Indices for neighbor-distance terms: neighbors lie on the
        # off-diagonal, i.e. pairs (i, j) with j = i + 1
        row_adj = ag_np.unique(self.row).astype(int)
        row_adj = row_adj[ag_np.isin(row_adj + 1, self.col)]
        # Remove pairs whose "neighbor" beads are actually on different
        # chromosomes or homologs
        chrom_ends = np.tile(lengths, ploidy).cumsum()
        self.row_adj = row_adj[ag_np.digitize(row_adj, chrom_ends) ==
                               ag_np.digitize(row_adj + 1, chrom_ends)]
        self.col_adj = self.row_adj + 1
        self.check()
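A standalone sketch of the boundary filter above, with made-up lengths: a pair (i, i + 1) survives only if np.digitize places both beads in the same segment of the cumulative-length array.

import numpy as np

lengths = np.array([5, 3])   # beads per chromosome
ploidy = 1
chrom_ends = np.tile(lengths, ploidy).cumsum()   # [5, 8]
row_adj = np.arange(chrom_ends[-1] - 1)          # candidate left beads 0..6
same_chrom = (np.digitize(row_adj, chrom_ends) ==
              np.digitize(row_adj + 1, chrom_ends))
print(row_adj[same_chrom])   # [0 1 2 3 5 6] -- pair (4, 5) spans chromosomes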
Example #5
def resampling(w, rs):
    """
    Stratified resampling with "nograd_primitive" to ensure autograd 
    takes no derivatives through it.
    """
    N = w.shape[0]
    bins = np.cumsum(w)
    ind = np.arange(N)
    # One uniform draw per stratum [i/N, (i+1)/N)
    u = (ind + rs.rand(N)) / N

    # Map each stratified uniform onto the weight CDF
    return np.digitize(u, bins)
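Usage sketch with hypothetical weights: the returned array gives one ancestor index per stratum, biased toward the heavier particles.

import numpy as np

rs = np.random.RandomState(0)
w = np.array([0.1, 0.2, 0.3, 0.4])   # normalized particle weights
print(resampling(w, rs))             # e.g. [1 2 3 3]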
Example #6
def sim_q(prop_params, model_params, y, smc_obj, rs, verbose=False):
    """
    Simulates a single sample from the VSMC approximation.

    Requires an SMC object with 2 member functions:
    -- sim_prop(t, x_{t-1}, y, prop_params, model_params, rs)
    -- log_weights(t, x_t, x_{t-1}, y, prop_params, model_params)
    """
    # Extract constants
    T = y.shape[0]
    Dx = smc_obj.Dx
    N = smc_obj.N

    # Initialize SMC
    X = np.zeros((N, T, Dx))
    logW = np.zeros(N)
    W = np.zeros((N, T))
    ESS = np.zeros(T)

    for t in range(T):
        # Resampling
        if t > 0:
            ancestors = resampling(W[:, t - 1], rs)
            X[:, :t, :] = X[ancestors, :t, :]

        # Propagation
        X[:, t, :] = smc_obj.sim_prop(t, X[:, t - 1, :], y, prop_params,
                                      model_params, rs)

        # Weighting
        logW = smc_obj.log_weights(t, X[:, t, :], X[:, t - 1, :], y,
                                   prop_params, model_params)
        max_logW = np.max(logW)
        W[:, t] = np.exp(logW - max_logW)
        W[:, t] /= np.sum(W[:, t])
        ESS[t] = 1. / np.sum(W[:, t]**2)

    # Sample from the empirical approximation
    bins = np.cumsum(W[:, -1])
    u = rs.rand()
    B = np.digitize(u, bins)

    if verbose:
        print('Mean ESS', np.mean(ESS) / N)
        print('Min ESS', np.min(ESS))

    return X[B, :, :]
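A minimal stub (hypothetical model) satisfying the two-function interface the docstring describes, assuming resampling from Example #5 is in scope: a 1-D random-walk proposal weighted by a Gaussian likelihood.

import numpy as np

class ToySMC:
    Dx, N = 1, 50

    def sim_prop(self, t, x_prev, y, prop_params, model_params, rs):
        # Random-walk proposal around the previous particles
        return x_prev + rs.randn(self.N, self.Dx)

    def log_weights(self, t, x, x_prev, y, prop_params, model_params):
        # Gaussian log-likelihood of the observation at time t
        return -0.5 * np.sum((x - y[t]) ** 2, axis=1)

rs = np.random.RandomState(0)
y = np.zeros((10, 1))                      # ten dummy observations
sample = sim_q(None, None, y, ToySMC(), rs)
print(sample.shape)                        # (10, 1)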
Example #7
    def _setup(self):
        """ Sets up the experiment and creates the data
        """
        # Actions
        features, y = self.get_X_y_by_name()
        potentials = self._get_potentials(y)
        actions = self.rng.lognormal(mean=self.start_mu,
                                     sigma=self.start_sigma,
                                     size=potentials.shape[0])
        rewards = self.get_rewards_from_actions(potentials, actions)
        if self.discrete:
            from scipy.stats import lognorm
            rv = lognorm(s=self.start_sigma, scale=np.exp(self.start_mu))
            quantiles = np.quantile(actions,
                                    np.linspace(0, 1, self.discrete + 1))
            action_anchors = np.pad(quantiles,
                                    1,
                                    'constant',
                                    constant_values=(1e-7, np.inf))
            bins = action_anchors[:-1]
            inds = np.digitize(actions, bins, right=True)
            # Lower bin edge for each action; clamp the lowest bucket at 0
            inds_1 = inds - 1
            inds_1[inds_1 == -1] = 0
            # Propensity = logging-policy CDF mass over each action's bucket
            pi_logging = rv.cdf(bins[inds]) - rv.cdf(bins[inds_1])
        else:
            pi_logging = Dataset.logging_policy(actions, self.start_mu,
                                                self.start_sigma)

        # Test train split
        self.actions_train, self.actions_test, self.features_train, self.features_test, self.reward_train, \
        self.reward_test, self.pi_0_train, self.pi_0_test, self.potentials_train, self.potentials_test, \
        self.l_train, self.l_test = train_test_split(actions, features, rewards, pi_logging, potentials, y,
                                                     train_size=self.train_size, random_state=42)

        self.actions_train, self.actions_valid, self.features_train, self.features_valid, self.reward_train, \
        self.reward_valid, self.pi_0_train, self.pi_0_valid, self.potentials_train, self.potentials_valid, \
        self.l_train, self.l_valid = train_test_split(self.actions_train, self.features_train, self.reward_train,
                                                      self.pi_0_train, self.potentials_train, self.l_train,
                                                      train_size=self.val_size, random_state=42)

        min_max_scaler = MinMaxScaler(feature_range=(0, 1))
        self.features_train = min_max_scaler.fit_transform(self.features_train)
        self.features_valid = min_max_scaler.transform(self.features_valid)
        self.features_test = min_max_scaler.transform(self.features_test)

        self.baseline_reward_valid = np.mean(self.reward_valid)
        self.baseline_reward_test = np.mean(self.reward_test)
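A standalone sketch of the discrete-propensity step above, with hypothetical policy parameters: actions are bucketed at their empirical quantiles, and each action is scored by the CDF mass the logging policy assigns to its bucket.

import numpy as np
from scipy.stats import lognorm

mu, sigma, k = 0.0, 1.0, 5
rng = np.random.default_rng(0)
actions = rng.lognormal(mean=mu, sigma=sigma, size=1000)
rv = lognorm(s=sigma, scale=np.exp(mu))

quantiles = np.quantile(actions, np.linspace(0, 1, k + 1))
anchors = np.pad(quantiles, 1, 'constant', constant_values=(1e-7, np.inf))
bins = anchors[:-1]
inds = np.digitize(actions, bins, right=True)
lower = np.maximum(inds - 1, 0)            # clamp the lowest bucket
pi_logging = rv.cdf(bins[inds]) - rv.cdf(bins[lower])
print(pi_logging[:5])                      # one probability mass per action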
Example #8
def get_binned(fbin, tbin, lim):
    """Bin fbin into jittered intervals on [0, lim] and return each occupied
    bin's position with the mean and standard deviation of tbin in it."""
    num = 10
    # Shift every bin edge by one shared random offset
    bins = np.linspace(0, lim, num=num) + np.random.uniform(-0.05, 0.05)
    ind = np.digitize(fbin, bins=bins)
    y = []
    se = []
    x = []

    for i in range(1, num + 1):
        rvals = tbin[ind == i]
        if rvals.size:
            x.append(bins[i - 1])
            y.append(np.mean(rvals))
            se.append(np.std(rvals))

    return (x, y, se)
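Usage sketch on synthetic data: bin a noisy linear relationship and recover its trend per bin.

import numpy as np

fbin = np.random.uniform(0, 0.8, size=200)
tbin = 2.0 * fbin + np.random.normal(0, 0.1, size=200)
x, y, se = get_binned(fbin, tbin, lim=0.8)
print(len(x), y[:3])   # bin positions, mean of tbin per occupied bin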