def get_parameters(self, parameter, features, actions, reinitialize):
    """ Computes the parameters of the distribution for the given observations

    Args:
        parameter (np.array): parameter of the distribution
        features (np.array): observation features
        actions (np.array): observation actions
        reinitialize (bool): whether to re-apply the bucketizer to new features
    """
    if not self.feature_map.anchor_points_initialized:
        self.feature_map.initialize_anchor_points(features, actions)
        self.bins = self.feature_map.action_anchor_points
        self.inds = np.digitize(actions, self.bins, right=True)
        bucketized_actions = self.bins[self.inds]
        fm_a = np.concatenate(
            [self.indicator(bucketized_actions - b)[:, np.newaxis] for b in self.bins],
            axis=1)
        fm_c = self.feature_map.contextual_feature_map(features)
        self.fm_c_shape = fm_c.shape[1]
        self.representation = np.einsum('ij, ik -> ikj', fm_c, fm_a)
    if reinitialize:
        self.inds = np.digitize(actions, self.bins, right=True)
        # Clip the topmost index so out-of-range actions stay in the last bin
        self.inds[self.inds == np.max(self.inds)] = np.max(self.inds) - 1
        bucketized_actions = self.bins[self.inds]
        fm_a = np.concatenate(
            [self.indicator(bucketized_actions - b)[:, np.newaxis] for b in self.bins],
            axis=1)
        fm_c = self.feature_map.contextual_feature_map(features)
        self.representation = np.einsum('ij, ik -> ikj', fm_c, fm_a)
    itcp, param = parameter[0], parameter[1:]
    param = param.reshape(self.bins.shape[0], self.fm_c_shape)
    return np.einsum('ikl, kl -> ik', self.representation, param) + itcp, self.inds
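# A standalone sketch of the bucketization step above (the arrays are
# hypothetical, not from the original code): np.digitize with right=True maps
# each action to the index of its enclosing anchor-point bin, and indexing the
# bins with those indices snaps every action to its bucket's anchor value.
example_bins = np.array([0.0, 1.0, 2.0, 5.0])
example_actions = np.array([0.3, 1.7, 4.2])
example_inds = np.digitize(example_actions, example_bins, right=True)  # [1, 2, 3]
bucketized = example_bins[example_inds]                                # [1., 2., 5.]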
def assign_to_modal_uparams(this_uparam, modal_uparam):
    try:
        mid_pts = 0.5 * (modal_uparam[1:] + modal_uparam[:-1])
        bins = np.concatenate(((-np.inf,), mid_pts, (np.inf,)))
        inds_in_modal = np.digitize(this_uparam, bins) - 1
        numerical = True
    except TypeError:
        print('non-numerical parameter')
        numerical = False
    if numerical:
        uinds = np.unique(inds_in_modal)
        inds_in_this = np.zeros((0,), dtype='int')
        for uind in uinds:
            # Of all candidates falling in this modal bin, keep the closest one
            candidates = np.where(inds_in_modal == uind)[0]
            dist_from_modal = np.abs(this_uparam[candidates] - modal_uparam[uind])
            to_keep = candidates[np.argmin(dist_from_modal)]
            inds_in_this = np.concatenate((inds_in_this, (to_keep,)))
        inds_in_modal = inds_in_modal[inds_in_this]
        bool_in_this = np.zeros((len(this_uparam),), dtype='bool')
        bool_in_modal = np.zeros((len(modal_uparam),), dtype='bool')
        bool_in_this[inds_in_this] = True
        bool_in_modal[inds_in_modal] = True
    else:
        assert np.all(this_uparam == modal_uparam)
        bool_in_this, bool_in_modal = [
            np.ones(this_uparam.shape, dtype='bool') for _ in range(2)
        ]
    return bool_in_this, bool_in_modal
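# A minimal usage sketch (the arrays are hypothetical): each entry of
# this_uparam is bucketed to its nearest modal value via mid-point bins, and
# only the single closest candidate per modal bin is kept.
this_up = np.array([0.8, 1.1, 2.2, 5.3])
modal_up = np.array([1.0, 2.0, 5.0])
in_this, in_modal = assign_to_modal_uparams(this_up, modal_up)
# in_this  -> [False, True, True, True]  (0.8 loses to 1.1 for modal value 1.0)
# in_modal -> [True, True, True]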
def fit_gaussian(S, Cin, param_means, param_sigmas):
    '''
    Generates predictions using parameters sampled from the distribution,
    then outputs a gaussian fitted to the predictions.
    '''
    output_samples = sample_output(S, Cin, param_means, param_sigmas) * 100

    # generate approximate gaussian
    bins = np.arange(1000)
    indices = np.digitize(output_samples, bins)
    count1 = np.bincount(indices[:, 0], minlength=1001)[1:]
    print(bins.shape)
    print(count1.shape)
    print(count1)
    count2 = np.bincount(indices[:, 1], minlength=1001)[1:]

    # fit gaussians
    '''
    popt1 = curve_fit(gaussian, bins, count1)
    popt2 = curve_fit(gaussian, bins, count2)
    print('popt1', popt1)
    print('popt2', popt2)
    '''
    plt.plot(bins, count1)
    plt.plot(bins, count2)
    plt.show()
    sys.exit()
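# A sketch of the commented-out fitting step above. The three-parameter form
# of `gaussian` is an assumption; the original's definition is not shown.
from scipy.optimize import curve_fit

def gaussian(x, a, mu, sigma):
    # Unnormalized gaussian: amplitude a, mean mu, standard deviation sigma
    return a * np.exp(-0.5 * ((x - mu) / sigma) ** 2)

# Inside fit_gaussian this would read (note curve_fit returns (popt, pcov)):
# popt1, _ = curve_fit(gaussian, bins, count1, p0=(count1.max(), 500.0, 50.0))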
def __init__(self, counts, lengths, ploidy, multiscale_factor=1,
             constraint_lambdas=None, constraint_params=None):
    self.lengths = np.array(lengths)
    self.lengths_lowres = decrease_lengths_res(lengths, multiscale_factor)
    self.ploidy = ploidy
    self.multiscale_factor = multiscale_factor
    if constraint_lambdas is None:
        self.lambdas = {}
    else:
        self.lambdas = constraint_lambdas
    if constraint_params is None:
        self.params = {}
    else:
        self.params = constraint_params

    torm = find_beads_to_remove(counts=counts,
                                nbeads=self.lengths_lowres.sum() * ploidy)
    self.torm_3d = np.repeat(torm.reshape(-1, 1), 3, axis=1)

    self.row, self.col = _constraint_dis_indices(
        counts=counts, n=self.lengths_lowres.sum(),
        lengths=self.lengths_lowres, ploidy=ploidy)

    # Calculating distances for neighbors, which are on the off diagonal
    # line - i & j where j = i + 1
    row_adj = ag_np.unique(self.row).astype(int)
    row_adj = row_adj[ag_np.isin(row_adj + 1, self.col)]
    # Remove if "neighbor" beads are actually on different chromosomes or
    # homologs
    self.row_adj = row_adj[
        ag_np.digitize(row_adj, np.tile(lengths, ploidy).cumsum()) ==
        ag_np.digitize(row_adj + 1, np.tile(lengths, ploidy).cumsum())]
    self.col_adj = self.row_adj + 1

    self.check()
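# A standalone sketch of the neighbor-filtering trick above (the lengths are
# hypothetical): digitizing bead indices against the cumulative chromosome
# lengths yields each bead's chromosome index, so a pair (i, i + 1) is kept
# only when both beads digitize to the same chromosome.
chrom_lengths = np.array([5, 3])        # two chromosomes, 5 and 3 beads
boundaries = chrom_lengths.cumsum()     # array([5, 8])
beads = np.array([3, 4, 6])             # candidate left-hand beads i
same_chrom = np.digitize(beads, boundaries) == np.digitize(beads + 1, boundaries)
# same_chrom -> [True, False, True]; bead 4's neighbor (5) opens chromosome 2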
def resampling(w, rs):
    """
    Stratified resampling with "nograd_primitive" to ensure autograd
    takes no derivatives through it.
    """
    N = w.shape[0]
    bins = np.cumsum(w)
    ind = np.arange(N)
    u = (ind + rs.rand(N)) / N
    return np.digitize(u, bins)
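# A minimal usage sketch (the weights are hypothetical): stratified resampling
# places one uniform draw (i + u_i) / N in each of N strata against the CDF of
# the weights, so a particle with weight w_k is selected roughly N * w_k times.
rs = np.random.RandomState(0)
w = np.array([0.1, 0.2, 0.3, 0.4])
ancestors = resampling(w, rs)  # array of 4 indices into w, e.g. [0, 1, 2, 3]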
def sim_q(prop_params, model_params, y, smc_obj, rs, verbose=False):
    """
    Simulates a single sample from the VSMC approximation.

    Requires an SMC object with 2 member functions:
    -- sim_prop(t, x_{t-1}, y, prop_params, model_params, rs)
    -- log_weights(t, x_t, x_{t-1}, y, prop_params, model_params)
    """
    # Extract constants
    T = y.shape[0]
    Dx = smc_obj.Dx
    N = smc_obj.N

    # Initialize SMC
    X = np.zeros((N, T, Dx))
    logW = np.zeros(N)
    W = np.zeros((N, T))
    ESS = np.zeros(T)

    for t in range(T):
        # Resampling
        if t > 0:
            ancestors = resampling(W[:, t - 1], rs)
            X[:, :t, :] = X[ancestors, :t, :]

        # Propagation
        X[:, t, :] = smc_obj.sim_prop(t, X[:, t - 1, :], y, prop_params,
                                      model_params, rs)

        # Weighting
        logW = smc_obj.log_weights(t, X[:, t, :], X[:, t - 1, :], y,
                                   prop_params, model_params)
        max_logW = np.max(logW)
        W[:, t] = np.exp(logW - max_logW)
        W[:, t] /= np.sum(W[:, t])
        ESS[t] = 1. / np.sum(W[:, t] ** 2)

    # Sample from the empirical approximation
    bins = np.cumsum(W[:, -1])
    u = rs.rand()
    B = np.digitize(u, bins)

    if verbose:
        print('Mean ESS', np.mean(ESS) / N)
        print('Min ESS', np.min(ESS))

    return X[B, :, :]
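# A minimal sketch of the SMC interface sim_q expects. The model here is a
# hypothetical 1-D random walk with gaussian observations, not the original's;
# it only illustrates the required sim_prop / log_weights signatures.
class RandomWalkSMC:
    Dx, N = 1, 100

    def sim_prop(self, t, x_prev, y, prop_params, model_params, rs):
        # Propose by diffusing the previous particles
        return x_prev + rs.randn(*x_prev.shape)

    def log_weights(self, t, x, x_prev, y, prop_params, model_params):
        # Gaussian log-likelihood of the observation at time t
        return -0.5 * np.sum((y[t] - x) ** 2, axis=1)

# y_demo = np.random.RandomState(1).randn(10, 1)
# x_path = sim_q(None, None, y_demo, RandomWalkSMC(), np.random.RandomState(0))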
def _setup(self):
    """ Sets up the experiment and creates the data """
    # Actions
    features, y = self.get_X_y_by_name()
    potentials = self._get_potentials(y)
    actions = self.rng.lognormal(mean=self.start_mu, sigma=self.start_sigma,
                                 size=potentials.shape[0])
    rewards = self.get_rewards_from_actions(potentials, actions)

    if self.discrete:
        from scipy.stats import lognorm
        rv = lognorm(s=self.start_sigma, scale=np.exp(self.start_mu))
        quantiles = np.quantile(actions, np.linspace(0, 1, self.discrete + 1))
        action_anchors = np.pad(quantiles, 1, 'constant',
                                constant_values=(1e-7, np.inf))
        bins = action_anchors[:-1]
        inds = np.digitize(actions, bins, right=True)
        inds_1 = inds - 1
        inds_1[inds_1 == -1] = 0
        # Logging propensity: lognormal mass of the bin each action falls in
        pi_logging = rv.cdf(bins[inds]) - rv.cdf(bins[inds_1])
    else:
        pi_logging = Dataset.logging_policy(actions, self.start_mu,
                                            self.start_sigma)

    # Test/train split
    self.actions_train, self.actions_test, self.features_train, self.features_test, \
        self.reward_train, self.reward_test, self.pi_0_train, self.pi_0_test, \
        self.potentials_train, self.potentials_test, self.l_train, self.l_test = \
        train_test_split(actions, features, rewards, pi_logging, potentials, y,
                         train_size=self.train_size, random_state=42)
    self.actions_train, self.actions_valid, self.features_train, self.features_valid, \
        self.reward_train, self.reward_valid, self.pi_0_train, self.pi_0_valid, \
        self.potentials_train, self.potentials_valid, self.l_train, self.l_valid = \
        train_test_split(self.actions_train, self.features_train, self.reward_train,
                         self.pi_0_train, self.potentials_train, self.l_train,
                         train_size=self.val_size, random_state=42)

    min_max_scaler = MinMaxScaler(feature_range=(0, 1))
    self.features_train = min_max_scaler.fit_transform(self.features_train)
    self.features_valid = min_max_scaler.transform(self.features_valid)
    self.features_test = min_max_scaler.transform(self.features_test)

    self.baseline_reward_valid = np.mean(self.reward_valid)
    self.baseline_reward_test = np.mean(self.reward_test)
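# A standalone sketch of the discrete propensity computation above (the
# numbers are hypothetical): the logging probability of a bucketized action is
# the lognormal mass between its bin's lower and upper edges.
from scipy.stats import lognorm

mu, sigma = 0.0, 1.0
rv = lognorm(s=sigma, scale=np.exp(mu))
bins = np.array([1e-7, 0.5, 1.0, 2.0])   # lower edges, incl. the 1e-7 pad
inds = np.digitize(np.array([0.3, 1.5]), bins, right=True)   # [1, 3]
pi = rv.cdf(bins[inds]) - rv.cdf(bins[np.maximum(inds - 1, 0)])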
def get_binned(fbin, tbin, lim):
    # lim = 0.8
    num = 10
    # Jitter all bin edges by a single shared random offset
    bins = np.linspace(0, lim, num=num) + np.random.uniform(-0.05, 0.05)
    ind = np.digitize(fbin, bins=bins)
    y = []
    se = []
    x = []
    for i in range(1, num + 1):
        rvals = tbin[ind == i]
        if rvals.size:
            x.append(bins[i - 1])
            y.append(np.mean(rvals))
            se.append(np.std(rvals))
    return (x, y, se)
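# A minimal usage sketch (random synthetic data, hypothetical): bin the
# x-values in fbin, then summarize the matching tbin values per bin.
fbin = np.random.uniform(0, 0.8, size=500)
tbin = fbin ** 2 + np.random.normal(0, 0.05, size=500)
x, y, se = get_binned(fbin, tbin, lim=0.8)
# plt.errorbar(x, y, yerr=se)  # assumes matplotlib.pyplot imported as plt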