Example #1
    def optimize(self, curb=None):
        assert hasattr(self, "inputs")
        assert hasattr(self, "targets")
        x = np.atleast_2d(self.inputs)
        y = np.atleast_2d(self.targets)
        assert len(x) == len(y)

        n, D = x.shape
        n, E = y.shape

        if curb is not None:
            self.curb = curb
        elif not hasattr(self, "curb"):
            self.curb = Empty()
            self.curb.snr = 500
            self.curb.ls = 100
            self.curb.std = std(x, 0)

        if not hasattr(self, "hyp"):
            self.hyp = np.zeros([E, D + 2])
            self.hyp[:, :D] = np.repeat(log(std(x, 0)).reshape(1, D), E, 0)
            self.hyp[:, D] = log(std(y, 0))
            self.hyp[:, -1] = log(std(y, 0) / 10)

        print("Train hyperparameters of full GP...")
        try:
            self.result = minimize(
                value_and_grad(self.hyp_crub), self.hyp, jac=True)
        except Exception:
            self.result = minimize(
                value_and_grad(self.hyp_crub), self.hyp, jac=True, method='CG')

        self.hyp = self.result.get('x').reshape(E, -1)
        self.cache()
Example #2
def regression_data(seed, data_count=500):
    """
    Generate data from a noisy sine wave.
    :param seed: random number seed
    :param data_count: number of data points.
    :return: (x_train, y_train, x_test, y_test, train_stats)
    """
    np.random.seed(seed)
    noise_var = 0.1

    x = np.linspace(-4, 4, data_count)
    y = 1 * np.sin(x) + np.sqrt(noise_var) * npr.randn(data_count)

    train_count = int(0.2 * data_count)
    idx = npr.permutation(range(data_count))
    x_train = x[idx[:train_count], np.newaxis]
    x_test = x[idx[train_count:], np.newaxis]
    y_train = y[idx[:train_count]]
    y_test = y[idx[train_count:]]

    mu = np.mean(x_train, 0)
    std = np.std(x_train, 0)
    x_train = (x_train - mu) / std
    x_test = (x_test - mu) / std
    mu = np.mean(y_train, 0)
    std = np.std(y_train, 0)
    y_train = (y_train - mu) / std
    train_stats = dict()
    train_stats['mu'] = mu
    train_stats['sigma'] = std

    return x_train, y_train, x_test, y_test, train_stats
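For context, a minimal usage sketch (assuming import numpy as np, plus import numpy.random as npr as the function body itself requires):

x_train, y_train, x_test, y_test, stats = regression_data(seed=0)
print(x_train.shape, y_train.shape)  # (100, 1) (100,)
# map the standardized training targets back to the original scale
y_back = y_train * stats['sigma'] + stats['mu']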
Example #3
    def standardize(self):
        zscore = lambda x, mu, sigma: (x - mu.reshape(1, -1)) / sigma.reshape(
            1, -1)
        un_zscore = lambda x, mu, sigma: x * sigma.reshape(1, -1) + mu.reshape(
            1, -1)

        if not self.standardized:

            self.mu_x = np.mean(self.x_train, axis=0)
            self.sigma_x = np.std(self.x_train, axis=0)

            self.x_train = zscore(self.x_train, self.mu_x, self.sigma_x)
            if self.x_test is not None:
                self.x_test = zscore(self.x_test, self.mu_x, self.sigma_x)

            if self.family == 'gaussian':
                #self.mu_f = np.mean(self.f_train, axis=0)
                #self.sigma_f = np.std(self.f_train, axis=0)

                self.mu_y = np.mean(self.y_train, axis=0)
                self.sigma_y = np.std(self.y_train, axis=0)

                self.f_train = zscore(self.f_train, self.mu_y, self.sigma_y)
                if self.f_test is not None:
                    self.f_test = zscore(self.f_test, self.mu_y, self.sigma_y)

                self.y_train = zscore(self.y_train, self.mu_y, self.sigma_y)
                if self.y_test is not None:
                    self.y_test = zscore(self.y_test, self.mu_y, self.sigma_y)

                self.f_orig = self.f
                self.f = lambda x: zscore(
                    self.f_orig(un_zscore(x, self.mu_x, self.sigma_x)),
                    self.mu_y, self.sigma_y)
            self.standardized = True
Example #4
def summarize_res(sname, datasize):
    print(sname)
    res = []
    times = []
    for i in range(100):
        PATH = ROOT_PATH + "/MMR_IVs/results/zoo/" + sname + "/"
        filename = os.path.join(
            PATH, str(date.today()),
            'LMO_errs_{}_nystr_prodkern_{}.npy'.format(i, datasize))
        if os.path.exists(filename):
            tmp_res = np.load(filename, allow_pickle=True)
            if tmp_res[-1] is not None:
                res += [tmp_res[-1]]
        time_path = os.path.join(
            PATH, str(date.today()),
            'LMO_errs_{}_nystr_prodkern_{}_time.npy'.format(i, datasize))
        if os.path.exists(time_path):
            t = np.load(time_path)
            times += [t]
    res = np.array(res)
    times = np.array(times)
    res = remove_outliers(res)
    times = np.sort(times)[:80]
    print(times)
    print('mean, std: ', np.mean(res), np.std(res))
    print('time: ', np.mean(times), np.std(times))
Example #5
 def rand_theta(self, scale):
     theta = scale * np.random.randn(self.num_param)
     theta[0] = np.log(np.std(self.train_y)/2)
     theta[1] = np.log(np.std(self.train_y))
     for i in range(self.dim):
         theta[2+i] = np.maximum(-100, np.log(0.5*(self.train_x[i].max() - self.train_x[i].min())))
     return theta
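Several of the rand_theta / get_default_theta initializers in this collection follow the same recipe: signal and noise amplitudes are seeded from np.std of the targets, and each lengthscale from half the per-dimension input range, all in log space so the optimizer works on an unconstrained scale. A minimal standalone sketch of that recipe (the function and variable names are illustrative, not taken from any one of the source repositories):

import numpy as np

def init_log_hypers(train_x, train_y):
    # train_x: (dim, n) inputs; train_y: (n,) targets
    dim = train_x.shape[0]
    theta = np.empty(2 + dim)
    theta[0] = np.log(np.std(train_y) / 2)  # log noise amplitude
    theta[1] = np.log(np.std(train_y))      # log signal amplitude
    # log lengthscales: half the input range per dimension, floored at -100
    half_ranges = 0.5 * (train_x.max(axis=1) - train_x.min(axis=1))
    theta[2:] = np.maximum(-100, np.log(half_ranges))
    return theta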
Example #6
 def __init__(self,
              train_x,
              train_y,
              layer_sizes,
              activations,
              bfgs_iter=100,
              l1=0,
              l2=0,
              debug=False):
     self.train_x = np.copy(train_x)
     self.train_y = np.copy(train_y)
     self.dim = train_x.shape[0]
     self.num_train = train_x.shape[1]
     self.nn = NN(layer_sizes, activations)
     self.num_param = 2 + self.dim + self.nn.num_param(self.dim)
     self.bfgs_iter = bfgs_iter
     self.l1 = l1
     self.l2 = l2
     self.debug = debug
     self.m = layer_sizes[-1]
     self.in_mean = np.mean(self.train_x, axis=1)
     self.in_std = np.std(self.train_x, axis=1)
     self.train_x = ((self.train_x.T - self.in_mean) / self.in_std).T
     self.out_mean = np.mean(self.train_y)
     self.out_std = np.std(self.train_y)
     self.train_y = (self.train_y - self.out_mean) / self.out_std
     self.loss = np.inf
Example #7
 def get_default_theta(self):
     theta = np.random.randn(4 + self.dim)
     for i in range(self.dim):
         theta[1+i] = np.maximum(-100, np.log(0.5*(self.train_x[i].max() - self.train_x[i].min()))) #length scale
     theta[self.dim+1] = np.log(np.std(self.src_y)) # sigma2_src
     theta[self.dim+2] = np.log(np.std(self.tag_y)) # sigma2_tag
      theta[self.dim+3] = 2 * np.random.random() - 1 # -1 < lambda < 1
     return theta
Example #8
 def getNoise(self):
     # estimate based on values less than 2 sigma above the mean
     # a couple of spot checks suggest this works well; needs more rigor in the future
     noi = np.std(self.data[self.data < np.average(self.data) +
                            2 * np.std(self.data)])
     print('noise is:')
     print(noi)
     return noi
Example #9
    def test_logitnormal_moments(self):
        # Gauss-Hermite quadrature points and weights for logit-normal moments
        gh_loc, gh_weights = hermgauss(4)

        # logit-normal parameters
        lognorm_means = np.random.random((5, 3)) # should work for arrays now
        lognorm_infos = np.random.random((5, 3))**2 + 1
        alpha = 2 # dp parameter

        # draw samples
        num_draws = 10**5
        samples = np.random.normal(lognorm_means,
                        1/np.sqrt(lognorm_infos), size = (num_draws, 5, 3))
        logit_norm_samples = sp.special.expit(samples)

        # test lognormal means
        np_test.assert_allclose(
            np.mean(logit_norm_samples, axis = 0),
            ef.get_e_logitnormal(
                lognorm_means, lognorm_infos, gh_loc, gh_weights),
            atol = 3 * np.std(logit_norm_samples) / np.sqrt(num_draws))

        # test Elog(x) and Elog(1-x)
        log_logistic_norm = np.mean(np.log(logit_norm_samples), axis = 0)
        log_1m_logistic_norm = np.mean(np.log(1 - logit_norm_samples), axis = 0)

        tol1 = 3 * np.std(np.log(logit_norm_samples))/ np.sqrt(num_draws)
        tol2 = 3 * np.std(np.log(1 - logit_norm_samples))/ np.sqrt(num_draws)

        np_test.assert_allclose(
            log_logistic_norm,
            ef.get_e_log_logitnormal(
                lognorm_means, lognorm_infos, gh_loc, gh_weights)[0],
            atol = tol1)

        np_test.assert_allclose(
            log_1m_logistic_norm,
            ef.get_e_log_logitnormal(
                        lognorm_means, lognorm_infos, gh_loc, gh_weights)[1],
            atol = tol2)

        # test prior
        prior_samples = np.mean((alpha - 1) *
                            np.log(1 - logit_norm_samples), axis = 0)
        tol3 = 3 * np.std((alpha - 1) * np.log(1 - logit_norm_samples)) \
                    /np.sqrt(num_draws)
        np_test.assert_allclose(
            prior_samples,
            ef.get_e_dp_prior_logitnorm_approx(
                        alpha, lognorm_means, lognorm_infos, gh_loc, gh_weights),
            atol = tol3)

        x = np.random.normal(0, 1e2, size = 10)
        def e_log_v(x):
            return np.sum(ef.get_e_log_logitnormal(\
                        x[0:5], np.abs(x[5:10]), gh_loc, gh_weights)[0])
        check_grads(e_log_v, order=2)(x)
Example #10
    def mean_std(self):
        """Compute the average standard deviation """

        #Gaussian width = mean of stds of all dimensions
        X, Y = self.xy()
        stdx = np.mean(np.std(X, 0))
        stdy = np.mean(np.std(Y, 0))
        mstd = old_div((stdx + stdy), 2.0)
        return mstd
Example #11
 def rand_theta(self, scale):
     theta = scale * np.random.randn(self.num_param)
     theta[0] = 1.0
     theta[1] = np.log(np.std(self.low_y))
     theta[2] = np.log(np.std(self.high_y))
     for i in range(self.dim):
         theta[4+i] = np.maximum(-100, np.log(0.5*(self.low_x[i].max() - self.low_x[i].min())))
         theta[5+self.dim+i] = np.maximum(-100, np.log(0.5*(self.high_x[i].max() - self.high_x[i].min())))
     return theta
Example #12
 def Fit(self, X, Y, **kwargs):
     if self.ignore_X:
         self.Xmean = 0
         self.Xsigma = 1
     else:
         self.Xmean = np.mean(X, axis=0)
         self.Xsigma = np.std(X, axis=0)
     self.Ymean = np.mean(Y, axis=0)
     self.Ysigma = np.std(Y, axis=0)
Example #13
 def rand_theta(self, scale=0.1):
     """ 
     Generate an initial theta, the weights of NN are randomly initialized
     """
     theta = scale * np.random.randn(self.num_param)
     theta[0] = np.log(np.std(self.train_y) / 2)
     theta[1] = np.log(np.std(self.train_y))
     for i in range(self.dim):
          theta[2 + i] = np.maximum(-100, np.log(0.5 * (self.train_x[i, :].max() - self.train_x[i, :].min())))
     return theta
Example #14
    def evaluate(self, n_trajectories, print_reward=False):
        """
        Evaluate the deterministic policy for N full trajectories.
        :param n_trajectories: number of trajectories to use for the evaluation.
        :param print_reward: if True, print running reward statistics after each trajectory.
        :return: (total_reward, mean_traj_reward)
        """
        total_reward, trajectory = 0, 0
        traj_rewards = []
        traj_reward = 0
        state = self.env.reset()
        φ_s = self.φ_fn(state)
        step = 0

        actions = []
        states = []

        print('Evaluating the deterministic policy...')

        while len(traj_rewards) < n_trajectories:
            step += 1
            action = π(φ_s, θ=self.θ, Σ=self.Σ, deterministic=True)
            states.append(state)
            actions.append(action)
            next_state, reward, done, _ = self.env.step(action)
            total_reward += reward
            traj_reward += reward
            if self.render:
                self.env.render()
            if done:
                # print(step)
                step = 0
                φ_s = self.φ_fn(self.env.reset())
                trajectory += 1
                traj_rewards.append(traj_reward)
                traj_reward = 0
                if print_reward:
                    # print(traj_rewards)
                    print(len(traj_rewards), 'trajectories: total',
                          total_reward, 'mean', np.mean(traj_rewards), 'std',
                          np.std(traj_rewards), 'max', np.max(traj_rewards))
                    #print('states', states)
                    #print('actions', actions)
                    # print()
                states = []
                actions = []
            else:
                state = next_state
                φ_s = self.φ_fn(next_state)
        mean_traj_reward = total_reward / n_trajectories
        if print_reward:
            print('FINAL: total', total_reward, 'mean', np.mean(traj_rewards),
                  'std', np.std(traj_rewards), 'max', np.max(traj_rewards))
            print()
        return total_reward, mean_traj_reward
Example #15
 def __str__(self):
     mean_y1 = np.mean(self.Y1, 0)
     std_y1 = np.std(self.Y1, 0)
     mean_y2 = np.mean(self.Y2, 0)
     std_y2 = np.std(self.Y2, 0)
     prec = 4
     desc = ''
     desc += 'E[y1] = %s; ' % (np.array_str(mean_y1, precision=prec))
     desc += 'E[y2] = %s; ' % (np.array_str(mean_y2, precision=prec))
     desc += 'Std[y1] = %s; ' % (np.array_str(std_y1, precision=prec))
     desc += 'Std[y2] = %s; ' % (np.array_str(std_y2, precision=prec))
     return desc
Example #16
 def rand_theta(self, scale=1):
     '''
     generate an initial theta, the weights of NN are randomly initialized
     '''
     theta = scale * np.random.randn(self.num_param)
     theta[0] = np.log(np.std(self.train_y_zero) / 2)
     theta[1] = np.log(np.std(self.train_y_zero))
     for i in range(self.dim):
         theta[2 + i] = np.maximum(
             -100,
             np.log(0.5 * (self.train_x[i].max() - self.train_x[i].min())))
     return theta
Example #17
 def __str__(self):
     mean_x = np.mean(self.X, 0)
     std_x = np.std(self.X, 0)
     mean_y = np.mean(self.Y, 0)
     std_y = np.std(self.Y, 0)
     prec = 4
     desc = ''
     desc += 'E[x] = %s \n' % (np.array_str(mean_x, precision=prec))
     desc += 'E[y] = %s \n' % (np.array_str(mean_y, precision=prec))
     desc += 'Std[x] = %s \n' % (np.array_str(std_x, precision=prec))
     desc += 'Std[y] = %s \n' % (np.array_str(std_y, precision=prec))
     return desc
Example #18
    def get_init_hyperparams(self) -> tuple:
        """
        Compute initial hyperparameters for dynamics GP
        :return: [length scales, signal variance, noise variance]
        """
        length_scales = np.repeat(np.log(
            np.std(self.state_action_pairs, axis=0)).reshape(1, -1),
                                  self.state_dim,
                                  axis=0)
        sigma_f = np.log(np.std(self.state_delta, axis=0))
        sigma_eps = np.log(np.std(self.state_delta, axis=0) / 10)

        return length_scales, sigma_f, sigma_eps
Example #19
 def rand_theta(self, scale=1):
     """ 
     Generate an initial theta, the weights of NN are randomly initialized
     """
     theta = scale * np.random.randn(self.num_param)
     # noises and self covariances
     for i in range(self.num_obj):
         theta[i]                = np.log(np.std(self.train_y[:, i]) / 2)
         theta[self.num_obj + i] = np.log(np.std(self.train_y[:, i]))
     # lengthscales
     for i in range(self.dim):
         theta[2 * self.num_obj + i] = np.maximum(-100, np.log(0.5 * (self.train_x[i, :].max() - self.train_x[i, :].min())))
     return theta
Example #20
    def load_data(self, csvname):
        data = np.loadtxt(csvname, delimiter=',').T
        self.x = data[:, :-1:]
        self.y = data[:, -1:]

        # standardize the two input dimensions
        mean1 = np.mean(self.x[:, 0])
        mean2 = np.mean(self.x[:, 1])
        std1 = np.std(self.x[:, 0])
        std2 = np.std(self.x[:, 1])
        self.x[:, 0] -= mean1
        self.x[:, 0] /= std1
        self.x[:, 1] -= mean2
        self.x[:, 1] /= std2
Example #21
    def test_e_log_lik(self):
        n_test_samples = 10000

        # Our expected log likelihood should only differ from a sample average
        # of the generated log likelihood by a constant as the parameters
        # vary.  Check this using num_param different random parameters.
        num_params = 5
        ell_by_param = np.full(num_params, float('nan'))
        sample_ell_by_param = np.full(num_params, float('nan'))
        standard_error = 0.
        for i in range(num_params):
            tau, nu, phi_mu, phi_var = \
                vi.initialize_parameters(num_samples, x_dim, k_approx)
            phi_var_expanded = np.array([phi_var for d in range(x_dim)])

            # set vb parameters
            vb_params2['phi'].set_vector(
                np.hstack([np.ravel(phi_mu.T), phi_var]))
            vb_params2['pi'].set_vector(np.ravel(tau))
            vb_params2['nu'].set_vector(np.ravel(nu))

            z_sample, a_sample, pi_sample = \
                vi.generate_parameter_draws(nu, phi_mu, phi_var_expanded, \
                                            tau, n_test_samples)

            sample_e_log_lik = [
                vi.log_lik(x, z_sample[n, :, :], a_sample[n, :, :],
                           pi_sample[n, :], sigma_eps, sigma_a, alpha,
                           k_approx) \
                for n in range(n_test_samples) ]

            sample_ell_by_param[i] = np.mean(sample_e_log_lik)
            standard_error = \
                np.max([ standard_error,
                         np.std(sample_e_log_lik) / np.sqrt(n_test_samples) ])

            # get moments
            e_log_pi1, e_log_pi2, phi_moment1, phi_moment2, nu_moment =\
                            vi.get_moments_VB(vb_params2)

            ell_by_param[i] = vi.exp_log_likelihood(nu_moment, phi_moment1,
                                                    phi_moment2, e_log_pi1,
                                                    e_log_pi2, sigma_a,
                                                    sigma_eps, x, alpha)

        print('Mean log likelihood standard error: %0.5f' % standard_error)
        self.assertTrue(np.std(ell_by_param - sample_ell_by_param) < \
                        3. * standard_error)
Example #22
def runSB(psf, psf_k, imageArray):
    nImages = np.shape(imageArray)[2]
    results = imageArray * 0

    for imageIdx in range(0, nImages):
        if imageIdx < start:
            continue
        grndpath = '/home/moss/SMLM/data/fluorophores/frames/' + str(
            imageIdx + 1).zfill(5) + '.csv'
        grnd = Table.read(grndpath, format='ascii')
        no_source = len(grnd['xnano'])
        img = imageArray[:, :, imageIdx]
        sub = img - np.average(img[img < np.average(img) + 3 * np.std(img)])
        subnorm = sub / np.max(sub)
        mock = makeMock(grnd)
        #mocknorm = mock/np.max(mock);
        sb = SparseBayes(subnorm, psf, psf_k, no_source)
        #sb = SparseBayes_alpha(mock,psf,psf_k,no_source);
        #sb = SparseBayes_nofft(mock,psf,psf_k,sig_psf,no_source);
        #sb = SparseBayes_gaussian(subnorm,psf,psf_k);
        results[:, :, imageIdx] = sb.res
        s = 'nopri' + str(imageIdx + 1).zfill(5) + '.out'
        np.savetxt(s, sb.res)
        plt.imshow(results[:, :, imageIdx])
        plt.show()
    return results
Example #23
 def get_default_theta(self):
     if self.k:  # kernel2 MF
         # sn2 + (output_scale + lengthscale) + (output_scale + lengthscales) * 2
         theta = np.random.randn(3 + 2 * self.dim)
         theta[2] = np.maximum(
             -100,
             np.log(0.5 * (self.train_x[self.dim - 1].max() -
                           self.train_x[self.dim - 1].min())))
         for i in range(self.dim - 1):
             tmp = np.maximum(
                 -100,
                 np.log(0.5 *
                        (self.train_x[i].max() - self.train_x[i].min())))
             theta[4 + i] = tmp
             theta[4 + self.dim + i] = tmp
     else:  # kernel1 RBF
         # sn2 + output_scale + lengthscales
         theta = np.random.randn(2 + self.dim)
         for i in range(self.dim):
             theta[2 + i] = np.maximum(
                 -100,
                 np.log(0.5 *
                        (self.train_x[i].max() - self.train_x[i].min())))
     theta[0] = np.log(np.std(self.train_y) + 0.000001)  # sn2
     return theta
Example #24
 def rand_theta(self, scale):
     if self.k:  # kernel2
         # sn2 + (output_scale + lengthscale) + (output_scale + lengthscales) * 2
         # 1 + 2 + (1 + self.dim - 1)*2 = 3 + 2*self.dim
         theta = scale * np.random.randn(3 + 2 * self.dim)
         theta[2] = np.maximum(
             -100,
             np.log(0.5 * (self.train_x[self.dim - 1].max() -
                           self.train_x[self.dim - 1].min())))
         for i in range(self.dim - 1):
             tmp = np.maximum(
                 -100,
                 np.log(0.5 *
                        (self.train_x[i].max() - self.train_x[i].min())))
             theta[4 + i] = tmp
             theta[4 + self.dim + i] = tmp
     else:  # kernel1 RBF
         # sn2 + output_scale + lengthscales, 1 + 1 + self.dim
         theta = scale * np.random.randn(2 + self.dim)
         for i in range(self.dim):
             theta[2 + i] = np.maximum(
                 -100,
                 np.log(0.5 *
                        (self.train_x[i].max() - self.train_x[i].min())))
     theta[0] = np.log(np.std(self.train_y))  # sn2
     # theta[1] = np.log(np.std(self.train_y))
     return theta
Example #25
def normalize_array(A):
    mean, std = np.mean(A), np.std(A)
    A_normed = (A - mean) / std
    def restore_function(X):
        return X * std + mean

    return A_normed, restore_function
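A quick round-trip check of this helper (assuming import numpy as np):

A = np.array([1.0, 2.0, 3.0, 4.0])
A_normed, restore = normalize_array(A)
print(np.allclose(restore(A_normed), A))  # True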
Example #27
 def _prepare_starting_parameter(self, dataset):
     self.d = dimension_datasets[dataset.name]
     m = np.mean(dataset.actions_train)
     std = np.std(dataset.actions_train)
     self.scale = 1e-1
     v = std * self.scale
     return m, v
Example #28
def normalizeFeatures(X_train, X_test):
    mean_X_train = np.mean(X_train, 0)
    std_X_train = np.std(X_train, 0)
    std_X_train[ std_X_train == 0 ] = 1
    X_train_normalized = (X_train - mean_X_train) / std_X_train
    X_test_normalized = (X_test - mean_X_train) / std_X_train
    return X_train_normalized, X_test_normalized
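A small demonstration of the zero-variance guard (assuming import numpy as np); without the std_X_train == 0 fix-up, the constant second column would cause a division by zero:

X_train = np.array([[1.0, 5.0], [2.0, 5.0], [3.0, 5.0]])  # second column is constant
X_test = np.array([[4.0, 5.0]])
X_train_n, X_test_n = normalizeFeatures(X_train, X_test)
print(X_train_n[:, 1])  # zeros, not NaN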
Example #29
 def validate(self, X, Y):
     testError = []
     for i in range(len(X)):
         result = self.forward(X[i])
         cost = self.getCostValue(result, Y[i])
         testError.append(cost)
     return np.mean(testError), np.std(testError), testError
Example #30
    def standardize(self, x, y=[]):

        # Extract or use existing normal parameters to normalize samples
        if len(self.x_mean) == 0:
            self.x_mean = np.mean(x, axis=0)
            self.x_std = np.std(x, axis=0)
        x = (x - self.x_mean) / self.x_std
        # Extract or use existing normal parameters to normalize targets if there are any
        if len(y) != 0:
            if len(self.y_mean) == 0:
                self.y_mean = np.mean(y, axis=0)
                self.y_std = np.std(y, axis=0)
            y = (y - self.y_mean) / self.y_std
            return x, y
        else:
            return x
Example #31
def check_num_snps(sampled_n_dict,
                   demo,
                   num_loci,
                   mut_rate,
                   ascertainment_pop=None,
                   error_matrices=None):
    if error_matrices is not None or ascertainment_pop is not None:
        # TODO
        raise NotImplementedError

    #seg_sites = momi.simulate_ms(
    #    ms_path, demo, num_loci=num_loci, mut_rate=mut_rate)
    #sfs = seg_sites.sfs

    num_bases = 1000
    sfs = demo.simulate_data(sampled_n_dict=sampled_n_dict,
                             muts_per_gen=mut_rate / num_bases,
                             recoms_per_gen=0,
                             length=num_bases,
                             num_replicates=num_loci)._sfs

    n_sites = sfs.n_snps(vector=True)

    n_sites_mean = np.mean(n_sites)
    n_sites_sd = np.std(n_sites)

    # TODO this test isn't very useful because expected_branchlen is not used anywhere internally anymore
    n_sites_theoretical = demo.expected_branchlen(sampled_n_dict) * mut_rate
    #n_sites_theoretical = momi.expected_total_branch_len(
    #    demo, ascertainment_pop=ascertainment_pop, error_matrices=error_matrices) * mut_rate

    zscore = -np.abs(n_sites_mean - n_sites_theoretical) / n_sites_sd
    pval = scipy.stats.norm.cdf(zscore) * 2.0

    assert pval >= .05
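The two-sided p-value above relies on the symmetry of the normal CDF: for a z-score of magnitude |z|, p = 2 * Phi(-|z|). A quick numeric sanity check (assuming import scipy.stats):

import scipy.stats
print(scipy.stats.norm.cdf(-1.96) * 2.0)  # ~0.05, the rejection threshold used above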
Example #32
 def _stabilize_x(self):
     """Fix the rotation according to the SVD.
     """
     U, _, _ = np.linalg.svd(self.X, full_matrices=False)
     L = np.linalg.cholesky(np.cov(U.T) + 1e-6 * np.eye(self.D)).T
     self.X = np.linalg.solve(L, U.T).T
     self.X /= np.std(self.X, axis=0)
Example #33
def simulator(theta,N):
    #get 500*N exponentials
    exponentials = np.random.exponential(1/theta,size=N*M)
    #reshape to Nx500
    exponentials = np.reshape(exponentials,(N,M))
    #get means of the rows
    summaries = np.mean(exponentials,1)
    std = np.std(exponentials,1)
    return summaries, std
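For illustration, a call using the module-level constant M that the comments imply is 500 (an assumption; M is defined elsewhere in the source file):

M = 500
summaries, stds = simulator(2.0, 10)
print(summaries.shape, stds.shape)  # (10,) (10,)
# each summary is the mean of 500 draws from Exp(scale=1/theta), so it should be near 0.5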
Example #34
def collect_test_losses(num_folds):
    # Run this after CV results are in. e.g:
    # python -c "from deepmolecule.util import collect_test_losses; collect_test_losses(10)"
    results = {}
    for net_type in ['conv', 'morgan']:
        results[net_type] = []
        for expt_ix in range(num_folds):
            fname = "Final_test_loss_{0}_{1}.pkl.save".format(expt_ix, net_type)
            try:
                with open(fname, 'rb') as f:  # pickle files must be opened in binary mode
                    results[net_type].append(pickle.load(f))
            except IOError:
                print("Couldn't find file {0}".format(fname))

    print("Results are:")
    print(results)
    print("Means:")
    print({k: np.mean(v) for k, v in results.items()})
    print("Std errors:")
    print({k: np.std(v) / np.sqrt(len(v) - 1) for k, v in results.items()})
Example #35
def calculate_func_mean_and_variance(func_samples):
	''' Simple helper function that calculates the mean and spread
		of a collection of functions
		INPUT:
			func_samples: an nd-array of functions, functions are along the columns
		OUTPUT:
			func_mean  : the mean function of func_samples
			func_lower : the lower bound (statistically) of func_samples
			func_upper : the upper bound (statistically) of func_samples
		----------------------------------------------------------------------------
		Notes:
			Uses 3-sigma bounds, which account for 99.7% of the spread.
	'''

	func_mean = np.mean(func_samples,axis=1)
	func_std = np.std(func_samples,axis=1)
	func_lower = func_mean-(3*func_std)
	func_upper = func_mean+(3*func_std)

	return func_mean, func_lower, func_upper
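A quick usage sketch (assuming import numpy as np): 200 noisy draws of a sine function stored column-wise, as the docstring specifies:

xs = np.linspace(0, 2 * np.pi, 50)
samples = np.sin(xs)[:, None] + 0.1 * np.random.randn(50, 200)
func_mean, func_lower, func_upper = calculate_func_mean_and_variance(samples)
print(func_mean.shape, func_lower.shape, func_upper.shape)  # (50,) (50,) (50,)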
Example #36
def plot_runtime(ex, fname, func_xvalues, xlabel, func_title=None):
    results = glo.ex_load_result(ex, fname)
    value_accessor = lambda job_results: job_results['time_secs']
    vf_pval = np.vectorize(value_accessor)
    # results['test_results'] is an array of job-result dicts from perform_test(te);
    # each contains a 'time_secs' entry extracted by the accessor above
    times = vf_pval(results['test_results'])
    repeats, _, n_methods = results['test_results'].shape
    time_avg = np.mean(times, axis=0)
    time_std = np.std(times, axis=0)

    xvalues = func_xvalues(results)

    #ns = np.array(results[xkey])
    #te_proportion = 1.0 - results['tr_proportion']
    #test_sizes = ns*te_proportion
    line_styles = exglo.func_plot_fmt_map()
    method_labels = exglo.get_func2label_map()
    
    func_names = [f.__name__ for f in results['method_job_funcs'] ]
    for i in range(n_methods):    
        te_proportion = 1.0 - results['tr_proportion']
        fmt = line_styles[func_names[i]]
        #plt.errorbar(ns*te_proportion, mean_rejs[:, i], std_pvals[:, i])
        method_label = method_labels[func_names[i]]
        plt.errorbar(xvalues, time_avg[:, i], yerr=time_std[:,i], fmt=fmt,
                label=method_label)
            
    ylabel = 'Time (s)'
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    plt.gca().set_yscale('log')
    plt.xlim([np.min(xvalues), np.max(xvalues)])
    plt.xticks( xvalues, xvalues)
    plt.legend(loc='best')
    title = '%s. %d trials. '%( results['prob_label'],
            repeats ) if func_title is None else func_title(results)
    plt.title(title)
    #plt.grid()
    return results
Example #37
def do_shift_CV(num_folds, model, shift_X, shift_y):
    from scipy import interp
    import matplotlib
    import python_utils.python_utils.caching as caching
    from sklearn.metrics import roc_curve, auc
    import pandas as pd
    matplotlib.rcParams.update({'font.size': 16})
    fig, ax = plt.subplots()
    fpr_points = np.linspace(0.,1.,101)
    roc_curves = []
    auc_vals = []
    log_losses = []
    color = 'r'
    label = 'asdf'
    from sklearn.cross_validation import KFold
    outer_cv = sklearn.cross_validation.KFold(len(shift_X), num_folds)
    for (i,(train, test)) in enumerate(reversed(list(outer_cv))):
        print('cv', i)
        shift_X_train, shift_y_train = [shift_X[idx] for idx in train], [shift_y[idx] for idx in train]
        shift_X_test, shift_y_test = [shift_X[idx] for idx in test], [shift_y[idx] for idx in test]
        model.fit(shift_X_train, shift_y_train)
        target_y_test_hat = model.predict(shift_X_test)
        source_X_test, target_X_test, source_y_test, target_y_test = shift_Xy_to_matrices(shift_X_test, shift_y_test)
        fpr, tpr, thresholds = roc_curve(target_y_test, target_y_test_hat)
        log_losses.append(logloss(target_y_test, target_y_test_hat))
        caching.fig_archiver.log_text('log_losses')
        caching.fig_archiver.log_text(log_losses)
        auc_vals.append(auc(fpr,tpr))
        caching.fig_archiver.log_text('auc_vals')
        caching.fig_archiver.log_text(auc_vals)
        roc_curves.append(interp(fpr_points, fpr, tpr))
        ax.plot(fpr, tpr, color = color, alpha=0.75, zorder=np.random.random())
        caching.fig_archiver.archive_fig(fig)
    roc_curves_df = pd.DataFrame(roc_curves, columns=fpr_points)
    ax.plot(fpr_points, roc_curves_df.mean(), linewidth=5, color=color, label=label)
    ax.set_xlabel('fpr',fontsize=22)
    ax.set_ylabel('tpr',fontsize=22)
    print(pd.Series(auc_vals), 'mean:', np.mean(auc_vals), 'std:', np.std(auc_vals))
    caching.fig_archiver.archive_fig(fig)
Example #38
def batch_normalize(x):
    """
    Batch normalizes the matrix along the data axis.
    """
    mbmean = np.mean(x, axis=0, keepdims=True)
    return (x - mbmean) / (np.std(x, axis=0, keepdims=True) + 1)
Example #39
def batch_normalize(activations):
    mbmean = np.mean(activations, axis=0, keepdims=True)
    return (activations - mbmean) / (np.std(activations, axis=0, keepdims=True) + 1)
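Both variants add 1 to the denominator rather than a small epsilon, which avoids division by zero on constant features at the cost of only approximately unit output variance. A quick check that constant input stays finite (assuming import numpy as np):

x = np.ones((4, 3))        # a constant mini-batch
print(batch_normalize(x))  # all zeros, no NaNs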
Example #40
if __name__ == '__main__':
    random = 1 
    
    n_samples = 10 
    n_samples_to_test = 100
    num_pseudo_params = 50 

    dimensions =[1,1,1]
    n_layers = len(dimensions)-1 

    npr.seed(0) #Randomness comes from KMeans
    rs = npr.RandomState(0)

    motor = np.genfromtxt('motor.csv', delimiter=',',skip_header = True)
    X = motor[:,1]
    X = (X - np.mean(X))/(np.std(X))
    X = X.reshape(len(X),1)

    y = motor[:,2]
    y = (y-np.mean(y))/(np.std(y))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 42)

    total_num_params, log_likelihood, sample_mean_cov_from_deep_gp, predict_layer_funcs, squared_error, create_deep_map = \
            build_deep_gp(dimensions, rbf_covariance, num_pseudo_params, random)

    init_params = .1 * npr.randn(total_num_params)
    deep_map = create_deep_map(init_params)

    init_params = initialize(deep_map,X,num_pseudo_params)
    print("Optimizing covariance parameters...")
    objective = lambda params: -log_likelihood(params,X,y,n_samples)
Example #41
 def forward_pass(self, data, params):
     mean, std = params.get(self.get_params_shape())
     data = (data - np.mean(data, axis=1, keepdims=True)) / np.std(data, axis=1, keepdims=True)
     return (data * std) + mean
Example #42
def normalize(v, mean, std):
    standard = (v - np.mean(v, 1).reshape(-1, 1)) / np.std(v, 1).reshape(-1, 1)
    return standard * std + mean
Example #43
def simulator(theta,v):
    #handle more than one simulation per sample
    v = v.reshape(-1,M)
    simulations =-np.log(v)/theta
    return np.mean(simulations,1), np.std(simulations,1)
Example #44
    star_mags = np.array([du.colors_to_mags(r, c) 
                  for r, c in zip(coadd_df.star_mag_r.values,
                      coadd_df[['star_color_%s'%c for c in colors]].values)])

    gal_mags  = np.array([du.colors_to_mags(r, c) 
                    for r, c in zip(coadd_df.gal_mag_r.values,
                        coadd_df[['gal_color_%s'%c for c in colors]].values)])

    # look at galaxy fluxes regressed on stars
    x = star_mags[coadd_df.is_star.values]
    y = gal_mags[coadd_df.is_star.values]
    star_mag_model = LinearRegression()
    star_mag_model.fit(x, y)
    star_residuals = star_mag_model.predict(x) - y
    star_mag_model.res_covariance = np.cov(star_residuals.T)
    star_resids    = np.std(star_mag_model.predict(x) - y, axis=0)
    with open('star_mag_proposal.pkl', 'wb') as f:
        pickle.dump(star_mag_model, f)

    for i in range(5):
        plt.scatter(star_mag_model.predict(x)[:,i], y[:,i], label=i, c=sns.color_palette()[i])

    plt.legend()
    plt.show()

    # look at star fluxes regressed on galaxy fluxes
    x = gal_mags[~coadd_df.is_star.values]
    y = star_mags[~coadd_df.is_star.values]
    gal_mag_model = LinearRegression()
    gal_mag_model.fit(x, y)
    gal_residuals = gal_mag_model.predict(x) - y
Example #45
    num_particles = 10
    K=10
    convergence=1e-05
    paramsAVABC,lower_boundsAVABC,i,all_gradientsAVABC = AVABC(params,num_samples,num_particles,K+40,convergence)
    paramsBBVI,lower_boundsBBVI,iBBVI,all_gradientsBBVI = BBVI(params,num_samples,num_particles,K+40,convergence)
    print(params)
    print("true mean")
    print((k+1.)/(n+2.))
    a = k+1
    b = n-k+1
    print('i AVABC')
    print(len(lower_boundsAVABC))
    print('i BBVI')
    print(len(lower_boundsBBVI))
    print('AVABC gradient std')
    print(np.std(np.array(all_gradientsAVABC)))
    print('BBVI gradient std')
    print(np.std(np.array(all_gradientsBBVI)))
    x = np.linspace(0,1,100)
    plt.plot(lower_boundsBBVI,label='BBVI S=%i, sim=%i' % (num_samples,num_particles),color='red')
    plt.plot(lower_boundsAVABC,label='AVABC  S=%i, sim=%i' % (num_samples,num_particles),color='blue')
    plt.title('Beta-Bernoulli Lower Bound')
    plt.legend(loc=4)
    #plt.ylim((-25000,0))
    plt.show()

#    fig, ax = plt.subplots(1, 1)
#    plt.plot(x,beta.pdf(x, a,b),'--',color='red',label='true')
#    plt.plot(x,kumaraswamy_pdf(x,params),'-',color='blue',label='VI true likelihood')
    plt.plot(x, beta.pdf(x, a,b),label='true posterior',color='green')
    plt.plot(x,kumaraswamy_pdf(x,paramsAVABC),label='AVABC',color='blue')