Ejemplo n.º 1
0
def run_luxus_HMC(sigmaB2, lux_data, stan_model_file, plot_file_name,
                  covariate_to_be_tested, N_outputsamples, N_chains,
                  diagnostic_plots, testtype, test2cov, N_cytosines):
    """Fit the Stan model with HMC and compute one Savage-Dickey Bayes factor.

    Args:
        sigmaB2: Scalar multiplied with a 1x1 identity matrix and handed to
            ``calculate_savagedickey_kde_1d`` as its second argument
            (presumably the prior variance of the coefficients; confirm
            against that helper).
        lux_data: Data passed to ``stan_cache`` as ``data``.
        stan_model_file: Model file passed to ``stan_cache``.
        plot_file_name: Destination of the diagnostic figure; only used when
            ``diagnostic_plots == 1``.
        covariate_to_be_tested: Column of ``samples['B']`` to test.
        N_outputsamples: Passed to ``stan_cache`` as ``iter``.
        N_chains: Passed to ``stan_cache`` as ``chains``.
        diagnostic_plots: ``1`` to plot the fit and save the figure.
        testtype: ``1`` tests the selected column itself; any other value
            tests the difference to column ``test2cov``.
        test2cov: Column subtracted from the tested one when
            ``testtype != 1``.
        N_cytosines: When ``1``, ``sigmaC2`` is neither plotted nor averaged.

    Returns:
        Tuple ``(bf, runtime, sigmaR2_mean, sigmaC2_mean, sigmaE2_mean)``.
        ``runtime`` covers fitting and the Bayes factor computation but not
        the plotting; ``sigmaC2_mean`` is NaN when ``N_cytosines == 1``.
    """
    started = time.time()
    fit = stan_cache(stan_model_file,
                     data=lux_data,
                     iter=N_outputsamples,
                     chains=N_chains)
    samples = fit.extract(permuted=True)

    # Select the sampled quantity whose posterior is evaluated at zero.
    coefficients = samples['B']
    tested = coefficients[:, covariate_to_be_tested]
    if testtype != 1:
        tested = tested - coefficients[:, test2cov]
    bf = calculate_savagedickey_kde_1d(numpy.zeros((1,)),
                                       sigmaB2 * numpy.eye(1),
                                       tested)

    finished = time.time()
    print(fit)

    single_cytosine = N_cytosines == 1

    if diagnostic_plots == 1:
        plotted = ['l', 'sigmaE2', 'sigmaR2', 'B']
        if not single_cytosine:
            # sigmaC2 is only plotted when several cytosines are analysed.
            plotted.insert(2, 'sigmaC2')

        fit.plot(plotted)
        pyplot.tight_layout()
        pyplot.savefig(plot_file_name)

    sigmaR2_mean = numpy.mean(samples['sigmaR2'])
    sigmaE2_mean = numpy.mean(samples['sigmaE2'])
    sigmaC2_mean = numpy.nan if single_cytosine else numpy.mean(samples['sigmaC2'])

    return bf, finished - started, sigmaR2_mean, sigmaC2_mean, sigmaE2_mean
Ejemplo n.º 2
0
def run_luxus_HMC(sigmaB2, lux_data, diff, stan_file_name, BF_output_file,
                  name_to_BF_file, isWindow, N_cyt_in_analysis, cytosineIndex,
                  d_file, B_file, plot_file_name, modelNro, N_predictors):
    """Fit the Stan model with HMC and append Bayes factors to a file.

    The model is sampled with 1000 iterations and 4 chains. The samples of
    column 1 of ``B`` are written to ``"<B_file>_<name_to_BF_file>_<cytosineIndex>.txt"``,
    Bayes factors are appended to ``BF_output_file`` and a diagnostic plot is
    saved to ``plot_file_name``.

    Args:
        sigmaB2: Scalar multiplied with a 1x1 identity matrix and handed to
            ``calculate_savagedickey_kde_1d`` as its second argument.
        lux_data: Data passed to ``stan_cache`` as ``data``.
        diff: Value written next to every Bayes factor in the output file.
        stan_file_name: Model file passed to ``stan_cache``.
        BF_output_file: File the Bayes factor lines are appended to.
        name_to_BF_file: Label written on every output line and used in the
            name of the saved ``B`` samples file.
        isWindow: ``1`` computes a single Bayes factor from column 1 of
            ``B``; otherwise one Bayes factor per cytosine is computed from
            column ``c_ind * N_predictors + 1``.
        N_cyt_in_analysis: Number of cytosines looped over when
            ``isWindow != 1``.
        cytosineIndex: Only reported when ``N_cyt_in_analysis == 1``;
            otherwise the loop index is written instead. Always used in the
            name of the saved ``B`` samples file.
        d_file: Unused by this function.
        B_file: Prefix of the file the ``B`` samples are saved to.
        plot_file_name: Destination of the diagnostic figure.
        modelNro: ``2`` means LuxUS, any other value is treated as LuxUS sep;
            this only selects which variables are plotted.
        N_predictors: Number of columns of ``B`` per cytosine.

    Returns:
        Tuple ``(theta_mean, time_start, time_end, time_end_full)`` where
        ``time_end`` is taken right after sampling and ``time_end_full``
        after the Bayes factors have been written.
    """
    time_start = time.time()
    fit = stan_cache(stan_file_name, data=lux_data, iter=1000, chains=4)
    time_end = time.time()
    samples = fit.extract(permuted=True)
    numpy.savetxt("%s_%s_%s.txt" % (B_file, name_to_BF_file, cytosineIndex),
                  samples['B'][:, 1],
                  delimiter=',')

    if isWindow == 1:
        bf = calculate_savagedickey_kde_1d(numpy.zeros((1, )),
                                           sigmaB2 * numpy.eye(1),
                                           samples['B'][:, 1])
        with open(BF_output_file, 'a+') as fw:
            fw.write("%f\t%f\t%s\n" % (bf, diff, name_to_BF_file))
    else:
        for c_ind in range(N_cyt_in_analysis):
            bf = calculate_savagedickey_kde_1d(
                numpy.zeros((1, )), sigmaB2 * numpy.eye(1),
                samples['B'][:, c_ind * N_predictors + 1])
            # With a single cytosine the caller-supplied index identifies it;
            # otherwise the position inside the analysed set is reported.
            reported_index = cytosineIndex if N_cyt_in_analysis == 1 else c_ind
            with open(BF_output_file, 'a+') as f:
                f.write("%f\t%f\t%f\t%s\n" %
                        (bf, diff, reported_index, name_to_BF_file))

    time_end_full = time.time()

    print(fit)

    print(samples.keys())

    if modelNro == 2:
        # LuxUS
        vars_to_be_printed = ['l', 'sigmaE2', 'sigmaC2', 'sigmaR2', 'B']
    else:
        # LuxUS sep
        vars_to_be_printed = ['sigmaE2', 'sigmaR2', 'B']

    fit.plot(vars_to_be_printed)
    pyplot.tight_layout()
    # Saving the figure and reading theta used to live inside the "LuxUS sep"
    # branch only, which left the LuxUS plot unsaved and raised a NameError
    # on the theta mean below. Both now run for every model.
    pyplot.savefig(plot_file_name)

    # NOTE(review): the mean is taken over axis 1, as in the original code;
    # confirm this is the intended axis for the shape of samples['theta'].
    theta_mean = numpy.mean(samples['theta'], 1)

    return theta_mean, time_start, time_end, time_end_full
Ejemplo n.º 3
0
def run_luxus_HMC(sigmaB2, lux_data, stan_model_file, plot_file_name,
                  covariate_to_be_tested, N_outputsamples, N_chains,
                  diagnostic_plots, testtype, test2cov, N_cytosines,
                  N_predictors):
    """Fit the Stan model with HMC and compute one Bayes factor per cytosine.

    Args:
        sigmaB2: Scalar multiplied with a 1x1 identity matrix and handed to
            ``calculate_savagedickey_kde_1d`` as its second argument.
        lux_data: Data passed to ``stan_cache`` as ``data``.
        stan_model_file: Model file passed to ``stan_cache``.
        plot_file_name: Destination of the diagnostic figure; only used when
            ``diagnostic_plots == 1``.
        covariate_to_be_tested: Offset of the tested coefficient inside each
            cytosine's group of ``N_predictors`` columns of ``B``.
        N_outputsamples: Passed to ``stan_cache`` as ``iter``.
        N_chains: Passed to ``stan_cache`` as ``chains``.
        diagnostic_plots: ``1`` to plot the fit and save the figure.
        testtype: ``1`` tests the selected coefficient itself; any other
            value tests its difference to the coefficient at ``test2cov``.
        test2cov: Offset of the coefficient subtracted when ``testtype != 1``.
        N_cytosines: Number of cytosines; also decides which variables are
            plotted and whether ``d`` is summarised.
        N_predictors: Number of columns of ``B`` per cytosine, and the stride
            used to subsample the ``d`` summaries.

    Returns:
        Tuple ``(bf, runtime, sigmaR2_mean, sigmaE2_mean, d_median,
        theta_median, d_mean, theta_mean)``. ``bf`` is an array of length
        ``N_cytosines``; ``d_median`` and ``d_mean`` are subsampled with step
        ``N_predictors`` and are ``[0]`` when ``N_cytosines == 1``.
        ``runtime`` covers fitting and the Bayes factor computation but not
        the plotting.
    """
    time_start = time.time()
    fit = stan_cache(stan_model_file,
                     data=lux_data,
                     iter=N_outputsamples,
                     chains=N_chains)
    samples = fit.extract(permuted=True)

    bf = numpy.zeros(N_cytosines)
    B_samples = samples['B']

    for c_ind in range(N_cytosines):
        # Each cytosine owns N_predictors consecutive columns of B. The
        # stride used to be hard-coded as 2, which only addressed the right
        # columns for models with exactly two predictors.
        first_column = N_predictors * c_ind
        tested = B_samples[:, first_column + covariate_to_be_tested]
        if testtype != 1:
            tested = tested - B_samples[:, first_column + test2cov]
        bf[c_ind] = calculate_savagedickey_kde_1d(numpy.zeros((1, )),
                                                  sigmaB2 * numpy.eye(1),
                                                  tested)

    time_end_full = time.time()
    print(fit)

    if diagnostic_plots == 1:
        if N_cytosines == 1:
            vars_to_be_printed = ['sigmaE2', 'sigmaR2', 'B']
        else:
            vars_to_be_printed = ['l', 'sigmaE2', 'sigmaR2', 'B', 'd']

        fit.plot(vars_to_be_printed)
        pyplot.tight_layout()
        pyplot.savefig(plot_file_name)

    sigmaR2_mean = numpy.mean(samples['sigmaR2'])
    sigmaE2_mean = numpy.mean(samples['sigmaE2'])
    theta_median = numpy.median(samples['theta'], axis=0)
    theta_mean = numpy.mean(samples['theta'], axis=0)

    if N_cytosines > 1:
        d_median = numpy.median(samples['d'], axis=0)
        d_mean = numpy.mean(samples['d'], axis=0)
    else:
        # 'd' is not read for a single cytosine; report a zero placeholder.
        d_median = numpy.array([0])
        d_mean = numpy.array([0])

    runtime = time_end_full - time_start

    return (bf, runtime, sigmaR2_mean, sigmaE2_mean, d_median[::N_predictors],
            theta_median, d_mean[::N_predictors], theta_mean)
Ejemplo n.º 4
0
def run_luxus_VI(lux_data, model_name, N_cytosines, N_predictors, N_replicates,
                 N_gradsamples, N_elbosamples, N_outputsamples,
                 temp_input_data_file_name, temp_output_file_name, sigmaB2,
                 isWindow, name_to_BF_file, BF_output_file, diff,
                 cytosineIndex, d_file, B_file):
    """Run the compiled Stan model in variational mode and append Bayes factors.

    The data are dumped to ``temp_input_data_file_name``, the executable
    ``./<model_name>`` is run with the ``variational`` method, and its output
    is read back from ``temp_output_file_name``. Bayes factors are appended
    to ``BF_output_file``, the ``B`` samples are saved to
    ``"<B_file>_<name_to_BF_file>_<cytosineIndex>.txt"`` and both temporary
    files are removed afterwards.

    Args:
        lux_data: Data dumped with ``pystan.misc.stan_rdump``.
        model_name: Name of the executable, run as ``./<model_name>``.
        N_cytosines: Number of cytosines in the analysis.
        N_predictors: Number of columns of ``B`` per cytosine.
        N_replicates: Number of replicates; ``theta`` is extracted with
            ``2 * N_replicates * N_cytosines`` entries.
        N_gradsamples: Value of the ``grad_samples`` argument.
        N_elbosamples: Value of the ``elbo_samples`` argument.
        N_outputsamples: Value of the ``output_samples`` argument.
        temp_input_data_file_name: Temporary data file (deleted at the end).
        temp_output_file_name: Temporary output file (deleted at the end).
        sigmaB2: Scalar multiplied with a 1x1 identity matrix and handed to
            ``calculate_savagedickey_kde_1d`` as its second argument.
        isWindow: ``1`` computes a single Bayes factor from column 1 of
            ``B``; otherwise one Bayes factor per cytosine is computed from
            column ``c_ind * N_predictors + 1``.
        name_to_BF_file: Label written on every output line and used in the
            name of the saved ``B`` samples file.
        BF_output_file: File the Bayes factor lines are appended to.
        diff: Value written next to every Bayes factor in the output file.
        cytosineIndex: Only reported when ``N_cytosines == 1``; otherwise the
            loop index is written instead.
        d_file: Value of the ``diagnostic_file`` argument.
        B_file: Prefix of the file the ``B`` samples are saved to.

    Returns:
        Tuple ``(theta_mean, time_start, time_end, time_end_full)`` where
        ``time_end`` is taken right after the executable returns and
        ``time_end_full`` after the Bayes factors have been written.

    Raises:
        OSError: If the executable ``./<model_name>`` cannot be started.
    """
    import os
    import sys

    time_start = time.time()
    pystan.misc.stan_rdump(lux_data, temp_input_data_file_name)
    # An argument list (instead of a shell string) keeps file names that
    # contain spaces or shell metacharacters intact.
    subprocess.call([
        "./%s" % model_name,
        "variational",
        "grad_samples=%s" % N_gradsamples,
        "elbo_samples=%s" % N_elbosamples,
        "output_samples=%s" % N_outputsamples,
        "data",
        "file=%s" % temp_input_data_file_name,
        "output",
        "file=%s" % temp_output_file_name,
        "diagnostic_file=%s" % d_file,
    ])
    time_end = time.time()
    samples = csvIntoExtractDict(temp_output_file_name, 0)

    if isWindow == 1:
        samples_B = extractVariable1dim(samples, 'B', N_predictors,
                                        N_outputsamples)
        bf = calculate_savagedickey_kde_1d(numpy.zeros((1, )),
                                           sigmaB2 * numpy.eye(1),
                                           samples_B[:, 1])
        with open(BF_output_file, 'a+') as fw:
            fw.write("%f\t%f\t%s\n" % (bf, diff, name_to_BF_file))
    else:
        samples_B = extractVariable1dim(samples, 'B',
                                        N_predictors * N_cytosines,
                                        N_outputsamples)
        for c_ind in range(N_cytosines):
            bf = calculate_savagedickey_kde_1d(
                numpy.zeros((1, )), sigmaB2 * numpy.eye(1),
                samples_B[:, c_ind * N_predictors + 1])
            # With a single cytosine the caller-supplied index identifies it;
            # otherwise the position inside the analysed set is reported.
            reported_index = cytosineIndex if N_cytosines == 1 else c_ind
            with open(BF_output_file, 'a+') as f:
                f.write("%f\t%f\t%f\t%s\n" %
                        (bf, diff, reported_index, name_to_BF_file))

    time_end_full = time.time()

    numpy.savetxt("%s_%s_%s.txt" % (B_file, name_to_BF_file, cytosineIndex),
                  samples_B,
                  delimiter=',')

    samples_theta = extractVariable1dim(samples, 'theta',
                                        2 * N_replicates * N_cytosines,
                                        N_outputsamples)
    # NOTE(review): the mean is taken over axis 1, as in the original code;
    # confirm this is the intended axis for the shape of samples_theta.
    theta_mean = numpy.mean(samples_theta, 1)

    # Best-effort cleanup: a failed removal is reported but does not abort
    # the run, like the `rm` calls this replaces.
    for temp_path in (temp_input_data_file_name, temp_output_file_name):
        try:
            os.remove(temp_path)
        except OSError as err:
            sys.stderr.write("Could not remove temporary file %s: %s\n" %
                             (temp_path, err))

    return theta_mean, time_start, time_end, time_end_full
Ejemplo n.º 5
0
def run_luxus_VI(lux_data, model_name, N_gradsamples, N_elbosamples,
                 N_outputsamples, temp_input_data_file_name,
                 temp_output_file_name, sigmaB2, diagnostic_plots,
                 plot_file_name, N_predictors, N_reps, covariate_to_be_tested,
                 testtype, test2cov, N_cytosines):
    """Run the compiled Stan model in variational mode and compute a Bayes factor.

    The data are dumped to ``temp_input_data_file_name``, the executable
    ``./<model_name>`` is run with the ``variational`` method, and its output
    is read back from ``temp_output_file_name``. Both temporary files are
    removed before returning.

    Args:
        lux_data: Data dumped with ``pystan.misc.stan_rdump``.
        model_name: Name of the executable, run as ``./<model_name>``.
        N_gradsamples: Value of the ``grad_samples`` argument.
        N_elbosamples: Value of the ``elbo_samples`` argument.
        N_outputsamples: Value of the ``output_samples`` argument.
        temp_input_data_file_name: Temporary data file (deleted at the end).
        temp_output_file_name: Temporary output file (deleted at the end).
        sigmaB2: Scalar multiplied with a 1x1 identity matrix and handed to
            ``calculate_savagedickey_kde_1d`` as its second argument.
        diagnostic_plots: ``1`` to draw histograms of the sampled variables,
            save them to ``plot_file_name`` and show them.
        plot_file_name: Destination of the diagnostic figure.
        N_predictors: Number of predictors; ``B`` is extracted with
            ``N_predictors * N_cytosines`` entries.
        N_reps: Number of replicates; ``theta`` is extracted with
            ``N_cytosines * N_reps`` entries.
        covariate_to_be_tested: Column of the ``B`` samples to test.
        testtype: ``1`` tests the selected column itself; any other value
            tests the difference to column ``test2cov``.
        test2cov: Column subtracted from the tested one when
            ``testtype != 1``.
        N_cytosines: Number of cytosines; ``l`` is only plotted when it is
            greater than one.

    Returns:
        Tuple ``(bf, runtime, sigmaR2_mean, sigmaE2_mean, theta_median)``.
        ``runtime`` covers the data dump, the executable run and the Bayes
        factor computation.

    Raises:
        OSError: If the executable ``./<model_name>`` cannot be started.
    """
    import os
    import sys

    time_start = time.time()
    pystan.misc.stan_rdump(lux_data, temp_input_data_file_name)
    # An argument list (instead of a shell string) keeps file names that
    # contain spaces or shell metacharacters intact.
    subprocess.call([
        "./%s" % model_name,
        "variational",
        "grad_samples=%s" % N_gradsamples,
        "elbo_samples=%s" % N_elbosamples,
        "output_samples=%s" % N_outputsamples,
        "data",
        "file=%s" % temp_input_data_file_name,
        "output",
        "file=%s" % temp_output_file_name,
    ])
    samples = csvIntoExtractDict(temp_output_file_name, 0)

    samples_B = extractVariable1dim(samples, 'B', N_predictors * N_cytosines,
                                    N_outputsamples)

    # Select the sampled quantity whose posterior is evaluated at zero.
    tested = samples_B[:, covariate_to_be_tested]
    if testtype != 1:
        tested = tested - samples_B[:, test2cov]
    bf = calculate_savagedickey_kde_1d(numpy.zeros((1, )),
                                       sigmaB2 * numpy.eye(1), tested)

    time_end_full = time.time()

    samples_sigmaR2 = extractVariable1dim(samples, 'sigmaR2', 1,
                                          N_outputsamples)
    samples_sigmaE2 = extractVariable1dim(samples, 'sigmaE2', 1,
                                          N_outputsamples)
    # NOTE(review): sigmaC2 is extracted even when N_cytosines == 1; confirm
    # the single-cytosine model actually outputs it.
    samples_sigmaC2 = extractVariable1dim(samples, 'sigmaC2', 1,
                                          N_outputsamples)
    samples_theta = extractVariable1dim(samples, 'theta', N_cytosines * N_reps,
                                        N_outputsamples)

    sigmaR2_mean = numpy.mean(samples_sigmaR2)
    sigmaE2_mean = numpy.mean(samples_sigmaE2)
    theta_median = numpy.median(samples_theta, axis=0)

    if diagnostic_plots == 1:
        # All panels are drawn into one figure first. Calling show() between
        # panels, or before savefig(), can leave the saved file empty or
        # incomplete, so the figure is saved once and shown once at the end.
        pyplot.subplot(5, 1, 1)
        pyplot.hist(samples_sigmaR2, bins=20)
        pyplot.title(r'$\sigma_R^2$')

        pyplot.subplot(5, 1, 2)
        pyplot.hist(samples_sigmaE2, bins=20)
        pyplot.title(r'$\sigma_E^2$')

        pyplot.subplot(5, 1, 3)
        pyplot.hist(samples_sigmaC2, bins=20)
        pyplot.title(r'$\sigma_C^2$')

        pyplot.subplot(5, 1, 4)
        for b_ind in range(N_predictors):
            # Samples run along axis 0, so one coefficient is one column.
            pyplot.hist(samples_B[:, b_ind], bins=20, label="b_%s" % (b_ind))
        pyplot.legend(loc='upper right')
        # Title the coefficient panel before another panel becomes current.
        pyplot.title(r'$\mathbf{b}$')

        if N_cytosines > 1:
            samples_l = extractVariable1dim(samples, 'l', 1, N_outputsamples)

            pyplot.subplot(5, 1, 5)
            pyplot.hist(samples_l, bins=20)
            pyplot.title(r'$\ell$')

        pyplot.tight_layout()
        pyplot.savefig(plot_file_name)
        pyplot.show()

    # Best-effort cleanup: a failed removal is reported but does not abort
    # the run, like the `rm` calls this replaces.
    for temp_path in (temp_input_data_file_name, temp_output_file_name):
        try:
            os.remove(temp_path)
        except OSError as err:
            sys.stderr.write("Could not remove temporary file %s: %s\n" %
                             (temp_path, err))

    runtime = time_end_full - time_start

    return bf, runtime, sigmaR2_mean, sigmaE2_mean, theta_median