Exemple #1
0
def do_plots(drug_channel):
    """Draw side-by-side histograms of the sigma samples of Models 1 and 2.

    For each model the non-hierarchical chain file of the given pair is
    looked up through ``dr.nonhierarchical_chain_file_and_figs_dir`` (using
    the module-level ``temperature``) and one column of it is histogrammed:
    column 1 for Model 1, column 2 for Model 2.  The figure is written to the
    current working directory, shown in a blocking window and then closed.

    Args:
        drug_channel: (drug, channel) pair to plot.

    Returns:
        None.
    """
    top_drug, top_channel = drug_channel

    label_size = 16
    fig, panels = plt.subplots(1, 2, figsize=(6,3), sharey=True, sharex=True)
    for panel, heading in zip(panels, ("Model 1", "Model 2")):
        panel.grid()
        panel.set_xlabel(r'$\sigma$', fontsize=label_size)
        panel.set_title(heading)
    # the y axis is shared, so only the left panel carries the label
    panels[0].set_ylabel("Normalised frequency")

    # (model number, chain-file column read as sigma) for each panel
    sigma_columns = ((1, 1), (2, 2))
    for panel, (model, column) in zip(panels, sigma_columns):
        drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
            model, top_drug, top_channel, temperature)
        sigmas = np.loadtxt(chain_file, usecols=[column])
        panel.hist(sigmas, bins=40, normed=True, color='blue', edgecolor='blue')

    # `drug` and `channel` are the values returned for Model 2 (the last lookup)
    fig.tight_layout()
    fig.savefig("{}_{}_nonh_both_models_sigma_hists.png".format(drug, channel))
    plt.show(block=True)
    plt.close()

    return None
Exemple #2
0
def compute_log_py_approxn(temp):
    print temp
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        m, top_drug, top_channel, temp)
    chain = np.loadtxt(chain_file, usecols=range(dr.num_params))
    num_its = chain.shape[0]
    total = 0.
    start = 0
    for it in xrange(start, num_its):
        temperature = 1  # approximating full likelihood
        temp_bit = dr.log_data_likelihood(responses, where_r_0, where_r_100,
                                          where_r_other, concs, chain[it, :],
                                          temperature, pi_bit)
        total += temp_bit
        if temp_bit == -np.inf:
            print chain[it, :]
    answer = total / (num_its - start)
    if answer == -np.inf:
        print "ANSWER IS -INF"
    return answer
Exemple #3
0
dr.setup(args.data_file)
drugs_to_run, channels_to_run = dr.list_drug_channel_options(args.all)

num_x_pts = 50
alpha = 0.002  # this is the lowest value I've found that actually shows anything

for drug, channel in it.product(drugs_to_run, channels_to_run):

    try:
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
    except:
        print "\nCan't load experimental data for {} + {} --- skipping\n".format(
            drug, channel)
        continue
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        args.model, drug, channel, temperature)

    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    min_x = int(np.log10(np.min(concs))) - 1
    max_x = int(np.log10(np.max(concs))) + 2

    try:
        chain = np.loadtxt(chain_file)
    except:
        print "\nCan't find/load chain file for {} + {}, model {} --- skipping\n".format(
            drug, channel, args.model)
Exemple #4
0
def run_single_level(drug_channel):

    drug, channel = drug_channel

    print "\n\n{} + {}\n\n".format(drug, channel)

    seed = 100

    try:
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
    except:
        print "Problem loading data, guessing there are no entries for {} + {} --- skipping".format(
            drug, channel)
        return None

    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        args.model, drug, channel, temperature)

    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    if np.any(np.isnan(responses)):
        print "Skipping {} because of empty responses / missing data".format(
            drug_channel)
        return None

    #print experiments
    #print concs
    #print responses

    where_r_0 = responses == 0
    where_r_100 = responses == 100
    where_r_other = (0 < responses) & (responses < 100)

    #print "where_r_0:", where_r_0
    #print "where_r_100:", where_r_100
    #print "where_r_other:", where_r_other

    pi_bit = dr.compute_pi_bit_of_log_likelihood(where_r_other)

    # plot priors
    for i in xrange(num_params):
        fig = plt.figure(figsize=(4, 3))
        ax = fig.add_subplot(111)
        ax.grid()
        ax.plot(dr.prior_xs[i], dr.prior_pdfs[i], color='blue', lw=2)
        ax.set_xlabel(dr.labels[i])
        ax.set_ylabel("Prior pdf")
        fig.tight_layout()
        fig.savefig(images_dir + dr.file_labels[i] + "_prior_pdf.pdf")
        plt.close()

    start = time.time()

    sigma0 = 0.1
    opts = cma.CMAOptions()
    opts['seed'] = seed
    if args.model == 1:
        #x0 = np.array([2.5, 3.])
        x0 = np.array([2.5, 1.])
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower,dr.sigma_uniform_lower], temperature, pi_bit) for x in X])
            es.tell(X, [
                sum_of_square_diffs([x[0]**2 + dr.pic50_exp_lower, 1.], concs,
                                    responses) for x in X
            ])
            es.disp()
        res = es.result
        #pic50_cur, sigma_cur = res[0]**2 + [dr.pic50_exp_lower, dr.sigma_uniform_lower]
        pic50_cur = res[0][0]**2 + dr.pic50_exp_lower
        hill_cur = 1
    elif args.model == 2:
        #x0 = np.array([2.5, 1., 3.])
        x0 = np.array([2.5, 1.])
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower], temperature, pi_bit) for x in X])
            es.tell(X, [
                sum_of_square_diffs(
                    x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower], concs,
                    responses) for x in X
            ])
            es.disp()
        res = es.result
        #pic50_cur, hill_cur, sigma_cur = res[0]**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower]
        pic50_cur, hill_cur = res[0]**2 + [
            dr.pic50_exp_lower, dr.hill_uniform_lower
        ]

    sigma_cur = initial_sigma(len(responses), res[1])
    #print "sigma_cur:", sigma_cur

    if args.model == 1:
        theta_cur = np.array([pic50_cur, sigma_cur])
    elif args.model == 2:
        theta_cur = np.array([pic50_cur, hill_cur, sigma_cur])

    #print "theta_cur:", theta_cur

    best_params_file = images_dir + "{}_{}_best_fit_params.txt".format(
        drug, channel)
    with open(best_params_file, "w") as outfile:
        outfile.write("# CMA-ES best fit params\n")
        if args.model == 1:
            outfile.write("# pIC50, sigma, (Hill=1, not included)\n")
        elif args.model == 2:
            outfile.write("# pIC50, Hill, sigma\n")
        np.savetxt(outfile, [theta_cur])

    proposal_scale = 0.05

    mean_estimate = np.copy(theta_cur)
    cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur)))

    cmaes_ll = dr.log_target(responses, where_r_0, where_r_100, where_r_other,
                             concs, theta_cur, temperature, pi_bit)
    #print "cmaes_ll:", cmaes_ll

    best_fit_fig = plt.figure(figsize=(5, 4))
    best_fit_ax = best_fit_fig.add_subplot(111)
    best_fit_ax.set_xscale('log')
    best_fit_ax.grid()
    if np.min(concs) == 0:
        plot_lower_lim = int(np.log10(np.min(concs[np.nonzero(concs)]))) - 2
    else:
        plot_lower_lim = int(np.log10(np.min(concs))) - 2
    plot_upper_lim = int(np.log10(np.max(concs))) + 2
    best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim)
    best_fit_ax.set_ylim(0, 100)
    num_x_pts = 1001
    x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_x_pts)
    best_fit_curve = dr.dose_response_model(x_range, hill_cur,
                                            dr.pic50_to_ic50(pic50_cur))
    best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2)
    best_fit_ax.set_ylabel('% {} block'.format(channel))
    best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    best_fit_ax.set_title(r'$pIC50 = {}, Hill = {}; SS = {}$'.format(
        np.round(pic50_cur, 2), np.round(hill_cur, 2), round(res[1], 2)))
    best_fit_ax.plot(concs,
                     responses,
                     "o",
                     color='orange',
                     ms=10,
                     label='Data',
                     zorder=10)
    best_fit_ax.legend(loc=2)
    best_fit_fig.tight_layout()
    best_fit_fig.savefig(
        images_dir +
        '{}_{}_model_{}_CMA-ES_best_fit.png'.format(drug, channel, args.model))
    best_fit_fig.savefig(
        images_dir +
        '{}_{}_model_{}_CMA-ES_best_fit.pdf'.format(drug, channel, args.model))
    plt.close()

    if args.best_fit_only:
        print "\nStopping {}+{} after doing and plotting best fit\n".format(
            drug, channel)
        return None

    # let MCMC look around for a bit before adaptive covariance matrix
    # same rule (100*dimension) as in hierarchical case
    when_to_adapt = 1000 * num_params

    log_target_cur = dr.log_target(responses, where_r_0, where_r_100,
                                   where_r_other, concs, theta_cur,
                                   temperature, pi_bit)

    #print "initial log_target_cur =", log_target_cur

    # effectively step size, scales covariance matrix
    loga = 0.
    # what fraction of proposed samples are being accepted into the chain
    acceptance = 0.
    # what fraction of samples we WANT accepted into the chain
    # loga updates itself to try to make this dream come true
    target_acceptance = 0.25

    total_iterations = args.iterations
    thinning = args.thinning
    assert (total_iterations % thinning == 0)

    # how often to print a little status message
    status_when = total_iterations / 20

    saved_iterations = total_iterations / thinning + 1
    # also want to store log-target value at each iteration
    chain = np.zeros((saved_iterations, num_params + 1))

    chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur]))
    #print chain[0]

    #print "concs:", concs
    #print "responses:", responses

    # for reproducible results, otherwise select a new random seed
    seed = 25
    npr.seed(seed)

    # MCMC!
    t = 1
    start = time.time()
    while t <= total_iterations:
        theta_star = npr.multivariate_normal(theta_cur,
                                             np.exp(loga) * cov_estimate)
        accepted = 0
        log_target_star = dr.log_target(responses, where_r_0, where_r_100,
                                        where_r_other, concs, theta_star,
                                        temperature, pi_bit)
        accept_prob = npr.rand()
        if (np.log(accept_prob) < log_target_star - log_target_cur):
            theta_cur = theta_star
            log_target_cur = log_target_star
            accepted = 1
        acceptance = ((t - 1.) * acceptance + accepted) / t
        if (t > when_to_adapt):
            s = t - when_to_adapt
            gamma_s = 1 / (s + 1)**0.6
            temp_covariance_bit = np.array([theta_cur - mean_estimate])
            cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot(
                np.transpose(temp_covariance_bit), temp_covariance_bit)
            mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur
            loga += gamma_s * (accepted - target_acceptance)
        if (t % thinning == 0):
            chain[t / thinning, :] = np.concatenate(
                (np.copy(theta_cur), [log_target_cur]))
        if (t % status_when == 0):
            #print "{} / {}".format(t/status_when,total_iterations/status_when)
            time_taken_so_far = time.time() - start
            estimated_time_left = time_taken_so_far / t * (total_iterations -
                                                           t)
            #print "Time taken: {} s = {} min".format(np.round(time_taken_so_far,1),np.round(time_taken_so_far/60,2))
            #print "acceptance = {}".format(np.round(acceptance,5))
            #print "Estimated time remaining: {} s = {} min".format(np.round(estimated_time_left,1),np.round(estimated_time_left/60,2))
        t += 1

    #print "\nTime taken to do {} MCMC iterations: {} s\n".format(total_iterations, time.time()-start)
    #print "Final iteration:", chain[-1,:], "\n"

    burn_fraction = args.burn_in_fraction
    burn = saved_iterations / burn_fraction

    chain = chain[burn:, :]  # remove burn-in before saving
    with open(chain_file, 'w') as outfile:
        outfile.write(
            '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n'
            .format(drug, channel))
        np.savetxt(outfile, chain)

    best_ll_index = np.argmax(chain[:, num_params])
    best_ll_row = chain[best_ll_index, :]
    #print "Best log-likelihood:", "\n", best_ll_row

    figs = []
    axs = []
    # plot all marginal posterior distributions
    for i in range(num_params):
        figs.append(plt.figure())
        axs.append([])
        axs[i].append(figs[i].add_subplot(211))
        axs[i][0].hist(chain[:, i],
                       bins=40,
                       normed=True,
                       color='blue',
                       edgecolor='blue')
        axs[i][0].legend()
        axs[i][0].set_title("MCMC marginal distributions")
        axs[i][0].set_ylabel("Normalised frequency")
        axs[i][0].grid()
        plt.setp(axs[i][0].get_xticklabels(), visible=False)
        axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0]))
        axs[i][1].plot(chain[:, i], range(burn, saved_iterations))
        axs[i][1].invert_yaxis()
        axs[i][1].set_xlabel(dr.labels[i])
        axs[i][1].set_ylabel('Saved MCMC iteration')
        axs[i][1].grid()
        figs[i].tight_layout()
        figs[i].savefig(images_dir + '{}_{}_model_{}_{}_marginal.png'.format(
            drug, channel, args.model, dr.file_labels[i]))
        plt.close()

    # plot log-target path
    fig2 = plt.figure()
    ax3 = fig2.add_subplot(111)
    ax3.plot(range(burn, saved_iterations), chain[:, -1])
    ax3.set_xlabel('MCMC iteration')
    ax3.set_ylabel('log-target')
    ax3.grid()
    fig2.tight_layout()
    fig2.savefig(images_dir + 'log_target.png')
    plt.close()

    # plot scatterplot matrix of posterior(s)
    colormin, colormax = 1e9, 0
    norm = matplotlib.colors.Normalize(vmin=5, vmax=10)
    hidden_labels = []
    count = 0
    # there's probably a better way to do this
    # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots
    while count < 2:
        axes = {}
        matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params))
        for i in range(num_params):
            for j in range(i + 1):
                ij = str(i) + str(j)
                subplot_position = num_params * i + j + 1
                if i == j:
                    axes[ij] = matrix_fig.add_subplot(num_params, num_params,
                                                      subplot_position)
                    axes[ij].hist(chain[:, i],
                                  bins=50,
                                  normed=True,
                                  color='blue',
                                  edgecolor='blue')
                elif j == 0:  # this column shares x-axis with top-left
                    axes[ij] = matrix_fig.add_subplot(num_params,
                                                      num_params,
                                                      subplot_position,
                                                      sharex=axes["00"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[:, j],
                        chain[:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                else:
                    axes[ij] = matrix_fig.add_subplot(
                        num_params,
                        num_params,
                        subplot_position,
                        sharex=axes[str(j) + str(j)],
                        sharey=axes[str(i) + "0"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[:, j],
                        chain[:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                axes[ij].xaxis.grid()
                if (i != j):
                    axes[ij].yaxis.grid()
                if i != num_params - 1:
                    hidden_labels.append(axes[ij].get_xticklabels())
                if j != 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == j == 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == num_params - 1:
                    axes[str(i) + str(j)].set_xlabel(dr.labels[j], fontsize=18)
                if j == 0 and i > 0:
                    axes[str(i) + str(j)].set_ylabel(dr.labels[i], fontsize=18)

                plt.xticks(rotation=30)
        norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax)
        count += 1

    plt.setp(hidden_labels, visible=False)

    matrix_fig.tight_layout()
    matrix_fig.savefig(images_dir +
                       "{}_{}_model_{}_scatterplot_matrix.png".format(
                           drug, channel, args.model))
    matrix_fig.savefig(images_dir +
                       "{}_{}_model_{}_scatterplot_matrix.pdf".format(
                           drug, channel, args.model))
    plt.close()

    print "\n\n{} + {} complete!\n\n".format(drug, channel)
    return None
Exemple #5
0
# Starts empty.  (The spelling "abiguous" is kept: code outside this excerpt
# may refer to the name.)
abiguous_BFs = []

# Six tallies, one per name, all starting at zero.
# Presumably counts per evidence category and direction (B12 / B21) --
# confirm against the code that increments them.
substantial_b12 = substantial_b21 = 0
strong_b12 = strong_b21 = 0
decisive_b12 = decisive_b21 = 0

# Directory holding the Model 2 chain files.
m2_chain_dir = "/home/rossj/arcus-b-py-output"


def m2_chain_file(drug, channel):
    """Return the path of the Model 2, temperature-1.0 chain file for a pair.

    The file name is
    "<drug>_<channel>_model_2_temp_1.0_chain_nonhierarchical.txt" inside
    ``m2_chain_dir``.  The two parts are joined with a path separator;
    plain concatenation glued the file name onto the last directory
    component because ``m2_chain_dir`` has no trailing slash.

    Args:
        drug: drug name as it appears in the file name.
        channel: channel name as it appears in the file name.

    Returns:
        The full path as a string.
    """
    # Local import so this block does not depend on the file's import section.
    import os.path
    filename = "{}_{}_model_2_temp_1.0_chain_nonhierarchical.txt".format(
        drug, channel)
    return os.path.join(m2_chain_dir, filename)
    
# Tally that starts at zero; it is not updated in the visible part of the loop.
no_evidence_count = 0

# For every (drug index, channel index) pair: load the stored Bayes factor
# and the best-fit parameters of both models.
for i, j in drugs_channels_idx:

    top_drug = dr.drugs[i]
    top_channel = dr.channels[j]
        
    # Looked up with model 1 / temperature 1; only the returned drug and
    # channel names are used here, to build the Bayes-factor file name.
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(1, top_drug, top_channel, 1)
    bf_dir = "BFs/"
    bf_file = bf_dir + "{}_{}_B12.txt".format(drug,channel)

    BFs[i, j] = np.loadtxt(bf_file)
    
    # Best-fit parameters for models 1 and 2, stored at list index m-1 and
    # keyed by the (i, j) index pair.
    for m in range(1,3):
        drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(m, top_drug, top_channel, 1)
        best_params_file = images_dir+"{}_{}_best_fit_params.txt".format(drug, channel)
        best_params[m-1][(i,j)] = np.loadtxt(best_params_file)
        
    # Leave chain_file / images_dir pointing at the Model 2, temperature-1.0
    # run.
    model = 2
    temp = 1.0
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(model, top_drug, top_channel, temp)
    
    #chain = np.loadtxt(chain_file, usecols=[0,1,3])  # hill and log-target
Exemple #6
0
    # Evaluate compute_log_py_approxn at every temperature, either serially
    # or spread over a pool of worker processes.
    # NOTE(review): if args.num_cores is below 1 neither branch runs and
    # `log_p_ys` is unbound at the print below -- confirm the option is
    # validated elsewhere.
    if args.num_cores == 1:
        log_p_ys = np.zeros(num_temps)
        for i in xrange(num_temps):
            log_p_ys[i] = compute_log_py_approxn(temps[i])
    elif args.num_cores > 1:
        pool = mp.Pool(args.num_cores)
        # .get(9999) waits for the results with a 9999-second timeout
        # (presumably so that Ctrl-C is still delivered while waiting --
        # TODO confirm).
        log_p_ys = np.array(
            pool.map_async(compute_log_py_approxn, temps).get(9999))
        pool.close()
        pool.join()
    print log_p_ys
    # Integrate the per-temperature values over the temperatures with the
    # trapezium rule and store the result for model m.
    expectations[m] = dr.trapezium_rule(temps, log_p_ys)
    print expectations

# Looked up with model 1 / temperature 1; only the returned drug and channel
# names are used, to build the output file name.
drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
    1, top_drug, top_channel, 1)
bf_dir = "BFs/"
# Create the output directory on first use.
if not os.path.exists(bf_dir):
    os.makedirs(bf_dir)
bf_file = bf_dir + "{}_{}_B12.txt".format(drug, channel)
# Bayes factor for each model pair: exp of the difference between the two
# stored expectations.
# NOTE(review): bf_file is the same "..._B12.txt" for every pair, so with more
# than one pair each iteration overwrites the previous one -- confirm that
# model_pairs only ever holds (1, 2).
for pair in model_pairs:
    i, j = pair
    #print expectations[i], expectations[j]
    Bij = np.exp(expectations[i] - expectations[j])
    #print Bij
    #with open("{}_{}_BF.txt".format(drug,channel), "w") as outfile:
    #    outfile.write("{} + {}\n".format(drug,channel))
    #    outfile.write("B_{}{} = {}\n".format(i, j, Bij))
    #    outfile.write("B_{}{} = {}\n".format(j, i, 1./Bij))
    np.savetxt(bf_file, [Bij])
Exemple #7
0
def run(drug_channel):
    drug, channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug, channel)

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts

    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(
        drug, channel, num_expts)

    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    burn = end / 4

    pic50_samples = np.zeros(args.num_hist_samples)
    hill_samples = np.zeros(args.num_hist_samples)
    rand_idx = npr.randint(burn, end, args.num_hist_samples)
    for t in xrange(args.num_hist_samples):
        alpha, beta, mu, s = chain[rand_idx[t], :4]
        hill_samples[t] = st.fisk.rvs(c=beta, scale=alpha, loc=0)
        pic50_samples[t] = st.logistic.rvs(mu, s)

    num_pts = 40
    fig = plt.figure(figsize=(11, 7))

    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin, xmax, num_pts)
    ax1.set_xscale('log')
    ax1.set_ylim(0, 100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin, 10**xmax)

    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)

    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)

    subset_idx = npr.randint(0, args.num_hist_samples, args.num_samples)
    for i in xrange(
            args.num_samples
    ):  # only plot the first T of the H samples (should be fine because they're all randomly selected)
        ax1.plot(concs,
                 dr.dose_response_model(concs, hill_samples[i],
                                        dr.pic50_to_ic50(pic50_samples[i])),
                 color='black',
                 alpha=0.01)

    lfs = 9

    ax1.legend(loc=2, fontsize=lfs)

    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0, 100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc, hill_samples,
                                        dr.pic50_to_ic50(pic50_samples)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))

    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc="best", fontsize=lfs)

    ax3 = fig.add_subplot(232, sharey=ax1, sharex=ax1)
    ax3.grid()
    ax3.set_xscale('log')
    ax3.set_ylim(0, 100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin, 10**xmax)

    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)

    alpha_indices = npr.randint(burn, end, args.num_samples)
    alpha_samples = chain[alpha_indices, 0]
    mu_samples = chain[alpha_indices, 2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)
    for i in xrange(args.num_samples):
        ax3.plot(concs,
                 dr.dose_response_model(concs, alpha_samples[i],
                                        dr.pic50_to_ic50(mu_samples[i])),
                 color='black',
                 alpha=0.01)
    ax3.legend(loc=2, fontsize=lfs)
    ax4 = fig.add_subplot(235, sharey=ax2, sharex=ax2)
    ax4.set_xlim(0, 100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()

    hist_indices = npr.randint(burn, end, args.num_hist_samples)
    alphas = chain[hist_indices, 0]
    mus = chain[hist_indices, 2]

    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc, alphas, dr.pic50_to_ic50(mus)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')

    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)

    # now plot non-hierarchical

    num_params = 3
    temperature = 1
    if args.fix_hill:
        model = 1
    else:
        model = 2
    drug, channel, chain_file, figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        model, drug, channel, temperature)
    chain = np.loadtxt(
        chain_file,
        usecols=range(num_params -
                      1))  # not interested in log-target values right now
    end = chain.shape[0]
    burn = end / 4

    sample_indices = npr.randint(burn, end, args.num_samples)
    samples = chain[sample_indices, :]

    ax5 = fig.add_subplot(233, sharey=ax1, sharex=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    ax5.set_xscale('log')
    ax5.set_ylim(0, 100)
    ax5.set_xlim(10**xmin, 10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    ax4.legend(loc="best", fontsize=lfs)

    for expt in experiment_numbers:
        if expt == 1:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        label='All expts',
                        zorder=10)
        else:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        zorder=10)

    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,
                    color=colors[3 + i],
                    alpha=0.8,
                    lw=2,
                    label=r"{} $\mu$M".format(conc))
    for i in xrange(args.num_samples):
        pic50, hill = samples[i, :]
        ax5.plot(concs,
                 dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50)),
                 color='black',
                 alpha=0.01)
    ax5.legend(loc=2, fontsize=lfs)

    sample_indices = npr.randint(burn, end, args.num_hist_samples)
    samples = chain[sample_indices, :]
    ax6 = fig.add_subplot(236, sharey=ax2, sharex=ax2)
    ax6.set_xlim(0, 100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc, samples[:, 1],
                                        dr.pic50_to_ic50(samples[:, 0])),
                 bins=50,
                 normed=True,
                 alpha=0.8,
                 color=colors[3 + i],
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax6.set_title('F. Single-level inferred\neffects')

    ax6.legend(loc="best", fontsize=lfs)

    plot_dir = dr.all_predictions_dir(drug, channel)

    fig.tight_layout()
    png_file = plot_dir + '{}_{}_all_predictions_corrected.png'.format(
        drug, channel)
    print png_file
    fig.savefig(png_file)
    pdf_file = plot_dir + '{}_{}_all_predictions_corrected.pdf'.format(
        drug, channel)
    print pdf_file
    fig.savefig(
        pdf_file
    )  # uncomment to save as pdf, or change extension to whatever you want

    plt.close()

    print "Figures saved in", plot_dir
Exemple #8
0
def do_plots(drug_channel):
    """Plot sampled MCMC dose-response curves for Models 1 and 2 side by side.

    Each panel shows ``num_curves`` curves drawn from randomly chosen rows of
    that model's non-hierarchical chain, the curve of the row with the
    largest value in the last chain column ("Max PD"), and the pooled data.
    Model 1 curves use a fixed Hill coefficient of 1; Model 2 curves use the
    sampled Hill value from chain column 1.  The figure is saved to the
    current working directory.

    ``num_pts``, ``num_curves`` and ``temperature`` are read from module
    scope.

    Args:
        drug_channel: (drug, channel) pair to plot.

    Returns:
        None.
    """
    top_drug, top_channel = drug_channel

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        top_drug, top_channel)

    # Pool the experiments: column 0 goes to concs, column 1 to responses.
    # The leading empty arrays keep the result a float array even when there
    # is nothing to add.
    conc_parts = [np.array([])]
    resp_parts = [np.array([])]
    for k in xrange(num_expts):
        conc_parts.append(experiments[k][:, 0])
        resp_parts.append(experiments[k][:, 1])
    concs = np.concatenate(conc_parts)
    responses = np.concatenate(resp_parts)

    # Smallest strictly positive and largest concentration over all
    # experiments, starting from the sentinels 1000 and -1000.
    smallest_positive = 1000
    largest = -1000
    for expt in experiments:
        lowest = np.min(expt[:, 0])
        highest = np.max(expt[:, 0])
        if 0 < lowest < smallest_positive:
            smallest_positive = lowest
        if highest > largest:
            largest = highest

    # Pad the plotted range by whole decades on either side.
    xmin = int(np.log10(smallest_positive)) - 2
    xmax = int(np.log10(largest)) + 3

    x = np.logspace(xmin, xmax, num_pts)

    fig, (ax1, ax2) = plt.subplots(1,
                                   2,
                                   figsize=(9, 4),
                                   sharey=True,
                                   sharex=True)
    ax1.set_xscale('log')
    for panel in (ax1, ax2):
        panel.grid()
    ax1.set_xlim(10**xmin, 10**xmax)
    ax1.set_ylim(0, 100)
    for panel in (ax1, ax2):
        panel.set_xlabel(r'{} concentration ($\mu$M)'.format(top_drug))
    ax1.set_ylabel(r'% {} block'.format(top_channel))

    # Legend stand-in for the faint sample curves, which carry no label.
    anyArtist = plt.Line2D((0, 1), (0, 0), color='k')

    # ---- Model 1: Hill fixed at 1 ----
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        1, top_drug, top_channel, temperature)

    chain = np.loadtxt(chain_file)
    # row with the largest value in the last chain column
    best_idx = np.argmax(chain[:, -1])
    best_pic50, best_sigma = chain[best_idx, [0, 1]]

    saved_its, h = chain.shape
    rand_idx = npr.randint(saved_its, size=num_curves)

    ax1.set_title(r'Model 1: $pIC50 = {}$, fixed $Hill = 1$'.format(
        np.round(best_pic50, 2)))
    for sampled_pic50 in chain[rand_idx, 0]:
        ax1.plot(x,
                 dr.dose_response_model(x, 1.,
                                        dr.pic50_to_ic50(sampled_pic50)),
                 color='black',
                 alpha=0.02)
    ax1.plot(x,
             dr.dose_response_model(x, 1., dr.pic50_to_ic50(best_pic50)),
             label='Max PD',
             lw=1.5,
             color='red')
    ax1.plot(concs,
             responses,
             "o",
             color='orange',
             ms=10,
             label='Data',
             zorder=10)

    handles, labels = ax1.get_legend_handles_labels()

    # One combination gets its legend at location 4, every other at 2.
    loc = 4 if (drug == "Quinine" and channel == "Nav1.5-late") else 2
    ax1.legend(handles + [anyArtist], labels + ["Samples"], loc=loc)

    # ---- Model 2: Hill sampled ----
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        2, top_drug, top_channel, temperature)

    chain = np.loadtxt(chain_file)
    best_idx = np.argmax(chain[:, -1])
    best_pic50, best_hill, best_sigma = chain[best_idx, [0, 1, 2]]

    ax2.set_title(r"Model 2: $pIC50={}$, Hill = {}".format(
        np.round(best_pic50, 2), np.round(best_hill, 2)))

    saved_its, h = chain.shape
    rand_idx = npr.randint(saved_its, size=num_curves)

    for sampled_pic50, sampled_hill in zip(chain[rand_idx, 0],
                                           chain[rand_idx, 1]):
        ax2.plot(x,
                 dr.dose_response_model(x, sampled_hill,
                                        dr.pic50_to_ic50(sampled_pic50)),
                 color='black',
                 alpha=0.02)
    ax2.plot(x,
             dr.dose_response_model(x, best_hill,
                                    dr.pic50_to_ic50(best_pic50)),
             label='Max PD',
             lw=1.5,
             color='red')
    ax2.plot(concs,
             responses,
             "o",
             color='orange',
             ms=10,
             label='Data',
             zorder=10)

    handles, labels = ax2.get_legend_handles_labels()
    ax2.legend(handles + [anyArtist], labels + ["Samples"], loc=loc)

    fig.tight_layout()

    # `drug` and `channel` are the names returned by the Model 2 lookup.
    fig.savefig("{}_{}_nonh_both_models_mcmc_prediction_curves.png".format(
        drug, channel))
    plt.close()

    return None
Exemple #9
0
def run(drug_channel):
    drug,channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug,channel)
    
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(drug,channel)
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts
        
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(drug,channel,num_expts)
    
    hill_cdf_file, pic50_cdf_file = dr.hierarchical_posterior_predictive_cdf_files(drug,channel,num_expts)
    
    hill_cdf = np.loadtxt(hill_cdf_file)
    pic50_cdf = np.loadtxt(pic50_cdf_file)
    
    num_samples = 2000
    
    unif_hill_samples = npr.rand(num_samples)
    unif_pic50_samples = npr.rand(num_samples)
    
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    
    
    
    
    fig = plt.figure(figsize=(11,7))
    
    
    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax1.set_xscale('log')
    ax1.set_ylim(0,100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin,10**xmax)
    
    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    
    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax1.plot(concs,dr.dose_response_model(concs,hill_samples[i],dr.pic50_to_ic50(pic50_samples[i])),color='black',alpha=0.01)
    ax1.legend(loc=2,fontsize=10)
    
    num_hist_samples = 100000
    
    unif_hill_samples = npr.rand(num_hist_samples)
    unif_pic50_samples = npr.rand(num_hist_samples)
    
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    
    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0,100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc,hill_samples,dr.pic50_to_ic50(pic50_samples)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    
    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc=2,fontsize=10)
        
    ax3 = fig.add_subplot(232,sharey=ax1)
    ax3.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax3.set_xscale('log')
    ax3.set_ylim(0,100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin,10**xmax)
    
    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    
    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    burn = end/4
    
    num_samples = 1000
    alpha_indices = npr.randint(burn,end,num_samples)
    alpha_samples = chain[alpha_indices,0]
    mu_samples = chain[alpha_indices,2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax3.plot(concs,dr.dose_response_model(concs,alpha_samples[i],dr.pic50_to_ic50(mu_samples[i])),color='black',alpha=0.01)
    ax3.legend(loc=2,fontsize=10)
    ax4 = fig.add_subplot(235,sharey=ax2)
    ax4.set_xlim(0,100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()
    
    num_hist_samples = 100000
    hist_indices = npr.randint(burn,end,num_hist_samples)
    alphas = chain[hist_indices,0]
    mus = chain[hist_indices,2]
    
    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc,alphas,dr.pic50_to_ic50(mus)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')
    
    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)
    
    
    # now plot non-hierarchical
    
    num_params = 3
    drug,channel,chain_file,figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(drug, channel, args.fix_hill)
    chain = np.loadtxt(chain_file,usecols=range(num_params-1)) # not interested in log-target values right now
    end = chain.shape[0]
    burn = end/4

    num_samples = 1000
    sample_indices = npr.randint(burn,end,num_samples)
    samples = chain[sample_indices,:]
    
    
    ax5 = fig.add_subplot(233,sharey=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    xmin = -4
    xmax = 4
    concs = np.logspace(xmin,xmax,101)
    ax5.set_xscale('log')
    ax5.set_ylim(0,100)
    ax5.set_xlim(10**xmin,10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    ax5.legend(fontsize=10)
    
    for expt in experiment_numbers:
        if expt==1:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,label='All expts',zorder=10)
        else:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,zorder=10)
    
    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,color=colors[3+i],alpha=0.8,lw=2,label=r"{} $\mu$M".format(conc))
    for i in xrange(num_samples):
        ax5.plot(concs,dr.dose_response_model(concs,samples[i,0],dr.pic50_to_ic50(samples[i,1])),color='black',alpha=0.01)
    ax5.legend(loc=2,fontsize=10)
    
    num_hist_samples = 50000
    sample_indices = npr.randint(burn,end,num_hist_samples)
    samples = chain[sample_indices,:]
    ax6 = fig.add_subplot(236,sharey=ax2)
    ax6.set_xlim(0,100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc,samples[:,0],dr.pic50_to_ic50(samples[:,1])),bins=50,normed=True,alpha=0.8,color=colors[3+i],edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax6.set_title('F. Single-level inferred\neffects')

    ax2.legend(loc=2,fontsize=10)
    
    
    
    plot_dir = dr.all_predictions_dir(drug,channel)
    
    fig.tight_layout()
    fig.savefig(plot_dir+'{}_{}_all_predictions.png'.format(drug,channel))
    fig.savefig(plot_dir+'{}_{}_all_predictions.pdf'.format(drug,channel)) # uncomment to save as pdf, or change extension to whatever you want
    

    plt.close()
    
    print "Figures saved in", plot_dir
Exemple #10
0
def run_single_level(drug_channel):

    drug, channel = drug_channel

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        drug, channel)

    num_params = 3  # hill, pic50, mu

    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    print experiments
    print concs
    print responses

    # uniform prior intervals
    hill_prior = [0, 10]
    pic50_prior = [-1, 20]
    sigma_prior = [1e-3, 50]

    prior_lowers = np.array([hill_prior[0], pic50_prior[0], sigma_prior[0]])
    prior_uppers = np.array([hill_prior[1], pic50_prior[1], sigma_prior[1]])

    # for reproducible results, otherwise select a new random seed
    seed = 1
    npr.seed(seed)

    start = time.time()
    x0 = np.array(
        [1., 2.5]
    )  # not fitting sigma by CMA-ES, can maximise log-likelihood wrt sigma analytically
    sigma0 = 0.1
    opts = cma.CMAOptions()
    opts['seed'] = seed
    es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
    while not es.stop():
        X = es.ask()
        es.tell(X, [sum_of_square_diffs(x, concs, responses) for x in X])
        es.disp()
    res = es.result()

    hill_cur = res[0][0]**2
    pic50_cur = res[0][1]**2 - 1
    sigma_cur = initial_sigma(len(responses), res[1])
    proposal_scale = 0.01

    theta_cur = np.array([hill_cur, pic50_cur, sigma_cur])
    mean_estimate = np.copy(theta_cur)
    cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur)))

    cmaes_ll = log_likelihood_single(responses, concs, theta_cur)

    best_fit_fig = plt.figure(figsize=(5, 4))
    best_fit_ax = best_fit_fig.add_subplot(111)
    best_fit_ax.set_xscale('log')
    best_fit_ax.grid()
    plot_lower_lim = int(np.log10(np.min(concs))) - 1
    plot_upper_lim = int(np.log10(np.max(concs))) + 2
    best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim)
    best_fit_ax.set_ylim(0, 100)
    num_pts = 1001
    x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_pts)
    best_fit_curve = dr.dose_response_model(x_range, hill_cur,
                                            dr.pic50_to_ic50(pic50_cur))
    best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2)
    best_fit_ax.set_ylabel('% {} block'.format(channel))
    best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    best_fit_ax.set_title('Hill = {}, pIC50 = {}'.format(
        np.round(hill_cur, 2), np.round(pic50_cur, 2)))
    best_fit_ax.scatter(concs,
                        responses,
                        marker="o",
                        color='orange',
                        s=100,
                        label='Data',
                        zorder=10)
    best_fit_ax.legend(loc=2)
    best_fit_fig.tight_layout()
    best_fit_fig.savefig(images_dir +
                         '{}_{}_CMA-ES_best_fit.png'.format(drug, channel))
    best_fit_fig.savefig(images_dir +
                         '{}_{}_CMA-ES_best_fit.png'.format(drug, channel))
    plt.close()

    #sys.exit() # uncomment if you only want to plot the best fit

    # let MCMC look around for a bit before adaptive covariance matrix
    # same rule (100*dimension) as in hierarchical case
    when_to_adapt = 100 * num_params

    log_target_cur = log_likelihood_single(responses, concs, theta_cur)
    print "initial log_target_cur =", log_target_cur

    # effectively step size, scales covariance matrix
    loga = 0.
    # what fraction of proposed samples are being accepted into the chain
    acceptance = 0.
    # what fraction of samples we WANT accepted into the chain
    # loga updates itself to try to make this dream come true
    target_acceptance = 0.25

    total_iterations = args.iterations
    thinning = args.thinning
    assert (total_iterations % thinning == 0)

    # how often to print a little status message
    status_when = total_iterations / 20

    saved_iterations = total_iterations / thinning + 1
    # also want to store log-target value at each iteration
    chain = np.zeros((saved_iterations, num_params + 1))

    chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur]))
    print chain[0]

    print "concs:", concs
    print "responses:", responses

    # MCMC!
    t = 1
    start = time.time()
    while t <= total_iterations:
        theta_star = npr.multivariate_normal(theta_cur,
                                             np.exp(loga) * cov_estimate)
        accepted = 0
        if np.all(prior_lowers < theta_star) and np.all(
                theta_star < prior_uppers):
            log_target_star = log_likelihood_single(responses, concs,
                                                    theta_star)
            accept_prob = npr.rand()
            if (np.log(accept_prob) < log_target_star - log_target_cur):
                theta_cur = theta_star
                log_target_cur = log_target_star
                accepted = 1
        acceptance = ((t - 1.) * acceptance + accepted) / t
        if (t > when_to_adapt):
            s = t - when_to_adapt
            gamma_s = 1 / (s + 1)**0.6
            temp_covariance_bit = np.array([theta_cur - mean_estimate])
            cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot(
                np.transpose(temp_covariance_bit), temp_covariance_bit)
            mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur
            loga += gamma_s * (accepted - target_acceptance)
        if (t % thinning == 0):
            chain[t / thinning, :] = np.concatenate(
                (np.copy(theta_cur), [log_target_cur]))
        if (t % status_when == 0):
            print "{} / {}".format(t / status_when,
                                   total_iterations / status_when)
            time_taken_so_far = time.time() - start
            estimated_time_left = time_taken_so_far / t * (total_iterations -
                                                           t)
            print "Time taken: {} s = {} min".format(
                np.round(time_taken_so_far, 1),
                np.round(time_taken_so_far / 60, 2))
            print "acceptance = {}".format(np.round(acceptance, 5))
            print "Estimated time remaining: {} s = {} min".format(
                np.round(estimated_time_left, 1),
                np.round(estimated_time_left / 60, 2))
        t += 1

    print "\nTime taken to do {} MCMC iterations: {} s\n".format(
        total_iterations,
        time.time() - start)
    print "Final iteration:", chain[-1, :], "\n"

    with open(chain_file, 'w') as outfile:
        outfile.write(
            '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n'
            .format(drug, channel))
        np.savetxt(outfile, chain)

    try:
        assert (len(chain[:, 0]) == saved_iterations)
    except AssertionError:
        print "len(chain[:,0])!=saved_iterations"
        sys.exit()

    burn_fraction = args.burn_in_fraction
    burn = saved_iterations / burn_fraction

    best_ll_index = np.argmax(chain[:, num_params])
    best_ll_row = chain[best_ll_index, :]
    print "Best log-likelihood:", "\n", best_ll_row

    figs = []
    axs = []
    # plot all marginal posterior distributions
    for i in range(num_params):
        labels = ['Hill', 'pIC50', r'$\sigma$']
        file_labels = ['Hill', 'pIC50', 'sigma']
        figs.append(plt.figure())
        axs.append([])
        axs[i].append(figs[i].add_subplot(211))
        axs[i][0].hist(chain[burn:, i], bins=40, normed=True)
        axs[i][0].legend()
        axs[i][0].set_title("MCMC marginal distributions")
        axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0]))
        axs[i][1].plot(chain[burn:, i], range(burn, saved_iterations))
        axs[i][1].invert_yaxis()
        axs[i][1].set_xlabel(labels[i])
        axs[i][1].set_ylabel('Saved MCMC iteration')
        figs[i].tight_layout()
        figs[i].savefig(
            images_dir +
            '{}_{}_{}_marginal.png'.format(drug, channel, file_labels[i]))
        plt.close()

    # plot log-target path
    fig2 = plt.figure()
    ax3 = fig2.add_subplot(111)
    ax3.plot(range(saved_iterations), chain[:, -1])
    ax3.set_xlabel('MCMC iteration')
    ax3.set_ylabel('log-target')
    fig2.tight_layout()
    fig2.savefig(images_dir + 'log_target.png')
    plt.close()

    # plot scatterplot matrix of posterior(s)
    labels = ['Hill', 'pIC50', r'$\sigma$']
    colormin, colormax = 1e9, 0
    norm = matplotlib.colors.Normalize(vmin=5, vmax=10)
    hidden_labels = []
    count = 0
    # there's probably a better way to do this
    # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots
    while count < 2:
        axes = {}
        matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params))
        for i in range(num_params):
            for j in range(i + 1):
                ij = str(i) + str(j)
                subplot_position = num_params * i + j + 1
                if i == j:
                    axes[ij] = matrix_fig.add_subplot(num_params, num_params,
                                                      subplot_position)
                    axes[ij].hist(chain[burn:, i],
                                  bins=50,
                                  normed=True,
                                  color='blue')
                elif j == 0:  # this column shares x-axis with top-left
                    axes[ij] = matrix_fig.add_subplot(num_params,
                                                      num_params,
                                                      subplot_position,
                                                      sharex=axes["00"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[burn:, j],
                        chain[burn:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                else:
                    axes[ij] = matrix_fig.add_subplot(
                        num_params,
                        num_params,
                        subplot_position,
                        sharex=axes[str(j) + str(j)],
                        sharey=axes[str(i) + "0"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[burn:, j],
                        chain[burn:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                if i != num_params - 1:
                    hidden_labels.append(axes[ij].get_xticklabels())
                if j != 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == num_params - 1:
                    axes[str(i) + str(j)].set_xlabel(labels[j])
                if j == 0:
                    axes[str(i) + str(j)].set_ylabel(labels[i])
                plt.xticks(rotation=30)
        norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax)
        count += 1

    plt.setp(hidden_labels, visible=False)

    matrix_fig.tight_layout()
    matrix_fig.savefig(images_dir +
                       "{}_{}_scatterplot_matrix.png".format(drug, channel))
    #matrix_fig.savefig(images_dir+"{}_{}_scatterplot_matrix.pdf".format(drug,channel))
    plt.close()

    print "\n\n{} + {} complete!\n\n".format(drug, channel)