Example #1
0
def log_likelihood_single_fix_hill(measurements, doses, theta):
    """Gaussian log-likelihood of the data with the Hill coefficient fixed at 1.

    The additive constant that does not depend on the parameters is omitted.

    Args:
        measurements: observed responses, one per entry of ``doses``.
        doses: concentrations passed to ``dr.dose_response_model``.
        theta: two-element sequence ``(pIC50, sigma)``.

    Returns:
        ``-n * log(sigma) - SS / (2 * sigma**2)``, where ``n`` is
        ``len(measurements)`` and ``SS`` is the sum of squared residuals.
    """
    pIC50, sigma = theta
    # Hill is hard-wired to 1 for this model, so it is passed as a literal.
    predicted = dr.dose_response_model(doses, 1, dr.pic50_to_ic50(pIC50))
    residuals = measurements - predicted
    sum_of_squares = np.sum(residuals**2)
    return -len(measurements) * np.log(sigma) - sum_of_squares / (2. *
                                                                  sigma**2)
Example #2
0
def log_likelihood_single(measurements, doses, theta):
    """Gaussian log-likelihood of the data with a free Hill coefficient.

    The additive constant that does not depend on the parameters is omitted.

    Args:
        measurements: observed responses, one per entry of ``doses``.
        doses: concentrations passed to ``dr.dose_response_model``.
        theta: indexable parameters; ``theta[0]`` is Hill, ``theta[1]`` is
            pIC50 and ``theta[2]`` is sigma.

    Returns:
        ``-n * log(sigma) - SS / (2 * sigma**2)``, where ``n`` is
        ``len(measurements)`` and ``SS`` is the sum of squared residuals.
    """
    hill, pIC50, sigma = theta[0], theta[1], theta[2]
    predicted = dr.dose_response_model(doses, hill, dr.pic50_to_ic50(pIC50))
    residuals = measurements - predicted
    sum_of_squares = np.sum(residuals**2)
    return -len(measurements) * np.log(sigma) - sum_of_squares / (2. *
                                                                  sigma**2)
Example #3
0
def sum_of_square_diffs(params, model):
    """Return the sum of squared residuals between the data and a model fit.

    The data come from the module-level ``concs`` and ``responses`` arrays,
    and the parameter bounds from the module-level ``hill_lower``,
    ``hill_upper`` and ``pic50_lower``.

    Args:
        params: parameter vector. For ``model == 1`` only ``params[0]``
            (pIC50) is read and Hill is fixed at 1; for ``model == 2`` it
            must unpack to exactly ``(pic50, hill)``.
        model: model number, 1 or 2.

    Returns:
        ``1e9`` as a penalty when a parameter is outside its bounds,
        otherwise ``np.sum((responses - predicted)**2)``.

    Raises:
        ValueError: if ``model`` is neither 1 nor 2.
    """
    if model == 1:
        pic50 = params[0]
        hill = 1
    elif model == 2:
        pic50, hill = params
    else:
        # Without this branch pic50/hill stay unbound and the bounds check
        # below fails with an opaque UnboundLocalError.
        raise ValueError(
            "sum_of_square_diffs: model must be 1 or 2, got {!r}".format(
                model))
    if hill <= hill_lower or hill > hill_upper or pic50 <= pic50_lower:
        # Large constant penalty steers the optimiser back inside the bounds.
        return 1e9
    predicted = dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50))
    return np.sum((responses - predicted)**2)
Example #4
0
def sum_of_square_diffs(params, model):
    """Sum of squared differences between observed and predicted responses.

    Reads the observations from the module-level ``concs`` and ``responses``
    arrays and the bounds from the module-level ``hill_lower``,
    ``hill_upper`` and ``pic50_lower``.

    Args:
        params: parameter vector. With ``model == 1`` only ``params[0]``
            (pIC50) is used and Hill is fixed at 1; with ``model == 2`` it
            must unpack to exactly ``(pic50, hill)``.
        model: model number, 1 or 2.

    Returns:
        ``1e9`` when Hill or pIC50 is outside its bounds, otherwise
        ``np.sum((responses - predicted)**2)``.

    Raises:
        ValueError: if ``model`` is neither 1 nor 2.
    """
    if model == 1:
        pic50 = params[0]
        hill = 1
    elif model == 2:
        pic50, hill = params
    else:
        # Fail loudly here; otherwise the bounds check below would raise an
        # UnboundLocalError because pic50 and hill were never assigned.
        raise ValueError(
            "sum_of_square_diffs: model must be 1 or 2, got {!r}".format(
                model))
    if hill <= hill_lower or hill > hill_upper or pic50 <= pic50_lower:
        # Out-of-bounds proposals get a large constant penalty.
        return 1e9
    predicted = dr.dose_response_model(concs, hill,
                                       dr.pic50_to_ic50(pic50))
    return np.sum((responses - predicted)**2)
Example #5
0
def log_data_likelihood(hill_is, pic50_is, sigma, experiments):
    Ne = len(experiments)
    answer = 0.
    for i in range(Ne):
        ic50 = dr.pic50_to_ic50(pic50_is[i])
        concs = experiments[i][:, 0]
        num_expt_pts = len(concs)
        data = experiments[i][:, 1]
        model_responses = dr.dose_response_model(concs, hill_is[i], ic50)
        exp_bit = np.sum((data - model_responses)**2) / (2 * sigma**2)
        # assuming noise Normal is truncated at 0 and 100
        truncated_scale = np.sum(
            np.log(
                st.norm.cdf(100, model_responses, sigma) -
                st.norm.cdf(0, model_responses, sigma)))
        answer -= (num_expt_pts * np.log(sigma) + exp_bit + truncated_scale)
    if np.isnan(answer):
        print "NaN from log_data_likelihood!"
        print "hill_is =", hill_is
        print "pic50_is =", pic50_is
        print "sigma =", sigma
        sys.exit()
    return answer
Example #6
0
def do_plot(drug_channel):
    """Fit each model to the pooled data for one drug/channel pair and plot it.

    For every model number from 1 to the module-level ``num_models`` the
    experiments are pooled into the module-level ``concs`` and ``responses``
    arrays (which ``sum_of_square_diffs`` reads), the sum of squares is
    minimised with CMA-ES, and the best-fit curve is drawn with the data on
    that model's panel. The figure is saved as a PNG under the module-level
    ``all_figs_dir`` and as a PDF under the drug/channel figures directory.

    Args:
        drug_channel: pair that unpacks to ``(drug, channel)``.
    """
    global concs, responses

    label_size = 14
    fig = plt.figure(figsize=(5, 8))
    # One stacked panel per model, keyed by model number.
    axes = {1: fig.add_subplot(211), 2: fig.add_subplot(212)}

    for model in xrange(1, num_models + 1):

        dr.define_model(model)

        drug, channel = drug_channel
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool every experiment into the module-level data arrays.
        expt_indices = xrange(num_expts)
        concs = np.concatenate(
            [np.array([])] + [experiments[i][:, 0] for i in expt_indices])
        responses = np.concatenate(
            [np.array([])] + [experiments[i][:, 1] for i in expt_indices])

        if model == 1:
            x0, sigma0 = np.ones(2), 0.1
        elif model == 2:
            # Start from the (pic50, hill) left over from the previous pass.
            x0, sigma0 = np.copy([pic50, hill]), 0.01

        es = cma.CMAEvolutionStrategy(x0, sigma0, cma.CMAOptions())
        while not es.stop():
            candidates = es.ask()
            es.tell(candidates,
                    [sum_of_square_diffs(c, model) for c in candidates])
            es.disp()
        res = es.result()
        ss = res[1]
        pic50, hill = res[0]
        if model == 1:
            # Model 1 fixes Hill at 1, so ignore the second fitted value.
            hill = 1

        # Extend the curve a decade below and two decades above the data.
        exponent_low = int(np.log10(np.min(concs))) - 1
        exponent_high = int(np.log10(np.max(concs))) + 2
        x_range = np.logspace(exponent_low, exponent_high, 501)
        predicted = dr.dose_response_model(x_range, hill,
                                           dr.pic50_to_ic50(pic50))

        ax = axes[model]
        ax.grid()
        ax.set_xscale('log')
        ax.set_ylim(0, 100)
        ax.set_ylabel(r"% {} block".format(channel), fontsize=label_size)
        ax.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                      fontsize=label_size)
        ax.plot(x_range, predicted, color='blue', lw=2, label="Best fit")
        ax.plot(concs,
                responses,
                'o',
                color='orange',
                ms=10,
                label="Expt data")
        ax.legend(loc=2)
        title = "$M_{}, pIC50 = {}, Hill = {}, SS = {}$".format(
            model, round(pic50, 2), round(hill, 2), round(ss, 2))
        ax.set_title(title, fontsize=label_size)

    fig.tight_layout()
    fig.savefig(all_figs_dir + "{}_{}_best_fits.png".format(drug, channel))
    fig.savefig(figs_dir + "{}_{}_best_fit.pdf".format(drug, channel))
    plt.close()
Example #7
0
def run_single_level(drug_channel):
    """Fit one drug/channel pair with CMA-ES, then sample it with adaptive MCMC.

    Steps, in order:
      1. Load the experiments with ``dr.load_crumb_data`` and pool them into
         single ``concs`` / ``responses`` arrays. Returns early if loading
         raises or if any response is NaN.
      2. Plot the prior pdfs held in ``dr.prior_xs`` / ``dr.prior_pdfs``.
      3. Minimise ``sum_of_square_diffs`` with CMA-ES (Hill fixed at 1 when
         ``args.model == 1``), write the best-fit parameters to a text file
         and plot the best-fit curve. Returns here if ``args.best_fit_only``.
      4. Run a Metropolis random walk on ``dr.log_target`` with an adaptive
         proposal covariance, saving every ``args.thinning``-th iteration.
      5. Discard burn-in, save the chain to ``chain_file`` and plot the
         marginals, the log-target trace and a pairwise scatterplot matrix.

    Reads the module-level names ``args``, ``temperature`` and ``num_params``
    and calls the module-level ``sum_of_square_diffs`` and ``initial_sigma``.

    Args:
        drug_channel: pair that unpacks to ``(drug, channel)``.

    Returns:
        None on every path.
    """

    drug, channel = drug_channel

    print "\n\n{} + {}\n\n".format(drug, channel)

    # seed for the CMA-ES options; rebound to 25 before the MCMC loop
    seed = 100

    try:
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
    # NOTE(review): bare except treats every failure (including typos or
    # KeyboardInterrupt) as "no data for this pair"
    except:
        print "Problem loading data, guessing there are no entries for {} + {} --- skipping".format(
            drug, channel)
        return None

    # drug and channel are rebound here to whatever the helper returns
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        args.model, drug, channel, temperature)

    # pool all experiments into one concentration array and one response array
    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    if np.any(np.isnan(responses)):
        print "Skipping {} because of empty responses / missing data".format(
            drug_channel)
        return None

    #print experiments
    #print concs
    #print responses

    # boolean masks splitting the responses into exactly 0, exactly 100 and
    # strictly in between; passed through to dr.log_target
    where_r_0 = responses == 0
    where_r_100 = responses == 100
    where_r_other = (0 < responses) & (responses < 100)

    #print "where_r_0:", where_r_0
    #print "where_r_100:", where_r_100
    #print "where_r_other:", where_r_other

    pi_bit = dr.compute_pi_bit_of_log_likelihood(where_r_other)

    # plot priors
    for i in xrange(num_params):
        fig = plt.figure(figsize=(4, 3))
        ax = fig.add_subplot(111)
        ax.grid()
        ax.plot(dr.prior_xs[i], dr.prior_pdfs[i], color='blue', lw=2)
        ax.set_xlabel(dr.labels[i])
        ax.set_ylabel("Prior pdf")
        fig.tight_layout()
        fig.savefig(images_dir + dr.file_labels[i] + "_prior_pdf.pdf")
        plt.close()

    # NOTE(review): this value is overwritten before the MCMC loop and is not
    # read in between
    start = time.time()

    # CMA-ES searches over x and the objective is evaluated at x**2 + lower
    # bound, which keeps every evaluated parameter at or above its lower bound
    sigma0 = 0.1
    opts = cma.CMAOptions()
    opts['seed'] = seed
    if args.model == 1:
        #x0 = np.array([2.5, 3.])
        x0 = np.array([2.5, 1.])
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower,dr.sigma_uniform_lower], temperature, pi_bit) for x in X])
            # only x[0] is used; Hill is passed as the constant 1.
            es.tell(X, [
                sum_of_square_diffs([x[0]**2 + dr.pic50_exp_lower, 1.], concs,
                                    responses) for x in X
            ])
            es.disp()
        res = es.result
        #pic50_cur, sigma_cur = res[0]**2 + [dr.pic50_exp_lower, dr.sigma_uniform_lower]
        pic50_cur = res[0][0]**2 + dr.pic50_exp_lower
        hill_cur = 1
    elif args.model == 2:
        #x0 = np.array([2.5, 1., 3.])
        x0 = np.array([2.5, 1.])
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower], temperature, pi_bit) for x in X])
            es.tell(X, [
                sum_of_square_diffs(
                    x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower], concs,
                    responses) for x in X
            ])
            es.disp()
        res = es.result
        #pic50_cur, hill_cur, sigma_cur = res[0]**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower]
        pic50_cur, hill_cur = res[0]**2 + [
            dr.pic50_exp_lower, dr.hill_uniform_lower
        ]

    # sigma is not fitted by CMA-ES; it is derived from the number of data
    # points and res[1] (used as the best sum of squares in the plot title)
    sigma_cur = initial_sigma(len(responses), res[1])
    #print "sigma_cur:", sigma_cur

    # parameter order used for the whole chain: pIC50 first, sigma last
    if args.model == 1:
        theta_cur = np.array([pic50_cur, sigma_cur])
    elif args.model == 2:
        theta_cur = np.array([pic50_cur, hill_cur, sigma_cur])

    #print "theta_cur:", theta_cur

    best_params_file = images_dir + "{}_{}_best_fit_params.txt".format(
        drug, channel)
    with open(best_params_file, "w") as outfile:
        outfile.write("# CMA-ES best fit params\n")
        if args.model == 1:
            outfile.write("# pIC50, sigma, (Hill=1, not included)\n")
        elif args.model == 2:
            outfile.write("# pIC50, Hill, sigma\n")
        np.savetxt(outfile, [theta_cur])

    proposal_scale = 0.05

    # initial proposal: diagonal covariance scaled to the best-fit magnitudes
    mean_estimate = np.copy(theta_cur)
    cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur)))

    # NOTE(review): cmaes_ll is only referenced by the commented-out print
    cmaes_ll = dr.log_target(responses, where_r_0, where_r_100, where_r_other,
                             concs, theta_cur, temperature, pi_bit)
    #print "cmaes_ll:", cmaes_ll

    best_fit_fig = plt.figure(figsize=(5, 4))
    best_fit_ax = best_fit_fig.add_subplot(111)
    best_fit_ax.set_xscale('log')
    best_fit_ax.grid()
    # a zero concentration cannot go through log10, so fall back to the
    # smallest non-zero concentration for the lower axis limit
    if np.min(concs) == 0:
        plot_lower_lim = int(np.log10(np.min(concs[np.nonzero(concs)]))) - 2
    else:
        plot_lower_lim = int(np.log10(np.min(concs))) - 2
    plot_upper_lim = int(np.log10(np.max(concs))) + 2
    best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim)
    best_fit_ax.set_ylim(0, 100)
    num_x_pts = 1001
    x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_x_pts)
    best_fit_curve = dr.dose_response_model(x_range, hill_cur,
                                            dr.pic50_to_ic50(pic50_cur))
    best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2)
    best_fit_ax.set_ylabel('% {} block'.format(channel))
    best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    best_fit_ax.set_title(r'$pIC50 = {}, Hill = {}; SS = {}$'.format(
        np.round(pic50_cur, 2), np.round(hill_cur, 2), round(res[1], 2)))
    best_fit_ax.plot(concs,
                     responses,
                     "o",
                     color='orange',
                     ms=10,
                     label='Data',
                     zorder=10)
    best_fit_ax.legend(loc=2)
    best_fit_fig.tight_layout()
    best_fit_fig.savefig(
        images_dir +
        '{}_{}_model_{}_CMA-ES_best_fit.png'.format(drug, channel, args.model))
    best_fit_fig.savefig(
        images_dir +
        '{}_{}_model_{}_CMA-ES_best_fit.pdf'.format(drug, channel, args.model))
    plt.close()

    if args.best_fit_only:
        print "\nStopping {}+{} after doing and plotting best fit\n".format(
            drug, channel)
        return None

    # let MCMC look around for a bit before adaptive covariance matrix
    # NOTE(review): the original comment said "same rule (100*dimension) as in
    # hierarchical case", but the factor used here is 1000 -- confirm which
    # is intended
    when_to_adapt = 1000 * num_params

    log_target_cur = dr.log_target(responses, where_r_0, where_r_100,
                                   where_r_other, concs, theta_cur,
                                   temperature, pi_bit)

    #print "initial log_target_cur =", log_target_cur

    # effectively step size, scales covariance matrix
    loga = 0.
    # what fraction of proposed samples are being accepted into the chain
    acceptance = 0.
    # what fraction of samples we WANT accepted into the chain
    # loga updates itself to try to make this dream come true
    target_acceptance = 0.25

    total_iterations = args.iterations
    thinning = args.thinning
    assert (total_iterations % thinning == 0)

    # how often to print a little status message
    # NOTE(review): the divisions below feed array sizes and indices, so they
    # rely on Python 2 integer division; status_when is 0 when
    # total_iterations < 20, which makes the modulo in the loop fail
    status_when = total_iterations / 20

    # +1 because row 0 holds the starting point
    saved_iterations = total_iterations / thinning + 1
    # also want to store log-target value at each iteration
    chain = np.zeros((saved_iterations, num_params + 1))

    chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur]))
    #print chain[0]

    #print "concs:", concs
    #print "responses:", responses

    # for reproducible results, otherwise select a new random seed
    seed = 25
    npr.seed(seed)

    # MCMC!
    t = 1
    start = time.time()
    while t <= total_iterations:
        # random-walk proposal centred on the current point
        theta_star = npr.multivariate_normal(theta_cur,
                                             np.exp(loga) * cov_estimate)
        accepted = 0
        log_target_star = dr.log_target(responses, where_r_0, where_r_100,
                                        where_r_other, concs, theta_star,
                                        temperature, pi_bit)
        # despite the name, this is the uniform draw compared against the
        # log acceptance ratio
        accept_prob = npr.rand()
        if (np.log(accept_prob) < log_target_star - log_target_cur):
            theta_cur = theta_star
            log_target_cur = log_target_star
            accepted = 1
        # running mean of the accept indicator over all iterations so far
        acceptance = ((t - 1.) * acceptance + accepted) / t
        if (t > when_to_adapt):
            # decaying weight: later samples move the estimates less
            s = t - when_to_adapt
            gamma_s = 1 / (s + 1)**0.6
            temp_covariance_bit = np.array([theta_cur - mean_estimate])
            # covariance is updated with the OLD mean, then the mean is updated
            cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot(
                np.transpose(temp_covariance_bit), temp_covariance_bit)
            mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur
            # grow the step size after an accept, shrink it after a reject
            loga += gamma_s * (accepted - target_acceptance)
        if (t % thinning == 0):
            chain[t / thinning, :] = np.concatenate(
                (np.copy(theta_cur), [log_target_cur]))
        if (t % status_when == 0):
            # NOTE(review): these timings are only used by the commented-out
            # prints below
            #print "{} / {}".format(t/status_when,total_iterations/status_when)
            time_taken_so_far = time.time() - start
            estimated_time_left = time_taken_so_far / t * (total_iterations -
                                                           t)
            #print "Time taken: {} s = {} min".format(np.round(time_taken_so_far,1),np.round(time_taken_so_far/60,2))
            #print "acceptance = {}".format(np.round(acceptance,5))
            #print "Estimated time remaining: {} s = {} min".format(np.round(estimated_time_left,1),np.round(estimated_time_left/60,2))
        t += 1

    #print "\nTime taken to do {} MCMC iterations: {} s\n".format(total_iterations, time.time()-start)
    #print "Final iteration:", chain[-1,:], "\n"

    # NOTE(review): burn_fraction is used as a divisor, so the first
    # saved_iterations / burn_fraction rows are dropped; the result must be an
    # integer to be usable as a slice index -- confirm the argument's type
    burn_fraction = args.burn_in_fraction
    burn = saved_iterations / burn_fraction

    chain = chain[burn:, :]  # remove burn-in before saving
    with open(chain_file, 'w') as outfile:
        # NOTE(review): this header lists Hill first, but theta is built as
        # (pIC50, sigma) or (pIC50, Hill, sigma) above -- the column order in
        # the file does not match the header
        outfile.write(
            '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n'
            .format(drug, channel))
        np.savetxt(outfile, chain)

    # the log-target sits in the column after the num_params parameters
    best_ll_index = np.argmax(chain[:, num_params])
    best_ll_row = chain[best_ll_index, :]
    #print "Best log-likelihood:", "\n", best_ll_row

    figs = []
    axs = []
    # plot all marginal posterior distributions
    # top panel: histogram of the samples; bottom panel: the same samples
    # against their saved-iteration number, sharing the x-axis
    for i in range(num_params):
        figs.append(plt.figure())
        axs.append([])
        axs[i].append(figs[i].add_subplot(211))
        axs[i][0].hist(chain[:, i],
                       bins=40,
                       normed=True,
                       color='blue',
                       edgecolor='blue')
        # NOTE(review): nothing plotted on this axis has a label
        axs[i][0].legend()
        axs[i][0].set_title("MCMC marginal distributions")
        axs[i][0].set_ylabel("Normalised frequency")
        axs[i][0].grid()
        plt.setp(axs[i][0].get_xticklabels(), visible=False)
        axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0]))
        axs[i][1].plot(chain[:, i], range(burn, saved_iterations))
        axs[i][1].invert_yaxis()
        axs[i][1].set_xlabel(dr.labels[i])
        axs[i][1].set_ylabel('Saved MCMC iteration')
        axs[i][1].grid()
        figs[i].tight_layout()
        figs[i].savefig(images_dir + '{}_{}_model_{}_{}_marginal.png'.format(
            drug, channel, args.model, dr.file_labels[i]))
        plt.close()

    # plot log-target path
    fig2 = plt.figure()
    ax3 = fig2.add_subplot(111)
    ax3.plot(range(burn, saved_iterations), chain[:, -1])
    ax3.set_xlabel('MCMC iteration')
    ax3.set_ylabel('log-target')
    ax3.grid()
    fig2.tight_layout()
    # NOTE(review): unlike the other outputs, this filename carries no
    # drug/channel/model prefix
    fig2.savefig(images_dir + 'log_target.png')
    plt.close()

    # plot scatterplot matrix of posterior(s)
    colormin, colormax = 1e9, 0
    # placeholder colour range for the first pass only
    norm = matplotlib.colors.Normalize(vmin=5, vmax=10)
    hidden_labels = []
    count = 0
    # there's probably a better way to do this
    # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots
    # pass 1 records the min/max bin counts over every pairwise histogram;
    # pass 2 redraws everything with a colour norm spanning that range
    # NOTE(review): the figure built in pass 1 is replaced by the pass-2
    # figure without being closed explicitly -- presumably it stays open
    # until the process ends; confirm
    while count < 2:
        axes = {}
        matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params))
        # lower triangle only: row i, column j with j <= i
        for i in range(num_params):
            for j in range(i + 1):
                ij = str(i) + str(j)
                subplot_position = num_params * i + j + 1
                if i == j:
                    # diagonal: marginal histogram of parameter i
                    axes[ij] = matrix_fig.add_subplot(num_params, num_params,
                                                      subplot_position)
                    axes[ij].hist(chain[:, i],
                                  bins=50,
                                  normed=True,
                                  color='blue',
                                  edgecolor='blue')
                elif j == 0:  # this column shares x-axis with top-left
                    axes[ij] = matrix_fig.add_subplot(num_params,
                                                      num_params,
                                                      subplot_position,
                                                      sharex=axes["00"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[:, j],
                        chain[:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                else:
                    # interior panel: x shared with the diagonal panel of its
                    # column, y shared with the first panel of its row
                    axes[ij] = matrix_fig.add_subplot(
                        num_params,
                        num_params,
                        subplot_position,
                        sharex=axes[str(j) + str(j)],
                        sharey=axes[str(i) + "0"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[:, j],
                        chain[:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                axes[ij].xaxis.grid()
                if (i != j):
                    axes[ij].yaxis.grid()
                # collect the tick labels to hide: x labels everywhere except
                # the bottom row, y labels everywhere except the first
                # column, plus the y labels of the top-left histogram
                if i != num_params - 1:
                    hidden_labels.append(axes[ij].get_xticklabels())
                if j != 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == j == 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == num_params - 1:
                    axes[str(i) + str(j)].set_xlabel(dr.labels[j], fontsize=18)
                if j == 0 and i > 0:
                    axes[str(i) + str(j)].set_ylabel(dr.labels[i], fontsize=18)

                plt.xticks(rotation=30)
        norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax)
        count += 1

    # hidden_labels holds the labels gathered in both passes
    plt.setp(hidden_labels, visible=False)

    matrix_fig.tight_layout()
    matrix_fig.savefig(images_dir +
                       "{}_{}_model_{}_scatterplot_matrix.png".format(
                           drug, channel, args.model))
    matrix_fig.savefig(images_dir +
                       "{}_{}_model_{}_scatterplot_matrix.pdf".format(
                           drug, channel, args.model))
    plt.close()

    print "\n\n{} + {} complete!\n\n".format(drug, channel)
    return None
Example #8
0
def run_hierarchical(drug_channel):
    global pic50_prior
    pic50_prior = [
        -2.
    ]  # bad way to deal with sum_of_square_diffs in hierarchical case

    global pic50_hill_lowers
    pic50_hill_priors_lowers = np.array([-2., 0.])

    drug, channel = drug_channel

    print "\n\n{} + {}\n\n".format(drug, channel)

    # for reproducible results, otherwise choose a different seed
    seed = 1

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    if (0 < (args.num_expts) < num_expts):
        num_expts = args.num_expts
        experiment_numbers = [x for x in experiment_numbers[:num_expts]]
        experiments = [x for x in experiments[:num_expts]]
    elif (args.num_expts == 0):
        print "Fitting to all datasets\n"
    else:
        print "You've asked to fit to an impossible number of experiments for {} + {}\n".format(
            drug, channel)
        print "Therefore proceeding with all experiments in the input data file\n"

    # set up where to save chains and figures to
    # also renames anything with a '/' in its name and changes it to a '_'
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(
        drug, channel, num_expts)

    best_fits = []
    for expt in experiment_numbers:
        start = time.time()
        x0 = np.array([2.5, 1.])  # (pIC50,Hill) not fitting sigma by CMA-ES
        sigma0 = 0.1
        opts = cma.CMAOptions()
        opts['seed'] = expt
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            es.tell(X, [
                sum_of_square_diffs(x**2 + pic50_hill_priors_lowers,
                                    experiments[expt][:, 0],
                                    experiments[expt][:, 1]) for x in X
            ])
        res = es.result
        best_fits.append(
            np.concatenate(
                (res[0]**2 + pic50_hill_priors_lowers,
                 [initial_sigma(len(experiments[expt][:, 0]), res[1])])))

    best_fits = np.array(best_fits)

    fig = plt.figure(figsize=(5.5, 4.5))
    ax = fig.add_subplot(111)
    ax.set_xscale('log')
    xmin = 1000
    xmax = -1000
    for expt in experiments:
        a = np.min(expt[:, 0])
        b = np.max(expt[:, 0])
        if a < xmin:
            xmin = a
        if b > xmax:
            xmax = b
    xmin = int(np.log10(xmin)) - 1
    xmax = int(np.log10(xmax)) + 3
    num_x_pts = 101
    x = np.logspace(xmin, xmax, num_x_pts)
    # from http://colorbrewer2.org
    colors = [
        '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
        '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928'
    ]
    skip_best_fits_plot = False
    if (num_expts > len(colors)):
        skip_best_fits_plot = True
        print "Not enough colours to print all experiments' best fits, so skipping that"

    if (not skip_best_fits_plot):
        for expt in experiment_numbers:
            print "best_fits:", best_fits
            print "best_fits[{}]:".format(expt), best_fits[expt]
            ax.plot(x,
                    dr.dose_response_model(
                        x, best_fits[expt, 1],
                        dr.pic50_to_ic50(best_fits[expt, 0])),
                    color=colors[expt],
                    lw=2)
            ax.scatter(experiments[expt][:, 0],
                       experiments[expt][:, 1],
                       label='Expt {}'.format(expt + 1),
                       color=colors[expt],
                       s=100)
        ax.set_ylim(0, 100)
        ax.set_xlim(min(x), max(x))
        ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
        ax.set_ylabel('% {} block'.format(channel))
        ax.legend(loc=2)
        ax.grid()
        ax.set_title('Hills = {}\nIC50s = {}'.format(
            [round(best_fits[expt, 1], 1) for expt in experiment_numbers], [
                round(dr.pic50_to_ic50(best_fits[expt, 0]), 1)
                for expt in experiment_numbers
            ]))
        fig.tight_layout()
        fig.savefig(figs_dir +
                    '{}_{}_cma-es_best_fits.png'.format(drug, channel))
        fig.savefig(figs_dir +
                    '{}_{}_cma-es_best_fits.pdf'.format(drug, channel))
    plt.close()

    locs = np.array([0., 2., -4, 0.01,
                     dr.sigma_loc])  # lower bounds for alpha,beta,mu,s,sigma

    sigma_cur = np.mean(best_fits[:, -1])
    if (sigma_cur <= locs[3]):
        sigma_cur = locs[3] + 0.1
    print "sigma_cur =", sigma_cur

    # find initial alpha and beta values by fitting log-logistic distribution to best fits
    # there is an inbuilt fit function, but I found it to be unreliable for some reason
    x0 = np.array([0.5, 0.5])
    sigma0 = 0.1
    opts = cma.CMAOptions()
    opts['seed'] = 1
    es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
    while not es.stop():
        X = es.ask()
        es.tell(X, [
            -np.product(st.fisk.pdf(best_fits[:, 1], c=x[1], scale=x[0],
                                    loc=0)) for x in X
        ])
    res = es.result

    alpha_cur, beta_cur = np.copy(res[0])
    if alpha_cur <= locs[0]:
        alpha_cur = locs[0] + 0.1
    if beta_cur <= locs[1]:
        beta_cur = locs[1] + 0.1

    # here I have used the fit function, for some reason this one worked more consitently
    # but again, the starting point for MCMC is not too important
    # a bad starting position can increase the time you have to run MCMC for to get a "converged" output
    # at worst, it can get stuck in a local optimum, but we haven't found this to be a problem yet
    mu_cur, s_cur = st.logistic.fit(best_fits[:, 0])
    if mu_cur <= locs[2]:
        mu_cur = locs[2] + 0.1
    if s_cur <= locs[3]:
        s_cur = locs[3] + 0.1

    first_iteration = np.concatenate(
        ([alpha_cur, beta_cur, mu_cur,
          s_cur], best_fits[:, :-1].flatten(), [sigma_cur]))
    print "first mcmc iteration:\n", first_iteration

    # these are the numbers taken straight from Elkins (see paper for reference)
    elkins_hill_alphas = np.array([
        1.188, 1.744, 1.530, 0.930, 0.605, 1.325, 1.179, 0.979, 1.790, 1.708,
        1.586, 1.469, 1.429, 1.127, 1.011, 1.318, 1.063
    ])
    elkins_hill_betas = 1. / np.array([
        0.0835, 0.1983, 0.2089, 0.1529, 0.1206, 0.2386, 0.2213, 0.2263, 0.1784,
        0.1544, 0.2486, 0.2031, 0.2025, 0.1510, 0.1837, 0.1677, 0.0862
    ])
    elkins_pic50_mus = np.array([
        5.235, 5.765, 6.060, 5.315, 5.571, 7.378, 7.248, 5.249, 6.408, 5.625,
        7.321, 6.852, 6.169, 6.217, 5.927, 7.414, 4.860
    ])
    elkins_pic50_sigmas = np.array([
        0.0760, 0.1388, 0.1459, 0.2044, 0.1597, 0.2216, 0.1856, 0.1560, 0.1034,
        0.1033, 0.1914, 0.1498, 0.1464, 0.1053, 0.1342, 0.1808, 0.0860
    ])

    elkins = [
        elkins_hill_alphas, elkins_hill_betas, elkins_pic50_mus,
        elkins_pic50_sigmas
    ]

    # building Gamma prior distributions for alpha,beta,mu,s(,sigma, but sigma not from elkins)
    # wide enough to cover Elkins values and allow room for extra variation
    alpha_mode = np.mean(elkins_hill_alphas)
    beta_mode = np.mean(elkins_hill_betas)
    mu_mode = np.mean(elkins_pic50_mus)
    s_mode = np.mean(elkins_pic50_sigmas)
    sigma_mode = dr.sigma_mode

    modes = np.array([alpha_mode, beta_mode - 2., mu_mode, s_mode, sigma_mode])

    print "modes:", modes

    # designed for priors to have modes at means of elkins data, but width is more important

    shapes = np.array([5., 2.5, 7.5, 2.5,
                       dr.sigma_shape])  # must all be greater than 1
    scales = (modes - locs) / (shapes - 1.)

    labels = [r'$\alpha$', r'$\beta$', r'$\mu$', r'$s$', r'$\sigma$']
    file_labels = ['alpha', 'beta', 'mu', 's', 'sigma']

    # ranges to plot priors
    mins = [0, 0, -5, 0, 0]
    maxs = [8, 22, 20, 2, 25]

    prior_xs = []
    priors = []

    total_axes = (6, 4)
    fig = plt.figure(figsize=(6, 7))
    for i in range(len(labels) - 1):
        if i == 0:
            axloc = (0, 0)
        elif i == 1:
            axloc = (0, 2)
        elif i == 2:
            axloc = (2, 0)
        elif i == 3:
            axloc = (2, 2)
        ax = plt.subplot2grid(total_axes, axloc, colspan=2, rowspan=2)
        x_prior = np.linspace(mins[i], maxs[i], 501)
        prior = st.gamma.pdf(x_prior,
                             a=shapes[i],
                             scale=scales[i],
                             loc=locs[i])
        prior_xs.append(x_prior)
        priors.append(prior)
        ax.plot(x_prior, prior, label='Gamma prior', lw=2)
        ax.set_xlabel(labels[i])
        ax.set_ylabel('Probability density')
        ax.set_xlim(mins[i], maxs[i])
        ax.grid()
        priormax = np.max(prior)
        hist, bin_edges = np.histogram(elkins[i], bins=10)
        histmax = np.max(hist)
        w = bin_edges[1] - bin_edges[0]
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2.
        # scaled histogram just to fit plot better, but this scaling doesn't matter
        ax.bar(bin_edges[:-1],
               priormax / histmax * hist,
               width=w,
               color='gray',
               edgecolor='grey')
    i = len(labels) - 1
    ax = plt.subplot2grid(total_axes, (4, 1), colspan=2, rowspan=2)
    x_prior = np.linspace(mins[i], maxs[i], 501)
    prior = st.gamma.pdf(x_prior, a=shapes[i], scale=scales[i], loc=locs[i])
    ax.plot(x_prior, prior, label='Gamma prior', lw=2)
    prior_xs.append(x_prior)
    priors.append(prior)

    ax.set_xlabel(labels[i])
    ax.set_ylabel('Probability density')
    ax.set_xlim(mins[i], maxs[i])
    ax.grid()
    fig.tight_layout()
    fig.savefig(figs_dir + 'all_prior_distributions.png')
    fig.savefig(figs_dir + 'all_prior_distributions.pdf')
    plt.close()

    #sys.exit # uncomment this if you just want to plot the priors and then quit

    # create/wipe MCMC output file
    with open(chain_file, 'w') as outfile:
        outfile.write(
            "# Hill ~ log-logistic(alpha,beta), pIC50 ~ logistic(mu,s)\n")
        outfile.write(
            "# alpha, beta, mu, s, hill_1, pic50_1, hill_2, pic50_2, ..., hill_Ne, pic50_Ne, sigma\n"
        )  # this is the order of parameters stored in the chain

    # have to choose initial covariance matrix for proposal distribution
    # we set it to a diagonal with entries scaled to the initial parameter values
    first_cov = np.diag(0.01 * np.abs(first_iteration))

    mean_estimate = np.copy(first_iteration)

    dim = len(first_iteration)

    # we do not start adaptation straight away
    # just to give the algorithm a chance to look around
    # many of these pre-adaptation proposals will probably be rejected, if the initial step size is too lareg
    when_to_adapt = 100 * dim

    theta_cur = np.copy(first_iteration)
    cov_cur = np.copy(first_cov)

    print "theta_cur =", theta_cur

    log_target_cur = log_target_distribution(experiments, theta_cur, shapes,
                                             scales, locs)

    print "initial log_target_cur =", log_target_cur

    # effectively step size, scales covariance matrix
    loga = 0.
    # what fraction of proposed samples are being accepted into the chain
    acceptance = 0.
    # what fraction of samples we WANT accepted into the chain
    # loga updates itself to try to make this dream come true
    target_acceptance = 0.25

    # perform thinning to reduce autocorrelation (make saved iterations more closely represent independent samples from target distribution)
    # also saves file space, win win
    thinning = args.thinning

    try:
        total_iterations = args.iterations
    except:
        total_iterations = 200000
    # after what fraction of total_iterations to print a little status message
    status_when = 10000
    saved_iterations = total_iterations / thinning + 1
    pre_thin_burn = total_iterations / 4
    # we discard the first quarter of iterations, as this gen
    burn = saved_iterations / 4

    # pre-allocate the space for MCMC iterations
    # not a problem when we don't need to do LOADS of iterations
    # but might become more of a hassle if we wanted to run it for ages along with loads of parameters
    chain = np.zeros((saved_iterations, dim + 1))
    chain[0, :] = np.copy(np.concatenate((first_iteration, [log_target_cur])))

    # MCMC!
    start = time.time()
    t = 1
    while t <= total_iterations:
        theta_star = npr.multivariate_normal(theta_cur, np.exp(loga) * cov_cur)
        log_target_star = log_target_distribution(experiments, theta_star,
                                                  shapes, scales, locs)
        accept_prob = npr.rand()
        if (np.log(accept_prob) < log_target_star - log_target_cur):
            theta_cur = theta_star
            log_target_cur = log_target_star
            accepted = 1
        else:
            accepted = 0
        acceptance = ((t - 1.) * acceptance + accepted) / t
        if (t > when_to_adapt):
            s = t - when_to_adapt
            gamma_s = 1 / (s + 1)**0.6
            temp_covariance_bit = np.array([theta_cur - mean_estimate])
            cov_cur = (1 - gamma_s) * cov_cur + gamma_s * np.dot(
                np.transpose(temp_covariance_bit), temp_covariance_bit)
            mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur
            loga += gamma_s * (accepted - target_acceptance)
        if t % thinning == 0:
            chain[t / thinning, :] = np.concatenate(
                (np.copy(theta_cur), [log_target_cur]))
        if (t % status_when == 0):
            print "{} / {}".format(t / status_when,
                                   total_iterations / status_when)
            time_taken_so_far = time.time() - start
            estimated_time_left = time_taken_so_far / t * (total_iterations -
                                                           t)
            print "Time taken: {} s = {} min".format(
                np.round(time_taken_so_far, 1),
                np.round(time_taken_so_far / 60, 2))
            print "acceptance = {}".format(np.round(acceptance, 5))
            print "Estimated time remaining: {} s = {} min".format(
                np.round(estimated_time_left, 1),
                np.round(estimated_time_left / 60, 2))
        t += 1
    print "**********"
    print "final_iteration =", chain[-1, :]
    with open(chain_file, 'a') as outfile:
        np.savetxt(outfile, chain)

    # save (alpha,mu) samples to be used as (Hill,pIC50) values in AP simulations
    # these are direct 'top-level' samples, not samples from the posterior predictive distributions
    indices = npr.randint(burn, saved_iterations, args.num_APs)
    samples_file = dr.alpha_mu_downsampling(drug, channel)
    AP_samples = chain[indices, :]
    print "saving (alpha,mu) samples to", samples_file
    with open(samples_file, 'w') as outfile:
        outfile.write(
            '# {} (alpha,mu) samples from hierarchical MCMC for {} + {}\n'.
            format(args.num_APs, drug, channel))
        np.savetxt(outfile, AP_samples[:, [0, 2]])

    # this can be a quick visual check to see if the chain is mixing well
    # it will plot one big tall figure with all parameter paths plotted
    if args.plot_parameter_paths:
        fig = plt.figure(figsize=(10, 4 * dim))
        ax0 = fig.add_subplot(dim, 1, 1)
        ax0.plot(chain[:, 0])
        ax0.set_ylabel(r'$\alpha$')
        plt.setp(ax0.get_xticklabels(), visible=False)
        for i in range(1, dim):
            ax = fig.add_subplot(dim, 1, i + 1, sharex=ax0)
            ax.plot(chain[:t, i])
            if i < dim - 1:
                plt.setp(ax.get_xticklabels(), visible=False)
            elif i == 1:
                y_label = r'$\beta$'
            elif i == 2:
                y_label = r'$\mu$'
            elif i == 3:
                y_label = r'$s$'
            elif (i % 2 == 0) and (i < dim - 1):
                y_label = r'$pIC50_{' + str(i / 2 - 1) + '}$'
            elif (i < dim - 1):
                y_label = r'$Hill_{' + str(i / 2 - 1) + '}$'
            else:
                y_label = r'$\sigma$'
                ax.set_xlabel('Iteration (thinned)')
            ax.set_ylabel(y_label)
        fig.tight_layout()
        fig.savefig(figs_dir +
                    '{}_{}_parameter_paths.png'.format(drug, channel))
        plt.close()

    # plot all marginal posteriors separately, after discarding burn-in
    # also a good visual check to see if it looks like they have converged
    marginals_dir = figs_dir + 'marginals/png/'
    if not os.path.exists(marginals_dir):
        os.makedirs(marginals_dir)
    for i in range(dim):
        fig = plt.figure(figsize=(5, 4))
        ax = fig.add_subplot(111)
        ax.hist(chain[burn:, i],
                bins=50,
                normed=True,
                color='blue',
                edgecolor='blue')
        ax.set_ylabel('Marginal probability density')
        if i == 0:
            x_label = r'$\alpha$'
            filename = 'alpha'
        elif i == 1:
            x_label = r'$\beta$'
            filename = 'beta'
        elif i == 2:
            x_label = r'$\mu$'
            filename = 'mu'
        elif i == 3:
            x_label = r'$s$'
            filename = 's'
        elif (i % 2 == 0) and (i < dim - 1):
            x_label = r'$Hill_{' + str(i / 2 - 1) + '}$'
            filename = 'hill_{}'.format(i / 2 - 1)
        elif (i < dim - 1):
            x_label = r'$pIC50_{' + str(i / 2 - 1) + '}$'
            filename = 'pic50_{}'.format(i / 2 - 1)
        else:
            x_label = r'$\sigma$'
            filename = 'sigma'
        ax.set_xlabel(x_label)
        fig.tight_layout()
        fig.savefig(marginals_dir +
                    '{}_{}_{}_marginal.png'.format(drug, channel, filename))
        #fig.savefig(marginals_dir+'{}_{}_{}_marginal.pdf'.format(drug,channel,filename))
        plt.close()

    total_axes = (6, 4)
    fig = plt.figure(figsize=(6, 7))
    for i in range(5):  # have to do sigma separately
        if i == 0:
            axloc = (0, 0)
        elif i == 1:
            axloc = (0, 2)
        elif i == 2:
            axloc = (2, 0)
        elif i == 3:
            axloc = (2, 2)
        elif i == 4:
            axloc = (4, 0)
        ax = plt.subplot2grid(total_axes, axloc, colspan=2, rowspan=2)

        ax.set_xlabel(labels[i])
        ax.set_ylabel('Probability density')

        ax.grid()
        if (i < 4):
            min_sample = np.min(chain[burn:, i])
            max_sample = np.max(chain[burn:, i])
            ax.hist(chain[burn:, i],
                    bins=50,
                    normed=True,
                    color='blue',
                    edgecolor='blue')
        elif (i == 4):
            min_sample = np.min(chain[burn:, -2])
            max_sample = np.max(chain[burn:, -2])
            ax.hist(chain[burn:, -2],
                    bins=50,
                    normed=True,
                    color='blue',
                    edgecolor='blue')  # -1 would be log-target
        ax.set_xlim(min_sample, max_sample)
        pts_in_this_range = np.where((prior_xs[i] >= min_sample)
                                     & (prior_xs[i] <= max_sample))
        x_in_this_range = prior_xs[i][pts_in_this_range]
        prior_in_this_range = priors[i][pts_in_this_range]
        line = ax.plot(x_in_this_range,
                       prior_in_this_range,
                       lw=2,
                       color='red',
                       label='Prior distributions')
        if (i == 0 or i == 3):
            plt.xticks(rotation=90)

    leg_ax = plt.subplot2grid(total_axes, (4, 2), colspan=2, rowspan=2)
    leg_ax.axis('off')
    hist = mpatches.Patch(color='blue', label='Normalised histograms')
    leg_ax.legend(handles=line + [hist],
                  loc="center",
                  fontsize=12,
                  bbox_to_anchor=[0.38, 0.7])

    fig.tight_layout()
    fig.savefig(figs_dir + 'all_prior_distributions_and_marginals.png')
    fig.savefig(figs_dir + 'all_prior_distributions_and_marginals.pdf')
    plt.close()

    print "Marginal plots saved in", marginals_dir

    print "\n\n{} + {} complete!\n\n".format(drug, channel)
Example #9
0
def sum_of_square_diffs(unscaled_params, doses, responses):
    """Return the sum of squared residuals between model and measured responses.

    The caller works with unconstrained values; they are mapped onto model
    parameters here by squaring, so Hill can never be negative and pIC50 can
    never fall below -1.

    unscaled_params -- indexable; element 0 maps to Hill, element 1 to pIC50
    doses           -- concentrations handed to dr.dose_response_model
    responses       -- measured responses the model output is compared with
    """
    hill_coefficient = unscaled_params[0]**2  # squared, so Hill >= 0
    pic50_value = unscaled_params[1]**2 - 1  # squared minus one, so pIC50 >= -1
    predicted = dr.dose_response_model(doses, hill_coefficient,
                                       dr.pic50_to_ic50(pic50_value))
    residuals = predicted - responses
    return np.sum(residuals**2)
def plot_mcmc_samples(drug_channel):
    """Plot dose-response curves for random MCMC samples of models 1 and 2.

    One figure with two stacked panels is produced: the top panel is model 1
    (Hill fixed at 1, pIC50 varying) and the bottom panel is model 2 (pIC50
    and Hill both varying).  Each panel overlays 1200 curves drawn with
    replacement from the saved chain, plus the pooled experimental data.

    drug_channel -- (drug, channel) pair

    Relies on the module-level names `temperature` and `all_figs_dir`.
    The PNG is written under `all_figs_dir`, the PDF under the per-compound
    figures directory returned by dr.drug_channel_figs_dir.
    Returns None.
    """
    drug, channel = drug_channel

    fig = plt.figure(figsize=(5, 8))
    axes = {}
    axes[1] = fig.add_subplot(211)
    axes[2] = fig.add_subplot(212)

    # the title depends only on the model, so it is looked up once per panel
    titles = {
        1: "$M_1$, fixed $Hill=1$, varying $pIC50$",
        2: "$M_2$, varying $pIC50$ and $Hill$",
    }

    num_models = 2
    how_many_samples_to_plot = 1200
    fsize = 14
    num_pts = 101

    for model in xrange(1, num_models + 1):
        panel = axes[model]

        dr.define_model(model)

        chain_file = dr.define_chain_file(model, drug, channel, temperature)

        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # pool every experiment into flat concentration / response arrays
        concs = np.array([])
        responses = np.array([])
        for i in xrange(num_expts):
            concs = np.concatenate((concs, experiments[i][:, 0]))
            responses = np.concatenate((responses, experiments[i][:, 1]))

        # dr.num_params is read after define_model(model) has been called
        mcmc_samples = np.loadtxt(chain_file, usecols=range(dr.num_params))
        saved_its = mcmc_samples.shape[0]
        sample_indices = npr.randint(0, saved_its, how_many_samples_to_plot)
        mcmc_samples = mcmc_samples[sample_indices]

        # a zero concentration would give log10(0) = -inf and make int()
        # raise, so the lower axis bound uses the smallest positive value
        conc_min = np.min(concs[concs > 0])
        conc_max = np.max(concs)

        # pad the log-scale range by one decade below and two above the data
        x_range = np.logspace(
            int(np.log10(conc_min)) - 1,
            int(np.log10(conc_max)) + 2, num_pts)

        panel.set_xscale('log')
        panel.grid()
        panel.set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        panel.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                         fontsize=fsize)
        panel.set_ylim(0, 100)

        pic50_draws = mcmc_samples[:, 0]
        if model == 1:
            hill_draws = [1] * how_many_samples_to_plot
        else:
            hill_draws = mcmc_samples[:, 1]

        # very low alpha: overlapping curves build up a density-like shading
        for pic50, hill in zip(pic50_draws, hill_draws):
            panel.plot(x_range,
                       dr.dose_response_model(x_range, hill,
                                              dr.pic50_to_ic50(pic50)),
                       color='black',
                       alpha=0.01)
        panel.plot(concs,
                   responses,
                   'o',
                   color='orange',
                   ms=10,
                   label="Expt data")
        panel.set_title(titles[model], fontsize=fsize)
        panel.legend(loc=2)

    fig.tight_layout()
    fig.savefig(all_figs_dir + '{}_{}_mcmc_samples.png'.format(drug, channel))
    fig.savefig(figs_dir + '{}_{}_mcmc_samples.pdf'.format(drug, channel))
    plt.close()
    return None
Example #11
0
def run(drug_channel):
    drug, channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug, channel)

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts

    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(
        drug, channel, num_expts)

    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    burn = end / 4

    pic50_samples = np.zeros(args.num_hist_samples)
    hill_samples = np.zeros(args.num_hist_samples)
    rand_idx = npr.randint(burn, end, args.num_hist_samples)
    for t in xrange(args.num_hist_samples):
        alpha, beta, mu, s = chain[rand_idx[t], :4]
        hill_samples[t] = st.fisk.rvs(c=beta, scale=alpha, loc=0)
        pic50_samples[t] = st.logistic.rvs(mu, s)

    num_pts = 40
    fig = plt.figure(figsize=(11, 7))

    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin, xmax, num_pts)
    ax1.set_xscale('log')
    ax1.set_ylim(0, 100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin, 10**xmax)

    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)

    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)

    subset_idx = npr.randint(0, args.num_hist_samples, args.num_samples)
    for i in xrange(
            args.num_samples
    ):  # only plot the first T of the H samples (should be fine because they're all randomly selected)
        ax1.plot(concs,
                 dr.dose_response_model(concs, hill_samples[i],
                                        dr.pic50_to_ic50(pic50_samples[i])),
                 color='black',
                 alpha=0.01)

    lfs = 9

    ax1.legend(loc=2, fontsize=lfs)

    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0, 100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc, hill_samples,
                                        dr.pic50_to_ic50(pic50_samples)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))

    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc="best", fontsize=lfs)

    ax3 = fig.add_subplot(232, sharey=ax1, sharex=ax1)
    ax3.grid()
    ax3.set_xscale('log')
    ax3.set_ylim(0, 100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin, 10**xmax)

    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)

    alpha_indices = npr.randint(burn, end, args.num_samples)
    alpha_samples = chain[alpha_indices, 0]
    mu_samples = chain[alpha_indices, 2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)
    for i in xrange(args.num_samples):
        ax3.plot(concs,
                 dr.dose_response_model(concs, alpha_samples[i],
                                        dr.pic50_to_ic50(mu_samples[i])),
                 color='black',
                 alpha=0.01)
    ax3.legend(loc=2, fontsize=lfs)
    ax4 = fig.add_subplot(235, sharey=ax2, sharex=ax2)
    ax4.set_xlim(0, 100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()

    hist_indices = npr.randint(burn, end, args.num_hist_samples)
    alphas = chain[hist_indices, 0]
    mus = chain[hist_indices, 2]

    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc, alphas, dr.pic50_to_ic50(mus)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')

    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)

    # now plot non-hierarchical

    num_params = 3
    temperature = 1
    if args.fix_hill:
        model = 1
    else:
        model = 2
    drug, channel, chain_file, figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        model, drug, channel, temperature)
    chain = np.loadtxt(
        chain_file,
        usecols=range(num_params -
                      1))  # not interested in log-target values right now
    end = chain.shape[0]
    burn = end / 4

    sample_indices = npr.randint(burn, end, args.num_samples)
    samples = chain[sample_indices, :]

    ax5 = fig.add_subplot(233, sharey=ax1, sharex=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    ax5.set_xscale('log')
    ax5.set_ylim(0, 100)
    ax5.set_xlim(10**xmin, 10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    ax4.legend(loc="best", fontsize=lfs)

    for expt in experiment_numbers:
        if expt == 1:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        label='All expts',
                        zorder=10)
        else:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        zorder=10)

    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,
                    color=colors[3 + i],
                    alpha=0.8,
                    lw=2,
                    label=r"{} $\mu$M".format(conc))
    for i in xrange(args.num_samples):
        pic50, hill = samples[i, :]
        ax5.plot(concs,
                 dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50)),
                 color='black',
                 alpha=0.01)
    ax5.legend(loc=2, fontsize=lfs)

    sample_indices = npr.randint(burn, end, args.num_hist_samples)
    samples = chain[sample_indices, :]
    ax6 = fig.add_subplot(236, sharey=ax2, sharex=ax2)
    ax6.set_xlim(0, 100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc, samples[:, 1],
                                        dr.pic50_to_ic50(samples[:, 0])),
                 bins=50,
                 normed=True,
                 alpha=0.8,
                 color=colors[3 + i],
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax6.set_title('F. Single-level inferred\neffects')

    ax6.legend(loc="best", fontsize=lfs)

    plot_dir = dr.all_predictions_dir(drug, channel)

    fig.tight_layout()
    png_file = plot_dir + '{}_{}_all_predictions_corrected.png'.format(
        drug, channel)
    print png_file
    fig.savefig(png_file)
    pdf_file = plot_dir + '{}_{}_all_predictions_corrected.pdf'.format(
        drug, channel)
    print pdf_file
    fig.savefig(
        pdf_file
    )  # uncomment to save as pdf, or change extension to whatever you want

    plt.close()

    print "Figures saved in", plot_dir
def run(drug, channel):
    print "\n\n{} + {}\n\n".format(drug, channel)

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    if (0 < (args.num_expts) < num_expts):
        num_expts = args.num_expts
        experiment_numbers = [x for x in experiment_numbers[:num_expts]]
        experiments = [x for x in experiments[:num_expts]]
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(
        drug, channel, num_expts)
    chain = np.loadtxt(chain_file)
    end, num_params = chain.shape
    burn = end / 4

    top_params = ['alpha', 'beta', 'mu', 's', 'sigma']
    top_param_indices = [0, 1, 2, 3, num_params - 2]
    mid_param_indices = [
        i for i in range(num_params - 1) if i not in top_param_indices
    ]
    num_expts = len(mid_param_indices) / 2
    if num_expts <= 4:  # qualitative and colourblind safe, apparently
        colors = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c']
        colors = ['#d7191c', '#fdae61', '#2c7bb6']
    else:
        colors = [
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
            '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a'
        ]
    top_param_labels = [r'$\alpha$', r'$\beta$', r'$\mu$', r'$s$', r'$\sigma$']

    num_curves = 50
    indices = npr.randint(burn, end, num_curves)
    samples = chain[indices, :]

    all_fig = plt.figure(figsize=(4, 8))
    curves = all_fig.add_subplot(311)
    curves.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    curves.set_ylabel(r'% {} block'.format(channel))
    curves.grid()
    curves.set_xscale('log')
    x_range = np.logspace(-4, 2, 201)
    for j in xrange(num_curves):
        for i in xrange(3):
            response = dr.dose_response_model(
                x_range, samples[j, 4 + 2 * i],
                dr.pic50_to_ic50(samples[j, 4 + 2 * i + 1]))
            curves.plot(x_range, response, color=colors[i], alpha=0.2)

    for i, expt in enumerate(experiments):
        curves.plot(expt[:, 0],
                    expt[:, 1],
                    'o',
                    color=colors[i],
                    zorder=10,
                    ms=10)
    curves.set_xlim(10**-4, 10**2)

    pic50s = all_fig.add_subplot(312)
    pic50s.grid()
    pic50s.set_ylabel('Probability density')
    pic50s.set_xlabel(r'$pIC50_i$')
    hills = all_fig.add_subplot(313)
    hills.grid()
    hills.set_ylabel('Probability density')
    hills.set_xlabel(r'$Hill_i$')

    alpha = 1. / (num_expts - 1)
    for i, col in enumerate(mid_param_indices):
        color = matplotlib.colors.ColorConverter().to_rgba(colors[i / 2],
                                                           alpha=alpha)
        if i % 2 == 0:
            label = r'$Hill_{}$'.format(i / 2 + 1)
            file_label = 'Hill_{}'.format(i / 2 + 1)
            hills.hist(chain[burn:, col],
                       normed=True,
                       bins=40,
                       color=color,
                       edgecolor='none',
                       label=r'$i = {}$'.format(i / 2 + 1))
        else:
            label = r'$pIC50_{}$'.format(i / 2 + 1)
            file_label = 'pIC50_{}'.format(i / 2 + 1)
            pic50s.hist(chain[burn:, col],
                        normed=True,
                        bins=40,
                        color=color,
                        edgecolor='none',
                        label=r'$i = {}$'.format(i / 2 + 1))
    hills.legend(loc=1, fontsize=10)
    all_fig.tight_layout()
    all_fig.savefig(
        figs_dir +
        '{}_{}_hierarchical_curves_and_hists.png'.format(drug, channel))
    #all_fig.savefig(figs_dir+'{}_{}_hierarchical_curves_and_hists.pdf'.format(drug,channel))
    plt.show(block=True)

    print "Figures saved in", figs_dir

    print "\n\n{} + {} done\n\n".format(drug, channel)
Example #13
0
def _draw_model_panel(ax, x, pic50s, hills, best_pic50, best_hill, concs,
                      responses):
    """Draw sampled curves, the best-sample curve and the data on `ax`."""
    for pic50, hill in zip(pic50s, hills):
        ax.plot(x,
                dr.dose_response_model(x, hill, dr.pic50_to_ic50(pic50)),
                color='black',
                alpha=0.02)
    max_pd_curve = dr.dose_response_model(x, best_hill,
                                          dr.pic50_to_ic50(best_pic50))
    ax.plot(x, max_pd_curve, label='Max PD', lw=1.5, color='red')
    ax.plot(concs,
            responses,
            "o",
            color='orange',
            ms=10,
            label='Data',
            zorder=10)


def do_plots(drug_channel):
    """Plot single-level MCMC prediction curves for models 1 and 2 side by side.

    The left panel is model 1 (Hill fixed at 1), the right panel model 2
    (Hill varying).  Each panel shows `num_curves` curves for randomly chosen
    chain rows, the curve for the row that maximises the last chain column
    (labelled 'Max PD'; presumably the log posterior density), and the pooled
    experimental data.

    drug_channel -- (drug, channel) pair

    Relies on the module-level names `num_pts`, `num_curves` and
    `temperature`.  Returns None.
    """
    top_drug, top_channel = drug_channel

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        top_drug, top_channel)

    # pool every experiment into flat concentration / response arrays
    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    # Axis limits: smallest positive and largest concentration over all
    # experiments.  Zeros are skipped because of the log10 below; an
    # experiment containing a zero still contributes its smallest positive
    # concentration instead of being ignored altogether.
    xmin = 1000
    xmax = -1000
    for expt in experiments:
        expt_concs = expt[:, 0]
        positive = expt_concs[expt_concs > 0]
        if positive.size:
            xmin = min(xmin, np.min(positive))
        xmax = max(xmax, np.max(expt_concs))

    # pad by two decades below and three above the data
    xmin = int(np.log10(xmin)) - 2
    xmax = int(np.log10(xmax)) + 3

    x = np.logspace(xmin, xmax, num_pts)

    fig, (ax1, ax2) = plt.subplots(1,
                                   2,
                                   figsize=(9, 4),
                                   sharey=True,
                                   sharex=True)
    ax1.set_xscale('log')
    ax1.grid()
    ax2.grid()
    ax1.set_xlim(10**xmin, 10**xmax)
    ax1.set_ylim(0, 100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(top_drug))
    ax2.set_xlabel(r'{} concentration ($\mu$M)'.format(top_drug))
    ax1.set_ylabel(r'% {} block'.format(top_channel))

    # proxy artist so the faint sample curves get one legend entry
    anyArtist = plt.Line2D((0, 1), (0, 0), color='k')

    # ---- model 1: pIC50 only, Hill fixed at 1 ----------------------------
    model = 1
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        model, top_drug, top_channel, temperature)

    chain = np.loadtxt(chain_file)
    best_idx = np.argmax(chain[:, -1])
    best_pic50 = chain[best_idx, 0]

    saved_its = chain.shape[0]
    rand_idx = npr.randint(saved_its, size=num_curves)

    pic50s = chain[rand_idx, 0]
    ax1.set_title(r'Model 1: $pIC50 = {}$, fixed $Hill = 1$'.format(
        np.round(best_pic50, 2)))
    _draw_model_panel(ax1, x, pic50s, [1.] * num_curves, best_pic50, 1.,
                      concs, responses)

    handles, labels = ax1.get_legend_handles_labels()

    # one compound/channel pair gets its legend moved to another corner
    if drug == "Quinine" and channel == "Nav1.5-late":
        loc = 4
    else:
        loc = 2
    ax1.legend(handles + [anyArtist], labels + ["Samples"], loc=loc)

    # ---- model 2: pIC50 and Hill both vary -------------------------------
    model = 2
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        model, top_drug, top_channel, temperature)

    chain = np.loadtxt(chain_file)
    best_idx = np.argmax(chain[:, -1])
    best_pic50, best_hill = chain[best_idx, [0, 1]]

    ax2.set_title(r"Model 2: $pIC50={}$, Hill = {}".format(
        np.round(best_pic50, 2), np.round(best_hill, 2)))

    saved_its = chain.shape[0]
    rand_idx = npr.randint(saved_its, size=num_curves)

    pic50s = chain[rand_idx, 0]
    hills = chain[rand_idx, 1]
    _draw_model_panel(ax2, x, pic50s, hills, best_pic50, best_hill, concs,
                      responses)

    handles, labels = ax2.get_legend_handles_labels()
    ax2.legend(handles + [anyArtist], labels + ["Samples"], loc=loc)

    fig.tight_layout()

    # NOTE(review): the figure goes to the current working directory;
    # images_dir is returned above but never used -- confirm this is intended.
    fig.savefig("{}_{}_nonh_both_models_mcmc_prediction_curves.png".format(
        drug, channel))
    plt.close()

    return None
Example #14
0
def run(drug_channel):
    drug,channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug,channel)
    
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(drug,channel)
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts
        
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(drug,channel,num_expts)
    
    hill_cdf_file, pic50_cdf_file = dr.hierarchical_posterior_predictive_cdf_files(drug,channel,num_expts)
    
    hill_cdf = np.loadtxt(hill_cdf_file)
    pic50_cdf = np.loadtxt(pic50_cdf_file)
    
    num_samples = 2000
    
    unif_hill_samples = npr.rand(num_samples)
    unif_pic50_samples = npr.rand(num_samples)
    
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    
    
    
    
    fig = plt.figure(figsize=(11,7))
    
    
    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax1.set_xscale('log')
    ax1.set_ylim(0,100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin,10**xmax)
    
    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    
    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax1.plot(concs,dr.dose_response_model(concs,hill_samples[i],dr.pic50_to_ic50(pic50_samples[i])),color='black',alpha=0.01)
    ax1.legend(loc=2,fontsize=10)
    
    num_hist_samples = 100000
    
    unif_hill_samples = npr.rand(num_hist_samples)
    unif_pic50_samples = npr.rand(num_hist_samples)
    
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    
    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0,100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc,hill_samples,dr.pic50_to_ic50(pic50_samples)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    
    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc=2,fontsize=10)
        
    ax3 = fig.add_subplot(232,sharey=ax1)
    ax3.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax3.set_xscale('log')
    ax3.set_ylim(0,100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin,10**xmax)
    
    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    
    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    burn = end/4
    
    num_samples = 1000
    alpha_indices = npr.randint(burn,end,num_samples)
    alpha_samples = chain[alpha_indices,0]
    mu_samples = chain[alpha_indices,2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax3.plot(concs,dr.dose_response_model(concs,alpha_samples[i],dr.pic50_to_ic50(mu_samples[i])),color='black',alpha=0.01)
    ax3.legend(loc=2,fontsize=10)
    ax4 = fig.add_subplot(235,sharey=ax2)
    ax4.set_xlim(0,100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()
    
    num_hist_samples = 100000
    hist_indices = npr.randint(burn,end,num_hist_samples)
    alphas = chain[hist_indices,0]
    mus = chain[hist_indices,2]
    
    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc,alphas,dr.pic50_to_ic50(mus)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')
    
    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)
    
    
    # now plot non-hierarchical
    
    num_params = 3
    drug,channel,chain_file,figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(drug, channel, args.fix_hill)
    chain = np.loadtxt(chain_file,usecols=range(num_params-1)) # not interested in log-target values right now
    end = chain.shape[0]
    burn = end/4

    num_samples = 1000
    sample_indices = npr.randint(burn,end,num_samples)
    samples = chain[sample_indices,:]
    
    
    ax5 = fig.add_subplot(233,sharey=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    xmin = -4
    xmax = 4
    concs = np.logspace(xmin,xmax,101)
    ax5.set_xscale('log')
    ax5.set_ylim(0,100)
    ax5.set_xlim(10**xmin,10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    ax5.legend(fontsize=10)
    
    for expt in experiment_numbers:
        if expt==1:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,label='All expts',zorder=10)
        else:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,zorder=10)
    
    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,color=colors[3+i],alpha=0.8,lw=2,label=r"{} $\mu$M".format(conc))
    for i in xrange(num_samples):
        ax5.plot(concs,dr.dose_response_model(concs,samples[i,0],dr.pic50_to_ic50(samples[i,1])),color='black',alpha=0.01)
    ax5.legend(loc=2,fontsize=10)
    
    num_hist_samples = 50000
    sample_indices = npr.randint(burn,end,num_hist_samples)
    samples = chain[sample_indices,:]
    ax6 = fig.add_subplot(236,sharey=ax2)
    ax6.set_xlim(0,100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc,samples[:,0],dr.pic50_to_ic50(samples[:,1])),bins=50,normed=True,alpha=0.8,color=colors[3+i],edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax6.set_title('F. Single-level inferred\neffects')

    ax2.legend(loc=2,fontsize=10)
    
    
    
    plot_dir = dr.all_predictions_dir(drug,channel)
    
    fig.tight_layout()
    fig.savefig(plot_dir+'{}_{}_all_predictions.png'.format(drug,channel))
    fig.savefig(plot_dir+'{}_{}_all_predictions.pdf'.format(drug,channel)) # uncomment to save as pdf, or change extension to whatever you want
    

    plt.close()
    
    print "Figures saved in", plot_dir
def plot_mcmc_samples(drug_channel):
    """Plot dose-response curves for random MCMC samples of both models.

    For each of the two models a subplot shows 1200 curves drawn (with
    replacement) from that model's saved chain, overlaid with the pooled
    experimental data.  Model 1 fixes Hill = 1 and varies pIC50 only; model 2
    varies both.  The figure is saved as PNG under ``all_figs_dir`` and as PDF
    under the drug/channel figures directory.

    Args:
        drug_channel: ``(drug, channel)`` pair identifying the data set.

    Returns:
        None.

    Relies on the module-level ``temperature`` and ``all_figs_dir``.
    """
    drug, channel = drug_channel

    fig = plt.figure(figsize=(5, 8))
    axes = {}
    axes[1] = fig.add_subplot(211)
    axes[2] = fig.add_subplot(212)

    num_models = 2
    how_many_samples_to_plot = 1200
    fsize = 14
    num_pts = 101

    for model in xrange(1, num_models + 1):

        dr.define_model(model)

        chain_file = dr.define_chain_file(model, drug, channel, temperature)

        num_expts, experiment_numbers, experiments = dr.load_crumb_data(drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool the (concentration, response) columns of every experiment.
        concs = np.array([])
        responses = np.array([])
        for i in xrange(num_expts):
            concs = np.concatenate((concs, experiments[i][:, 0]))
            responses = np.concatenate((responses, experiments[i][:, 1]))

        # Pick a random subset (with replacement) of the saved samples.
        mcmc_samples = np.loadtxt(chain_file, usecols=range(dr.num_params))
        saved_its = mcmc_samples.shape[0]
        sample_indices = npr.randint(0, saved_its, how_many_samples_to_plot)
        mcmc_samples = mcmc_samples[sample_indices]

        # Plot from one decade below the smallest to two decades above the
        # largest tested concentration.
        conc_min = np.min(concs)
        conc_max = np.max(concs)
        x_range = np.logspace(int(np.log10(conc_min)) - 1,
                              int(np.log10(conc_max)) + 2, num_pts)

        ax = axes[model]
        ax.set_xscale('log')
        ax.grid()
        ax.set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        ax.set_xlabel(r"{} concentration ($\mu$M)".format(drug), fontsize=fsize)
        ax.set_ylim(0, 100)

        # The model decides the title and how a chain row maps to
        # (pIC50, Hill); resolve that once rather than for every sample.
        if model == 1:
            title = "$M_1$, fixed $Hill=1$, varying $pIC50$"
            curve_params = [(row[0], 1) for row in mcmc_samples]
        elif model == 2:
            title = "$M_2$, varying $pIC50$ and $Hill$"
            curve_params = [(row[0], row[1]) for row in mcmc_samples]

        for pic50, hill in curve_params:
            ax.plot(x_range,
                    dr.dose_response_model(x_range, hill, dr.pic50_to_ic50(pic50)),
                    color='black', alpha=0.01)
        ax.plot(concs, responses, 'o', color='orange', ms=10, label="Expt data")
        ax.set_title(title, fontsize=fsize)
        ax.legend(loc=2)

    fig.tight_layout()
    fig.savefig(all_figs_dir + '{}_{}_mcmc_samples.png'.format(drug, channel))
    fig.savefig(figs_dir + '{}_{}_mcmc_samples.pdf'.format(drug, channel))
    plt.close()
    return None
Example #16
0
def do_plot(drug_channel):
    """Fit each model to the pooled data with CMA-ES and plot the best fits.

    One subplot per model shows the best-fit dose-response curve over the
    experimental points, titled with the fitted pIC50, Hill coefficient and
    sum of squares.  The figure is saved as PNG under ``all_figs_dir`` and as
    PDF under the drug/channel figures directory.

    The pooled data are published through the module globals ``concs`` and
    ``responses``, which the two-argument ``sum_of_square_diffs(x, model)``
    objective is expected to read.

    Args:
        drug_channel: ``(drug, channel)`` pair identifying the data set.

    Relies on the module-level ``num_models`` and ``all_figs_dir``.
    """
    global concs, responses

    fig = plt.figure(figsize=(5, 8))
    panels = {}
    panels[1] = fig.add_subplot(211)
    panels[2] = fig.add_subplot(212)

    label_size = 14

    for model in xrange(1, num_models + 1):

        dr.define_model(model)

        drug, channel = drug_channel
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool every experiment into the two global data vectors.
        concs = np.array([])
        responses = np.array([])
        for expt_index in xrange(num_expts):
            expt = experiments[expt_index]
            concs = np.concatenate((concs, expt[:, 0]))
            responses = np.concatenate((responses, expt[:, 1]))

        # Model 2 is started from the optimum just found for model 1
        # (pic50 and hill carry over from the previous loop iteration).
        if model == 1:
            x0 = np.ones(2)
            sigma0 = 0.1
        elif model == 2:
            x0 = np.copy([pic50, hill])
            sigma0 = 0.01

        optimiser = cma.CMAEvolutionStrategy(x0, sigma0, cma.CMAOptions())
        while not optimiser.stop():
            candidates = optimiser.ask()
            fitness = [sum_of_square_diffs(candidate, model)
                       for candidate in candidates]
            optimiser.tell(candidates, fitness)
            optimiser.disp()
        outcome = optimiser.result()
        best_ss = outcome[1]
        pic50, hill = outcome[0]
        if model == 1:
            # The second search coordinate is ignored by model 1.
            hill = 1

        # Curve spans one decade below to two decades above the data range.
        lowest_decade = int(np.log10(np.min(concs))) - 1
        highest_decade = int(np.log10(np.max(concs))) + 2
        curve_x = np.logspace(lowest_decade, highest_decade, 501)
        curve_y = dr.dose_response_model(curve_x, hill, dr.pic50_to_ic50(pic50))

        panel = panels[model]
        panel.grid()
        panel.set_xscale('log')
        panel.set_ylim(0, 100)
        panel.set_ylabel(r"% {} block".format(channel), fontsize=label_size)
        panel.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                         fontsize=label_size)
        panel.plot(curve_x, curve_y, color='blue', lw=2, label="Best fit")
        panel.plot(concs, responses, 'o', color='orange', ms=10,
                   label="Expt data")
        panel.legend(loc=2)
        panel.set_title(
            "$M_{}, pIC50 = {}, Hill = {}, SS = {}$".format(
                model, round(pic50, 2), round(hill, 2), round(best_ss, 2)),
            fontsize=label_size)

    fig.tight_layout()
    fig.savefig(all_figs_dir + "{}_{}_best_fits.png".format(drug, channel))
    fig.savefig(figs_dir + "{}_{}_best_fit.pdf".format(drug, channel))
    plt.close()
Example #17
0
def sum_of_square_diffs(_params, doses, responses):
    """Return the sum of squared residuals of the dose-response model.

    Args:
        _params: two-element sequence ``(pIC50, hill)``.
        doses: concentrations passed to ``dr.dose_response_model``.
        responses: measured responses compared against the model output.
    """
    pic50_value, hill_coefficient = _params
    predicted = dr.dose_response_model(doses, hill_coefficient,
                                       dr.pic50_to_ic50(pic50_value))
    residuals = predicted - responses
    return np.sum(residuals**2)
Example #18
0
    # could technically save 50% of the space for Model 1 by not bothering to save Hill=1 in every sample...
    with open(txt_file, "w") as outfile:
        outfile.write(
            "# {} (pIC50,Hill) samples from single-level MCMC (model {}) for {} + {}\n"
            .format(args.num_samples, args.model, drug, channel))
        np.savetxt(outfile, chain)

    fig, ax = plt.subplots(1, 1, figsize=(5, 4))
    ax.grid()
    ax.set_xlabel("{} concentration ($\mu$M)".format(drug))
    ax.set_ylabel("% {} block".format(channel))
    ax.set_xscale("log")
    x = np.logspace(min_x, max_x, num_x_pts)

    for t in xrange(args.num_samples):
        pic50, hill = chain[t, :]
        predicted_response_curve = dr.dose_response_model(
            x, hill, dr.pic50_to_ic50(pic50))
        ax.plot(x, predicted_response_curve, color='black', alpha=alpha)
    if args.plot_data:
        ax.plot(concs, responses, 'o', color='orange', ms=8, zorder=10)
    fig.tight_layout()
    fig.savefig(png_file)
    print "\nSaved {}\n".format(samples_png)
    if args.save_pdf:
        fig.savefig(samples_pdf)
        print "\nSaved {}\n".format(samples_pdf)

    plt.close()
Example #19
0
def run_single_level(drug_channel):

    drug, channel = drug_channel

    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        drug, channel)

    num_params = 3  # hill, pic50, mu

    concs = np.array([])
    responses = np.array([])
    for i in xrange(num_expts):
        concs = np.concatenate((concs, experiments[i][:, 0]))
        responses = np.concatenate((responses, experiments[i][:, 1]))

    print experiments
    print concs
    print responses

    # uniform prior intervals
    hill_prior = [0, 10]
    pic50_prior = [-1, 20]
    sigma_prior = [1e-3, 50]

    prior_lowers = np.array([hill_prior[0], pic50_prior[0], sigma_prior[0]])
    prior_uppers = np.array([hill_prior[1], pic50_prior[1], sigma_prior[1]])

    # for reproducible results, otherwise select a new random seed
    seed = 1
    npr.seed(seed)

    start = time.time()
    x0 = np.array(
        [1., 2.5]
    )  # not fitting sigma by CMA-ES, can maximise log-likelihood wrt sigma analytically
    sigma0 = 0.1
    opts = cma.CMAOptions()
    opts['seed'] = seed
    es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
    while not es.stop():
        X = es.ask()
        es.tell(X, [sum_of_square_diffs(x, concs, responses) for x in X])
        es.disp()
    res = es.result()

    hill_cur = res[0][0]**2
    pic50_cur = res[0][1]**2 - 1
    sigma_cur = initial_sigma(len(responses), res[1])
    proposal_scale = 0.01

    theta_cur = np.array([hill_cur, pic50_cur, sigma_cur])
    mean_estimate = np.copy(theta_cur)
    cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur)))

    cmaes_ll = log_likelihood_single(responses, concs, theta_cur)

    best_fit_fig = plt.figure(figsize=(5, 4))
    best_fit_ax = best_fit_fig.add_subplot(111)
    best_fit_ax.set_xscale('log')
    best_fit_ax.grid()
    plot_lower_lim = int(np.log10(np.min(concs))) - 1
    plot_upper_lim = int(np.log10(np.max(concs))) + 2
    best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim)
    best_fit_ax.set_ylim(0, 100)
    num_pts = 1001
    x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_pts)
    best_fit_curve = dr.dose_response_model(x_range, hill_cur,
                                            dr.pic50_to_ic50(pic50_cur))
    best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2)
    best_fit_ax.set_ylabel('% {} block'.format(channel))
    best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    best_fit_ax.set_title('Hill = {}, pIC50 = {}'.format(
        np.round(hill_cur, 2), np.round(pic50_cur, 2)))
    best_fit_ax.scatter(concs,
                        responses,
                        marker="o",
                        color='orange',
                        s=100,
                        label='Data',
                        zorder=10)
    best_fit_ax.legend(loc=2)
    best_fit_fig.tight_layout()
    best_fit_fig.savefig(images_dir +
                         '{}_{}_CMA-ES_best_fit.png'.format(drug, channel))
    best_fit_fig.savefig(images_dir +
                         '{}_{}_CMA-ES_best_fit.png'.format(drug, channel))
    plt.close()

    #sys.exit() # uncomment if you only want to plot the best fit

    # let MCMC look around for a bit before adaptive covariance matrix
    # same rule (100*dimension) as in hierarchical case
    when_to_adapt = 100 * num_params

    log_target_cur = log_likelihood_single(responses, concs, theta_cur)
    print "initial log_target_cur =", log_target_cur

    # effectively step size, scales covariance matrix
    loga = 0.
    # what fraction of proposed samples are being accepted into the chain
    acceptance = 0.
    # what fraction of samples we WANT accepted into the chain
    # loga updates itself to try to make this dream come true
    target_acceptance = 0.25

    total_iterations = args.iterations
    thinning = args.thinning
    assert (total_iterations % thinning == 0)

    # how often to print a little status message
    status_when = total_iterations / 20

    saved_iterations = total_iterations / thinning + 1
    # also want to store log-target value at each iteration
    chain = np.zeros((saved_iterations, num_params + 1))

    chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur]))
    print chain[0]

    print "concs:", concs
    print "responses:", responses

    # MCMC!
    t = 1
    start = time.time()
    while t <= total_iterations:
        theta_star = npr.multivariate_normal(theta_cur,
                                             np.exp(loga) * cov_estimate)
        accepted = 0
        if np.all(prior_lowers < theta_star) and np.all(
                theta_star < prior_uppers):
            log_target_star = log_likelihood_single(responses, concs,
                                                    theta_star)
            accept_prob = npr.rand()
            if (np.log(accept_prob) < log_target_star - log_target_cur):
                theta_cur = theta_star
                log_target_cur = log_target_star
                accepted = 1
        acceptance = ((t - 1.) * acceptance + accepted) / t
        if (t > when_to_adapt):
            s = t - when_to_adapt
            gamma_s = 1 / (s + 1)**0.6
            temp_covariance_bit = np.array([theta_cur - mean_estimate])
            cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot(
                np.transpose(temp_covariance_bit), temp_covariance_bit)
            mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur
            loga += gamma_s * (accepted - target_acceptance)
        if (t % thinning == 0):
            chain[t / thinning, :] = np.concatenate(
                (np.copy(theta_cur), [log_target_cur]))
        if (t % status_when == 0):
            print "{} / {}".format(t / status_when,
                                   total_iterations / status_when)
            time_taken_so_far = time.time() - start
            estimated_time_left = time_taken_so_far / t * (total_iterations -
                                                           t)
            print "Time taken: {} s = {} min".format(
                np.round(time_taken_so_far, 1),
                np.round(time_taken_so_far / 60, 2))
            print "acceptance = {}".format(np.round(acceptance, 5))
            print "Estimated time remaining: {} s = {} min".format(
                np.round(estimated_time_left, 1),
                np.round(estimated_time_left / 60, 2))
        t += 1

    print "\nTime taken to do {} MCMC iterations: {} s\n".format(
        total_iterations,
        time.time() - start)
    print "Final iteration:", chain[-1, :], "\n"

    with open(chain_file, 'w') as outfile:
        outfile.write(
            '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n'
            .format(drug, channel))
        np.savetxt(outfile, chain)

    try:
        assert (len(chain[:, 0]) == saved_iterations)
    except AssertionError:
        print "len(chain[:,0])!=saved_iterations"
        sys.exit()

    burn_fraction = args.burn_in_fraction
    burn = saved_iterations / burn_fraction

    best_ll_index = np.argmax(chain[:, num_params])
    best_ll_row = chain[best_ll_index, :]
    print "Best log-likelihood:", "\n", best_ll_row

    figs = []
    axs = []
    # plot all marginal posterior distributions
    for i in range(num_params):
        labels = ['Hill', 'pIC50', r'$\sigma$']
        file_labels = ['Hill', 'pIC50', 'sigma']
        figs.append(plt.figure())
        axs.append([])
        axs[i].append(figs[i].add_subplot(211))
        axs[i][0].hist(chain[burn:, i], bins=40, normed=True)
        axs[i][0].legend()
        axs[i][0].set_title("MCMC marginal distributions")
        axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0]))
        axs[i][1].plot(chain[burn:, i], range(burn, saved_iterations))
        axs[i][1].invert_yaxis()
        axs[i][1].set_xlabel(labels[i])
        axs[i][1].set_ylabel('Saved MCMC iteration')
        figs[i].tight_layout()
        figs[i].savefig(
            images_dir +
            '{}_{}_{}_marginal.png'.format(drug, channel, file_labels[i]))
        plt.close()

    # plot log-target path
    fig2 = plt.figure()
    ax3 = fig2.add_subplot(111)
    ax3.plot(range(saved_iterations), chain[:, -1])
    ax3.set_xlabel('MCMC iteration')
    ax3.set_ylabel('log-target')
    fig2.tight_layout()
    fig2.savefig(images_dir + 'log_target.png')
    plt.close()

    # plot scatterplot matrix of posterior(s)
    labels = ['Hill', 'pIC50', r'$\sigma$']
    colormin, colormax = 1e9, 0
    norm = matplotlib.colors.Normalize(vmin=5, vmax=10)
    hidden_labels = []
    count = 0
    # there's probably a better way to do this
    # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots
    while count < 2:
        axes = {}
        matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params))
        for i in range(num_params):
            for j in range(i + 1):
                ij = str(i) + str(j)
                subplot_position = num_params * i + j + 1
                if i == j:
                    axes[ij] = matrix_fig.add_subplot(num_params, num_params,
                                                      subplot_position)
                    axes[ij].hist(chain[burn:, i],
                                  bins=50,
                                  normed=True,
                                  color='blue')
                elif j == 0:  # this column shares x-axis with top-left
                    axes[ij] = matrix_fig.add_subplot(num_params,
                                                      num_params,
                                                      subplot_position,
                                                      sharex=axes["00"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[burn:, j],
                        chain[burn:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                else:
                    axes[ij] = matrix_fig.add_subplot(
                        num_params,
                        num_params,
                        subplot_position,
                        sharex=axes[str(j) + str(j)],
                        sharey=axes[str(i) + "0"])
                    counts, xedges, yedges, Image = axes[ij].hist2d(
                        chain[burn:, j],
                        chain[burn:, i],
                        cmap='hot_r',
                        bins=50,
                        norm=norm)
                    maxcounts = np.amax(counts)
                    if maxcounts > colormax:
                        colormax = maxcounts
                    mincounts = np.amin(counts)
                    if mincounts < colormin:
                        colormin = mincounts
                if i != num_params - 1:
                    hidden_labels.append(axes[ij].get_xticklabels())
                if j != 0:
                    hidden_labels.append(axes[ij].get_yticklabels())
                if i == num_params - 1:
                    axes[str(i) + str(j)].set_xlabel(labels[j])
                if j == 0:
                    axes[str(i) + str(j)].set_ylabel(labels[i])
                plt.xticks(rotation=30)
        norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax)
        count += 1

    plt.setp(hidden_labels, visible=False)

    matrix_fig.tight_layout()
    matrix_fig.savefig(images_dir +
                       "{}_{}_scatterplot_matrix.png".format(drug, channel))
    #matrix_fig.savefig(images_dir+"{}_{}_scatterplot_matrix.pdf".format(drug,channel))
    plt.close()

    print "\n\n{} + {} complete!\n\n".format(drug, channel)