def log_likelihood_single_fix_hill(measurements, doses, theta):
    """Return the Gaussian log-likelihood of the data with Hill fixed at 1.

    The additive constant that does not depend on the parameters is omitted.

    Args:
        measurements: observed responses, one per dose.
        doses: concentrations passed straight to ``dr.dose_response_model``.
        theta: exactly two values, unpacked as ``(pIC50, sigma)``.

    Returns:
        ``-n * log(sigma) - SS / (2 * sigma**2)`` where ``n`` is
        ``len(measurements)`` and ``SS`` is the residual sum of squares.
    """
    # The Hill coefficient is not part of theta; the literal 1 is passed below.
    pIC50, sigma = theta
    noise_term = -len(measurements) * np.log(sigma)
    predicted = dr.dose_response_model(doses, 1, dr.pic50_to_ic50(pIC50))
    residual_ss = np.sum((measurements - predicted)**2)
    return noise_term - residual_ss / (2. * sigma**2)
def log_likelihood_single(measurements, doses, theta):
    """Return the Gaussian log-likelihood of the data for a free Hill model.

    The additive constant that does not depend on the parameters is omitted.

    Args:
        measurements: observed responses, one per dose.
        doses: concentrations passed straight to ``dr.dose_response_model``.
        theta: indexable; ``theta[0]`` is the Hill coefficient, ``theta[1]``
            the pIC50 and ``theta[2]`` the noise standard deviation.

    Returns:
        ``-n * log(sigma) - SS / (2 * sigma**2)`` where ``n`` is
        ``len(measurements)`` and ``SS`` is the residual sum of squares.
    """
    hill, pIC50, sigma = theta[0], theta[1], theta[2]
    noise_term = -len(measurements) * np.log(sigma)
    predicted = dr.dose_response_model(doses, hill, dr.pic50_to_ic50(pIC50))
    residual_ss = np.sum((measurements - predicted)**2)
    return noise_term - residual_ss / (2. * sigma**2)
def sum_of_square_diffs(params, model):
    """Return the sum of squared residuals of a dose-response fit.

    The data and the admissible parameter range are not passed in: this
    function reads the module-level names ``concs``, ``responses``,
    ``hill_lower``, ``hill_upper`` and ``pic50_lower``.

    Args:
        params: optimiser parameters.  For ``model == 1`` only ``params[0]``
            (the pIC50) is used and Hill is fixed at 1; for ``model == 2``
            ``params`` must unpack to exactly ``(pic50, hill)``.
        model: 1 or 2.

    Returns:
        The residual sum of squares, or the penalty value ``1e9`` when the
        parameters fall outside the admissible range.

    Raises:
        ValueError: if ``model`` is neither 1 nor 2.
    """
    if model == 1:
        pic50 = params[0]
        hill = 1
    elif model == 2:
        pic50, hill = params
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``hill``; fail early with a clear message.
        raise ValueError(
            "unknown model {!r}: expected 1 or 2".format(model))
    # Out-of-range parameters get a large constant cost so the optimiser
    # is pushed back into the admissible region.
    if hill <= hill_lower or hill > hill_upper or pic50 <= pic50_lower:
        return 1e9
    predicted = dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50))
    return np.sum((responses - predicted)**2)
def sum_of_square_diffs(params, model):
    """Return the sum of squared residuals of a dose-response fit.

    The data and the admissible parameter range are not passed in: this
    function reads the module-level names ``concs``, ``responses``,
    ``hill_lower``, ``hill_upper`` and ``pic50_lower``.

    Args:
        params: optimiser parameters.  For ``model == 1`` only ``params[0]``
            (the pIC50) is used and Hill is fixed at 1; for ``model == 2``
            ``params`` must unpack to exactly ``(pic50, hill)``.
        model: 1 or 2.

    Returns:
        The residual sum of squares, or the penalty value ``1e9`` when the
        parameters fall outside the admissible range.

    Raises:
        ValueError: if ``model`` is neither 1 nor 2.
    """
    if model == 1:
        pic50 = params[0]
        hill = 1
    elif model == 2:
        pic50, hill = params
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``hill``; fail early with a clear message.
        raise ValueError(
            "unknown model {!r}: expected 1 or 2".format(model))
    # Out-of-range parameters get a large constant cost so the optimiser
    # is pushed back into the admissible region.
    if hill <= hill_lower or hill > hill_upper or pic50 <= pic50_lower:
        return 1e9
    predicted = dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50))
    return np.sum((responses - predicted)**2)
def log_data_likelihood(hill_is, pic50_is, sigma, experiments): Ne = len(experiments) answer = 0. for i in range(Ne): ic50 = dr.pic50_to_ic50(pic50_is[i]) concs = experiments[i][:, 0] num_expt_pts = len(concs) data = experiments[i][:, 1] model_responses = dr.dose_response_model(concs, hill_is[i], ic50) exp_bit = np.sum((data - model_responses)**2) / (2 * sigma**2) # assuming noise Normal is truncated at 0 and 100 truncated_scale = np.sum( np.log( st.norm.cdf(100, model_responses, sigma) - st.norm.cdf(0, model_responses, sigma))) answer -= (num_expt_pts * np.log(sigma) + exp_bit + truncated_scale) if np.isnan(answer): print "NaN from log_data_likelihood!" print "hill_is =", hill_is print "pic50_is =", pic50_is print "sigma =", sigma sys.exit() return answer
def do_plot(drug_channel):
    """Fit every model to one drug/channel pair and plot the best fits.

    For each model number from 1 to the module-level ``num_models`` the
    data of all experiments are pooled, fitted by CMA-ES minimisation of
    ``sum_of_square_diffs`` and the best-fit curve is drawn on that model's
    own subplot.  The figure is saved as a PNG under the module-level
    ``all_figs_dir`` and as a PDF under the drug/channel figure directory.

    Args:
        drug_channel: two-item sequence ``(drug, channel)``.

    Side effects:
        Rebinds the module-level ``concs`` and ``responses`` so that the
        objective called below, which only receives ``(params, model)``,
        can see the data.  Writes two figure files.
    """
    global concs, responses
    fig = plt.figure(figsize=(5, 8))
    # Only two subplots are created, keyed by model number 1 and 2.
    axes = {}
    axes[1] = fig.add_subplot(211)
    axes[2] = fig.add_subplot(212)  #, sharey=axes[1])
    fsize = 14
    for model in xrange(1, num_models + 1):
        dr.define_model(model)
        drug, channel = drug_channel
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool all experiments into flat concentration/response arrays.
        concs = np.array([])
        responses = np.array([])
        for i in xrange(num_expts):
            concs = np.concatenate((concs, experiments[i][:, 0]))
            responses = np.concatenate((responses, experiments[i][:, 1]))

        if model == 1:
            x0 = np.ones(2)
            sigma0 = 0.1
        elif model == 2:
            # Warm start: pic50 and hill are the values left over from the
            # model 1 pass of this loop (hill was reset to 1 there).
            x0 = np.copy([pic50, hill])
            sigma0 = 0.01
        #x0[0] = 6.9
        opts = cma.CMAOptions()
        es = cma.CMAEvolutionStrategy(x0, sigma0, opts)
        while not es.stop():
            X = es.ask()
            f_vals = [sum_of_square_diffs(x, model) for x in X]
            es.tell(X, f_vals)
            es.disp()
        res = es.result()
        ss = res[1]
        pic50, hill = res[0]
        if model == 1:
            # The second optimiser coordinate is unused by model 1.
            hill = 1

        conc_min = np.min(concs)
        if conc_min == 0:
            # A zero (control) concentration has no position on a log axis
            # and int(log10(0)) would raise OverflowError; use the smallest
            # non-zero concentration, as run_single_level does.
            conc_min = np.min(concs[np.nonzero(concs)])
        conc_max = np.max(concs)
        num_pts = 501
        x_range = np.logspace(
            int(np.log10(conc_min)) - 1,
            int(np.log10(conc_max)) + 2, num_pts)
        predicted = dr.dose_response_model(x_range, hill,
                                           dr.pic50_to_ic50(pic50))
        #fig = plt.figure(figsize=(5,4))
        #ax = fig.add_subplot(111)
        axes[model].grid()
        axes[model].set_xscale('log')
        axes[model].set_ylim(0, 100)
        axes[model].set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        axes[model].set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                               fontsize=fsize)
        axes[model].plot(x_range, predicted, color='blue', lw=2,
                         label="Best fit")
        axes[model].plot(concs, responses, 'o', color='orange', ms=10,
                         label="Expt data")
        axes[model].legend(loc=2)
        axes[model].set_title("$M_{}, pIC50 = {}, Hill = {}, SS = {}$".format(
            model, round(pic50, 2), round(hill, 2), round(ss, 2)),
                              fontsize=fsize)
    #axes[2].set_yticklabels([])
    fig.tight_layout()
    #fig.savefig(figs_dir+"{}_{}_model_{}_best_fit.png".format(drug,channel,model))
    fig.savefig(all_figs_dir + "{}_{}_best_fits.png".format(drug, channel))
    fig.savefig(figs_dir + "{}_{}_best_fit.pdf".format(drug, channel))
    plt.close()
def run_single_level(drug_channel): drug, channel = drug_channel print "\n\n{} + {}\n\n".format(drug, channel) seed = 100 try: num_expts, experiment_numbers, experiments = dr.load_crumb_data( drug, channel) except: print "Problem loading data, guessing there are no entries for {} + {} --- skipping".format( drug, channel) return None drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir( args.model, drug, channel, temperature) concs = np.array([]) responses = np.array([]) for i in xrange(num_expts): concs = np.concatenate((concs, experiments[i][:, 0])) responses = np.concatenate((responses, experiments[i][:, 1])) if np.any(np.isnan(responses)): print "Skipping {} because of empty responses / missing data".format( drug_channel) return None #print experiments #print concs #print responses where_r_0 = responses == 0 where_r_100 = responses == 100 where_r_other = (0 < responses) & (responses < 100) #print "where_r_0:", where_r_0 #print "where_r_100:", where_r_100 #print "where_r_other:", where_r_other pi_bit = dr.compute_pi_bit_of_log_likelihood(where_r_other) # plot priors for i in xrange(num_params): fig = plt.figure(figsize=(4, 3)) ax = fig.add_subplot(111) ax.grid() ax.plot(dr.prior_xs[i], dr.prior_pdfs[i], color='blue', lw=2) ax.set_xlabel(dr.labels[i]) ax.set_ylabel("Prior pdf") fig.tight_layout() fig.savefig(images_dir + dr.file_labels[i] + "_prior_pdf.pdf") plt.close() start = time.time() sigma0 = 0.1 opts = cma.CMAOptions() opts['seed'] = seed if args.model == 1: #x0 = np.array([2.5, 3.]) x0 = np.array([2.5, 1.]) es = cma.CMAEvolutionStrategy(x0, sigma0, opts) while not es.stop(): X = es.ask() #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower,dr.sigma_uniform_lower], temperature, pi_bit) for x in X]) es.tell(X, [ sum_of_square_diffs([x[0]**2 + dr.pic50_exp_lower, 1.], concs, responses) for x in X ]) es.disp() res = es.result #pic50_cur, sigma_cur = res[0]**2 + 
[dr.pic50_exp_lower, dr.sigma_uniform_lower] pic50_cur = res[0][0]**2 + dr.pic50_exp_lower hill_cur = 1 elif args.model == 2: #x0 = np.array([2.5, 1., 3.]) x0 = np.array([2.5, 1.]) es = cma.CMAEvolutionStrategy(x0, sigma0, opts) while not es.stop(): X = es.ask() #es.tell(X, [-dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower], temperature, pi_bit) for x in X]) es.tell(X, [ sum_of_square_diffs( x**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower], concs, responses) for x in X ]) es.disp() res = es.result #pic50_cur, hill_cur, sigma_cur = res[0]**2 + [dr.pic50_exp_lower, dr.hill_uniform_lower, dr.sigma_uniform_lower] pic50_cur, hill_cur = res[0]**2 + [ dr.pic50_exp_lower, dr.hill_uniform_lower ] sigma_cur = initial_sigma(len(responses), res[1]) #print "sigma_cur:", sigma_cur if args.model == 1: theta_cur = np.array([pic50_cur, sigma_cur]) elif args.model == 2: theta_cur = np.array([pic50_cur, hill_cur, sigma_cur]) #print "theta_cur:", theta_cur best_params_file = images_dir + "{}_{}_best_fit_params.txt".format( drug, channel) with open(best_params_file, "w") as outfile: outfile.write("# CMA-ES best fit params\n") if args.model == 1: outfile.write("# pIC50, sigma, (Hill=1, not included)\n") elif args.model == 2: outfile.write("# pIC50, Hill, sigma\n") np.savetxt(outfile, [theta_cur]) proposal_scale = 0.05 mean_estimate = np.copy(theta_cur) cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur))) cmaes_ll = dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, theta_cur, temperature, pi_bit) #print "cmaes_ll:", cmaes_ll best_fit_fig = plt.figure(figsize=(5, 4)) best_fit_ax = best_fit_fig.add_subplot(111) best_fit_ax.set_xscale('log') best_fit_ax.grid() if np.min(concs) == 0: plot_lower_lim = int(np.log10(np.min(concs[np.nonzero(concs)]))) - 2 else: plot_lower_lim = int(np.log10(np.min(concs))) - 2 plot_upper_lim = int(np.log10(np.max(concs))) + 2 
best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim) best_fit_ax.set_ylim(0, 100) num_x_pts = 1001 x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_x_pts) best_fit_curve = dr.dose_response_model(x_range, hill_cur, dr.pic50_to_ic50(pic50_cur)) best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2) best_fit_ax.set_ylabel('% {} block'.format(channel)) best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug)) best_fit_ax.set_title(r'$pIC50 = {}, Hill = {}; SS = {}$'.format( np.round(pic50_cur, 2), np.round(hill_cur, 2), round(res[1], 2))) best_fit_ax.plot(concs, responses, "o", color='orange', ms=10, label='Data', zorder=10) best_fit_ax.legend(loc=2) best_fit_fig.tight_layout() best_fit_fig.savefig( images_dir + '{}_{}_model_{}_CMA-ES_best_fit.png'.format(drug, channel, args.model)) best_fit_fig.savefig( images_dir + '{}_{}_model_{}_CMA-ES_best_fit.pdf'.format(drug, channel, args.model)) plt.close() if args.best_fit_only: print "\nStopping {}+{} after doing and plotting best fit\n".format( drug, channel) return None # let MCMC look around for a bit before adaptive covariance matrix # same rule (100*dimension) as in hierarchical case when_to_adapt = 1000 * num_params log_target_cur = dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, theta_cur, temperature, pi_bit) #print "initial log_target_cur =", log_target_cur # effectively step size, scales covariance matrix loga = 0. # what fraction of proposed samples are being accepted into the chain acceptance = 0. 
# what fraction of samples we WANT accepted into the chain # loga updates itself to try to make this dream come true target_acceptance = 0.25 total_iterations = args.iterations thinning = args.thinning assert (total_iterations % thinning == 0) # how often to print a little status message status_when = total_iterations / 20 saved_iterations = total_iterations / thinning + 1 # also want to store log-target value at each iteration chain = np.zeros((saved_iterations, num_params + 1)) chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur])) #print chain[0] #print "concs:", concs #print "responses:", responses # for reproducible results, otherwise select a new random seed seed = 25 npr.seed(seed) # MCMC! t = 1 start = time.time() while t <= total_iterations: theta_star = npr.multivariate_normal(theta_cur, np.exp(loga) * cov_estimate) accepted = 0 log_target_star = dr.log_target(responses, where_r_0, where_r_100, where_r_other, concs, theta_star, temperature, pi_bit) accept_prob = npr.rand() if (np.log(accept_prob) < log_target_star - log_target_cur): theta_cur = theta_star log_target_cur = log_target_star accepted = 1 acceptance = ((t - 1.) 
* acceptance + accepted) / t if (t > when_to_adapt): s = t - when_to_adapt gamma_s = 1 / (s + 1)**0.6 temp_covariance_bit = np.array([theta_cur - mean_estimate]) cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot( np.transpose(temp_covariance_bit), temp_covariance_bit) mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur loga += gamma_s * (accepted - target_acceptance) if (t % thinning == 0): chain[t / thinning, :] = np.concatenate( (np.copy(theta_cur), [log_target_cur])) if (t % status_when == 0): #print "{} / {}".format(t/status_when,total_iterations/status_when) time_taken_so_far = time.time() - start estimated_time_left = time_taken_so_far / t * (total_iterations - t) #print "Time taken: {} s = {} min".format(np.round(time_taken_so_far,1),np.round(time_taken_so_far/60,2)) #print "acceptance = {}".format(np.round(acceptance,5)) #print "Estimated time remaining: {} s = {} min".format(np.round(estimated_time_left,1),np.round(estimated_time_left/60,2)) t += 1 #print "\nTime taken to do {} MCMC iterations: {} s\n".format(total_iterations, time.time()-start) #print "Final iteration:", chain[-1,:], "\n" burn_fraction = args.burn_in_fraction burn = saved_iterations / burn_fraction chain = chain[burn:, :] # remove burn-in before saving with open(chain_file, 'w') as outfile: outfile.write( '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n' .format(drug, channel)) np.savetxt(outfile, chain) best_ll_index = np.argmax(chain[:, num_params]) best_ll_row = chain[best_ll_index, :] #print "Best log-likelihood:", "\n", best_ll_row figs = [] axs = [] # plot all marginal posterior distributions for i in range(num_params): figs.append(plt.figure()) axs.append([]) axs[i].append(figs[i].add_subplot(211)) axs[i][0].hist(chain[:, i], bins=40, normed=True, color='blue', edgecolor='blue') axs[i][0].legend() axs[i][0].set_title("MCMC marginal distributions") axs[i][0].set_ylabel("Normalised frequency") axs[i][0].grid() 
plt.setp(axs[i][0].get_xticklabels(), visible=False) axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0])) axs[i][1].plot(chain[:, i], range(burn, saved_iterations)) axs[i][1].invert_yaxis() axs[i][1].set_xlabel(dr.labels[i]) axs[i][1].set_ylabel('Saved MCMC iteration') axs[i][1].grid() figs[i].tight_layout() figs[i].savefig(images_dir + '{}_{}_model_{}_{}_marginal.png'.format( drug, channel, args.model, dr.file_labels[i])) plt.close() # plot log-target path fig2 = plt.figure() ax3 = fig2.add_subplot(111) ax3.plot(range(burn, saved_iterations), chain[:, -1]) ax3.set_xlabel('MCMC iteration') ax3.set_ylabel('log-target') ax3.grid() fig2.tight_layout() fig2.savefig(images_dir + 'log_target.png') plt.close() # plot scatterplot matrix of posterior(s) colormin, colormax = 1e9, 0 norm = matplotlib.colors.Normalize(vmin=5, vmax=10) hidden_labels = [] count = 0 # there's probably a better way to do this # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots while count < 2: axes = {} matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params)) for i in range(num_params): for j in range(i + 1): ij = str(i) + str(j) subplot_position = num_params * i + j + 1 if i == j: axes[ij] = matrix_fig.add_subplot(num_params, num_params, subplot_position) axes[ij].hist(chain[:, i], bins=50, normed=True, color='blue', edgecolor='blue') elif j == 0: # this column shares x-axis with top-left axes[ij] = matrix_fig.add_subplot(num_params, num_params, subplot_position, sharex=axes["00"]) counts, xedges, yedges, Image = axes[ij].hist2d( chain[:, j], chain[:, i], cmap='hot_r', bins=50, norm=norm) maxcounts = np.amax(counts) if maxcounts > colormax: colormax = maxcounts mincounts = np.amin(counts) if mincounts < colormin: colormin = mincounts else: axes[ij] = matrix_fig.add_subplot( num_params, num_params, subplot_position, sharex=axes[str(j) + str(j)], sharey=axes[str(i) + "0"]) counts, xedges, yedges, Image = 
axes[ij].hist2d( chain[:, j], chain[:, i], cmap='hot_r', bins=50, norm=norm) maxcounts = np.amax(counts) if maxcounts > colormax: colormax = maxcounts mincounts = np.amin(counts) if mincounts < colormin: colormin = mincounts axes[ij].xaxis.grid() if (i != j): axes[ij].yaxis.grid() if i != num_params - 1: hidden_labels.append(axes[ij].get_xticklabels()) if j != 0: hidden_labels.append(axes[ij].get_yticklabels()) if i == j == 0: hidden_labels.append(axes[ij].get_yticklabels()) if i == num_params - 1: axes[str(i) + str(j)].set_xlabel(dr.labels[j], fontsize=18) if j == 0 and i > 0: axes[str(i) + str(j)].set_ylabel(dr.labels[i], fontsize=18) plt.xticks(rotation=30) norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax) count += 1 plt.setp(hidden_labels, visible=False) matrix_fig.tight_layout() matrix_fig.savefig(images_dir + "{}_{}_model_{}_scatterplot_matrix.png".format( drug, channel, args.model)) matrix_fig.savefig(images_dir + "{}_{}_model_{}_scatterplot_matrix.pdf".format( drug, channel, args.model)) plt.close() print "\n\n{} + {} complete!\n\n".format(drug, channel) return None
def run_hierarchical(drug_channel): global pic50_prior pic50_prior = [ -2. ] # bad way to deal with sum_of_square_diffs in hierarchical case global pic50_hill_lowers pic50_hill_priors_lowers = np.array([-2., 0.]) drug, channel = drug_channel print "\n\n{} + {}\n\n".format(drug, channel) # for reproducible results, otherwise choose a different seed seed = 1 num_expts, experiment_numbers, experiments = dr.load_crumb_data( drug, channel) if (0 < (args.num_expts) < num_expts): num_expts = args.num_expts experiment_numbers = [x for x in experiment_numbers[:num_expts]] experiments = [x for x in experiments[:num_expts]] elif (args.num_expts == 0): print "Fitting to all datasets\n" else: print "You've asked to fit to an impossible number of experiments for {} + {}\n".format( drug, channel) print "Therefore proceeding with all experiments in the input data file\n" # set up where to save chains and figures to # also renames anything with a '/' in its name and changes it to a '_' drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file( drug, channel, num_expts) best_fits = [] for expt in experiment_numbers: start = time.time() x0 = np.array([2.5, 1.]) # (pIC50,Hill) not fitting sigma by CMA-ES sigma0 = 0.1 opts = cma.CMAOptions() opts['seed'] = expt es = cma.CMAEvolutionStrategy(x0, sigma0, opts) while not es.stop(): X = es.ask() es.tell(X, [ sum_of_square_diffs(x**2 + pic50_hill_priors_lowers, experiments[expt][:, 0], experiments[expt][:, 1]) for x in X ]) res = es.result best_fits.append( np.concatenate( (res[0]**2 + pic50_hill_priors_lowers, [initial_sigma(len(experiments[expt][:, 0]), res[1])]))) best_fits = np.array(best_fits) fig = plt.figure(figsize=(5.5, 4.5)) ax = fig.add_subplot(111) ax.set_xscale('log') xmin = 1000 xmax = -1000 for expt in experiments: a = np.min(expt[:, 0]) b = np.max(expt[:, 0]) if a < xmin: xmin = a if b > xmax: xmax = b xmin = int(np.log10(xmin)) - 1 xmax = int(np.log10(xmax)) + 3 num_x_pts = 101 
x = np.logspace(xmin, xmax, num_x_pts) # from http://colorbrewer2.org colors = [ '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928' ] skip_best_fits_plot = False if (num_expts > len(colors)): skip_best_fits_plot = True print "Not enough colours to print all experiments' best fits, so skipping that" if (not skip_best_fits_plot): for expt in experiment_numbers: print "best_fits:", best_fits print "best_fits[{}]:".format(expt), best_fits[expt] ax.plot(x, dr.dose_response_model( x, best_fits[expt, 1], dr.pic50_to_ic50(best_fits[expt, 0])), color=colors[expt], lw=2) ax.scatter(experiments[expt][:, 0], experiments[expt][:, 1], label='Expt {}'.format(expt + 1), color=colors[expt], s=100) ax.set_ylim(0, 100) ax.set_xlim(min(x), max(x)) ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug)) ax.set_ylabel('% {} block'.format(channel)) ax.legend(loc=2) ax.grid() ax.set_title('Hills = {}\nIC50s = {}'.format( [round(best_fits[expt, 1], 1) for expt in experiment_numbers], [ round(dr.pic50_to_ic50(best_fits[expt, 0]), 1) for expt in experiment_numbers ])) fig.tight_layout() fig.savefig(figs_dir + '{}_{}_cma-es_best_fits.png'.format(drug, channel)) fig.savefig(figs_dir + '{}_{}_cma-es_best_fits.pdf'.format(drug, channel)) plt.close() locs = np.array([0., 2., -4, 0.01, dr.sigma_loc]) # lower bounds for alpha,beta,mu,s,sigma sigma_cur = np.mean(best_fits[:, -1]) if (sigma_cur <= locs[3]): sigma_cur = locs[3] + 0.1 print "sigma_cur =", sigma_cur # find initial alpha and beta values by fitting log-logistic distribution to best fits # there is an inbuilt fit function, but I found it to be unreliable for some reason x0 = np.array([0.5, 0.5]) sigma0 = 0.1 opts = cma.CMAOptions() opts['seed'] = 1 es = cma.CMAEvolutionStrategy(x0, sigma0, opts) while not es.stop(): X = es.ask() es.tell(X, [ -np.product(st.fisk.pdf(best_fits[:, 1], c=x[1], scale=x[0], loc=0)) for x in X ]) res = es.result alpha_cur, beta_cur = 
np.copy(res[0]) if alpha_cur <= locs[0]: alpha_cur = locs[0] + 0.1 if beta_cur <= locs[1]: beta_cur = locs[1] + 0.1 # here I have used the fit function, for some reason this one worked more consitently # but again, the starting point for MCMC is not too important # a bad starting position can increase the time you have to run MCMC for to get a "converged" output # at worst, it can get stuck in a local optimum, but we haven't found this to be a problem yet mu_cur, s_cur = st.logistic.fit(best_fits[:, 0]) if mu_cur <= locs[2]: mu_cur = locs[2] + 0.1 if s_cur <= locs[3]: s_cur = locs[3] + 0.1 first_iteration = np.concatenate( ([alpha_cur, beta_cur, mu_cur, s_cur], best_fits[:, :-1].flatten(), [sigma_cur])) print "first mcmc iteration:\n", first_iteration # these are the numbers taken straight from Elkins (see paper for reference) elkins_hill_alphas = np.array([ 1.188, 1.744, 1.530, 0.930, 0.605, 1.325, 1.179, 0.979, 1.790, 1.708, 1.586, 1.469, 1.429, 1.127, 1.011, 1.318, 1.063 ]) elkins_hill_betas = 1. 
/ np.array([ 0.0835, 0.1983, 0.2089, 0.1529, 0.1206, 0.2386, 0.2213, 0.2263, 0.1784, 0.1544, 0.2486, 0.2031, 0.2025, 0.1510, 0.1837, 0.1677, 0.0862 ]) elkins_pic50_mus = np.array([ 5.235, 5.765, 6.060, 5.315, 5.571, 7.378, 7.248, 5.249, 6.408, 5.625, 7.321, 6.852, 6.169, 6.217, 5.927, 7.414, 4.860 ]) elkins_pic50_sigmas = np.array([ 0.0760, 0.1388, 0.1459, 0.2044, 0.1597, 0.2216, 0.1856, 0.1560, 0.1034, 0.1033, 0.1914, 0.1498, 0.1464, 0.1053, 0.1342, 0.1808, 0.0860 ]) elkins = [ elkins_hill_alphas, elkins_hill_betas, elkins_pic50_mus, elkins_pic50_sigmas ] # building Gamma prior distributions for alpha,beta,mu,s(,sigma, but sigma not from elkins) # wide enough to cover Elkins values and allow room for extra variation alpha_mode = np.mean(elkins_hill_alphas) beta_mode = np.mean(elkins_hill_betas) mu_mode = np.mean(elkins_pic50_mus) s_mode = np.mean(elkins_pic50_sigmas) sigma_mode = dr.sigma_mode modes = np.array([alpha_mode, beta_mode - 2., mu_mode, s_mode, sigma_mode]) print "modes:", modes # designed for priors to have modes at means of elkins data, but width is more important shapes = np.array([5., 2.5, 7.5, 2.5, dr.sigma_shape]) # must all be greater than 1 scales = (modes - locs) / (shapes - 1.) 
labels = [r'$\alpha$', r'$\beta$', r'$\mu$', r'$s$', r'$\sigma$'] file_labels = ['alpha', 'beta', 'mu', 's', 'sigma'] # ranges to plot priors mins = [0, 0, -5, 0, 0] maxs = [8, 22, 20, 2, 25] prior_xs = [] priors = [] total_axes = (6, 4) fig = plt.figure(figsize=(6, 7)) for i in range(len(labels) - 1): if i == 0: axloc = (0, 0) elif i == 1: axloc = (0, 2) elif i == 2: axloc = (2, 0) elif i == 3: axloc = (2, 2) ax = plt.subplot2grid(total_axes, axloc, colspan=2, rowspan=2) x_prior = np.linspace(mins[i], maxs[i], 501) prior = st.gamma.pdf(x_prior, a=shapes[i], scale=scales[i], loc=locs[i]) prior_xs.append(x_prior) priors.append(prior) ax.plot(x_prior, prior, label='Gamma prior', lw=2) ax.set_xlabel(labels[i]) ax.set_ylabel('Probability density') ax.set_xlim(mins[i], maxs[i]) ax.grid() priormax = np.max(prior) hist, bin_edges = np.histogram(elkins[i], bins=10) histmax = np.max(hist) w = bin_edges[1] - bin_edges[0] bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2. # scaled histogram just to fit plot better, but this scaling doesn't matter ax.bar(bin_edges[:-1], priormax / histmax * hist, width=w, color='gray', edgecolor='grey') i = len(labels) - 1 ax = plt.subplot2grid(total_axes, (4, 1), colspan=2, rowspan=2) x_prior = np.linspace(mins[i], maxs[i], 501) prior = st.gamma.pdf(x_prior, a=shapes[i], scale=scales[i], loc=locs[i]) ax.plot(x_prior, prior, label='Gamma prior', lw=2) prior_xs.append(x_prior) priors.append(prior) ax.set_xlabel(labels[i]) ax.set_ylabel('Probability density') ax.set_xlim(mins[i], maxs[i]) ax.grid() fig.tight_layout() fig.savefig(figs_dir + 'all_prior_distributions.png') fig.savefig(figs_dir + 'all_prior_distributions.pdf') plt.close() #sys.exit # uncomment this if you just want to plot the priors and then quit # create/wipe MCMC output file with open(chain_file, 'w') as outfile: outfile.write( "# Hill ~ log-logistic(alpha,beta), pIC50 ~ logistic(mu,s)\n") outfile.write( "# alpha, beta, mu, s, hill_1, pic50_1, hill_2, pic50_2, ..., hill_Ne, 
pic50_Ne, sigma\n" ) # this is the order of parameters stored in the chain # have to choose initial covariance matrix for proposal distribution # we set it to a diagonal with entries scaled to the initial parameter values first_cov = np.diag(0.01 * np.abs(first_iteration)) mean_estimate = np.copy(first_iteration) dim = len(first_iteration) # we do not start adaptation straight away # just to give the algorithm a chance to look around # many of these pre-adaptation proposals will probably be rejected, if the initial step size is too lareg when_to_adapt = 100 * dim theta_cur = np.copy(first_iteration) cov_cur = np.copy(first_cov) print "theta_cur =", theta_cur log_target_cur = log_target_distribution(experiments, theta_cur, shapes, scales, locs) print "initial log_target_cur =", log_target_cur # effectively step size, scales covariance matrix loga = 0. # what fraction of proposed samples are being accepted into the chain acceptance = 0. # what fraction of samples we WANT accepted into the chain # loga updates itself to try to make this dream come true target_acceptance = 0.25 # perform thinning to reduce autocorrelation (make saved iterations more closely represent independent samples from target distribution) # also saves file space, win win thinning = args.thinning try: total_iterations = args.iterations except: total_iterations = 200000 # after what fraction of total_iterations to print a little status message status_when = 10000 saved_iterations = total_iterations / thinning + 1 pre_thin_burn = total_iterations / 4 # we discard the first quarter of iterations, as this gen burn = saved_iterations / 4 # pre-allocate the space for MCMC iterations # not a problem when we don't need to do LOADS of iterations # but might become more of a hassle if we wanted to run it for ages along with loads of parameters chain = np.zeros((saved_iterations, dim + 1)) chain[0, :] = np.copy(np.concatenate((first_iteration, [log_target_cur]))) # MCMC! 
start = time.time() t = 1 while t <= total_iterations: theta_star = npr.multivariate_normal(theta_cur, np.exp(loga) * cov_cur) log_target_star = log_target_distribution(experiments, theta_star, shapes, scales, locs) accept_prob = npr.rand() if (np.log(accept_prob) < log_target_star - log_target_cur): theta_cur = theta_star log_target_cur = log_target_star accepted = 1 else: accepted = 0 acceptance = ((t - 1.) * acceptance + accepted) / t if (t > when_to_adapt): s = t - when_to_adapt gamma_s = 1 / (s + 1)**0.6 temp_covariance_bit = np.array([theta_cur - mean_estimate]) cov_cur = (1 - gamma_s) * cov_cur + gamma_s * np.dot( np.transpose(temp_covariance_bit), temp_covariance_bit) mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur loga += gamma_s * (accepted - target_acceptance) if t % thinning == 0: chain[t / thinning, :] = np.concatenate( (np.copy(theta_cur), [log_target_cur])) if (t % status_when == 0): print "{} / {}".format(t / status_when, total_iterations / status_when) time_taken_so_far = time.time() - start estimated_time_left = time_taken_so_far / t * (total_iterations - t) print "Time taken: {} s = {} min".format( np.round(time_taken_so_far, 1), np.round(time_taken_so_far / 60, 2)) print "acceptance = {}".format(np.round(acceptance, 5)) print "Estimated time remaining: {} s = {} min".format( np.round(estimated_time_left, 1), np.round(estimated_time_left / 60, 2)) t += 1 print "**********" print "final_iteration =", chain[-1, :] with open(chain_file, 'a') as outfile: np.savetxt(outfile, chain) # save (alpha,mu) samples to be used as (Hill,pIC50) values in AP simulations # these are direct 'top-level' samples, not samples from the posterior predictive distributions indices = npr.randint(burn, saved_iterations, args.num_APs) samples_file = dr.alpha_mu_downsampling(drug, channel) AP_samples = chain[indices, :] print "saving (alpha,mu) samples to", samples_file with open(samples_file, 'w') as outfile: outfile.write( '# {} (alpha,mu) samples from 
hierarchical MCMC for {} + {}\n'. format(args.num_APs, drug, channel)) np.savetxt(outfile, AP_samples[:, [0, 2]]) # this can be a quick visual check to see if the chain is mixing well # it will plot one big tall figure with all parameter paths plotted if args.plot_parameter_paths: fig = plt.figure(figsize=(10, 4 * dim)) ax0 = fig.add_subplot(dim, 1, 1) ax0.plot(chain[:, 0]) ax0.set_ylabel(r'$\alpha$') plt.setp(ax0.get_xticklabels(), visible=False) for i in range(1, dim): ax = fig.add_subplot(dim, 1, i + 1, sharex=ax0) ax.plot(chain[:t, i]) if i < dim - 1: plt.setp(ax.get_xticklabels(), visible=False) elif i == 1: y_label = r'$\beta$' elif i == 2: y_label = r'$\mu$' elif i == 3: y_label = r'$s$' elif (i % 2 == 0) and (i < dim - 1): y_label = r'$pIC50_{' + str(i / 2 - 1) + '}$' elif (i < dim - 1): y_label = r'$Hill_{' + str(i / 2 - 1) + '}$' else: y_label = r'$\sigma$' ax.set_xlabel('Iteration (thinned)') ax.set_ylabel(y_label) fig.tight_layout() fig.savefig(figs_dir + '{}_{}_parameter_paths.png'.format(drug, channel)) plt.close() # plot all marginal posteriors separately, after discarding burn-in # also a good visual check to see if it looks like they have converged marginals_dir = figs_dir + 'marginals/png/' if not os.path.exists(marginals_dir): os.makedirs(marginals_dir) for i in range(dim): fig = plt.figure(figsize=(5, 4)) ax = fig.add_subplot(111) ax.hist(chain[burn:, i], bins=50, normed=True, color='blue', edgecolor='blue') ax.set_ylabel('Marginal probability density') if i == 0: x_label = r'$\alpha$' filename = 'alpha' elif i == 1: x_label = r'$\beta$' filename = 'beta' elif i == 2: x_label = r'$\mu$' filename = 'mu' elif i == 3: x_label = r'$s$' filename = 's' elif (i % 2 == 0) and (i < dim - 1): x_label = r'$Hill_{' + str(i / 2 - 1) + '}$' filename = 'hill_{}'.format(i / 2 - 1) elif (i < dim - 1): x_label = r'$pIC50_{' + str(i / 2 - 1) + '}$' filename = 'pic50_{}'.format(i / 2 - 1) else: x_label = r'$\sigma$' filename = 'sigma' ax.set_xlabel(x_label) 
fig.tight_layout() fig.savefig(marginals_dir + '{}_{}_{}_marginal.png'.format(drug, channel, filename)) #fig.savefig(marginals_dir+'{}_{}_{}_marginal.pdf'.format(drug,channel,filename)) plt.close() total_axes = (6, 4) fig = plt.figure(figsize=(6, 7)) for i in range(5): # have to do sigma separately if i == 0: axloc = (0, 0) elif i == 1: axloc = (0, 2) elif i == 2: axloc = (2, 0) elif i == 3: axloc = (2, 2) elif i == 4: axloc = (4, 0) ax = plt.subplot2grid(total_axes, axloc, colspan=2, rowspan=2) ax.set_xlabel(labels[i]) ax.set_ylabel('Probability density') ax.grid() if (i < 4): min_sample = np.min(chain[burn:, i]) max_sample = np.max(chain[burn:, i]) ax.hist(chain[burn:, i], bins=50, normed=True, color='blue', edgecolor='blue') elif (i == 4): min_sample = np.min(chain[burn:, -2]) max_sample = np.max(chain[burn:, -2]) ax.hist(chain[burn:, -2], bins=50, normed=True, color='blue', edgecolor='blue') # -1 would be log-target ax.set_xlim(min_sample, max_sample) pts_in_this_range = np.where((prior_xs[i] >= min_sample) & (prior_xs[i] <= max_sample)) x_in_this_range = prior_xs[i][pts_in_this_range] prior_in_this_range = priors[i][pts_in_this_range] line = ax.plot(x_in_this_range, prior_in_this_range, lw=2, color='red', label='Prior distributions') if (i == 0 or i == 3): plt.xticks(rotation=90) leg_ax = plt.subplot2grid(total_axes, (4, 2), colspan=2, rowspan=2) leg_ax.axis('off') hist = mpatches.Patch(color='blue', label='Normalised histograms') leg_ax.legend(handles=line + [hist], loc="center", fontsize=12, bbox_to_anchor=[0.38, 0.7]) fig.tight_layout() fig.savefig(figs_dir + 'all_prior_distributions_and_marginals.png') fig.savefig(figs_dir + 'all_prior_distributions_and_marginals.pdf') plt.close() print "Marginal plots saved in", marginals_dir print "\n\n{} + {} complete!\n\n".format(drug, channel)
def sum_of_square_diffs(unscaled_params, doses, responses):
    """Return the sum of squared residuals for unconstrained parameters.

    The optimiser coordinates are squared before use, so any real input
    yields a Hill coefficient of at least 0 and a pIC50 of at least -1.

    Args:
        unscaled_params: indexable; item 0 maps to the Hill coefficient and
            item 1 to the pIC50.
        doses: concentrations passed to ``dr.dose_response_model``.
        responses: observed responses, one per dose.

    Returns:
        The sum of squared differences between model and observations.
    """
    hill_coord, pic50_coord = unscaled_params[0], unscaled_params[1]
    hill = hill_coord**2  # squaring keeps Hill non-negative
    pIC50 = pic50_coord**2 - 1  # squaring and shifting keeps pIC50 >= -1
    predicted = dr.dose_response_model(doses, hill, dr.pic50_to_ic50(pIC50))
    residuals = predicted - responses
    return np.sum(residuals**2)
def plot_mcmc_samples(drug_channel):
    """Overlay sampled dose-response curves from each model's MCMC chain on the data.

    One figure with two stacked panels is produced: panel 1 for model 1
    (Hill fixed at 1, pIC50 read from chain column 0) and panel 2 for
    model 2 (pIC50 and Hill read from chain columns 0 and 1).  For each
    model, 1200 rows are drawn with replacement from the whole chain and
    plotted as faint black curves, then the pooled experimental points are
    drawn on top.

    Relies on the module-level names ``temperature`` and ``all_figs_dir``.

    :param drug_channel: ``(drug, channel)`` pair.
    :returns: ``None``; the figure is written as a PNG under
        ``all_figs_dir`` and as a PDF under the drug/channel figures dir.
    """
    drug, channel = drug_channel
    fig = plt.figure(figsize=(5, 8))
    axes = {1: fig.add_subplot(211), 2: fig.add_subplot(212)}
    # Titles depend only on the model, so they are fixed up front rather
    # than being re-assigned on every sample and leaked out of the loop.
    titles = {
        1: "$M_1$, fixed $Hill=1$, varying $pIC50$",
        2: "$M_2$, varying $pIC50$ and $Hill$",
    }
    num_models = 2
    how_many_samples_to_plot = 1200
    fsize = 14
    num_pts = 101

    for model in range(1, num_models + 1):
        ax = axes[model]
        dr.define_model(model)
        chain_file = dr.define_chain_file(model, drug, channel, temperature)
        # NOTE(review): the data and figs_dir look independent of `model`
        # and could probably be loaded once before the loop -- left here
        # because dr.define_model() may change module state; confirm.
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool every experiment into one flat set of (conc, response) points.
        concs = np.concatenate(
            [experiments[i][:, 0] for i in range(num_expts)])
        responses = np.concatenate(
            [experiments[i][:, 1] for i in range(num_expts)])

        mcmc_samples = np.loadtxt(chain_file, usecols=range(dr.num_params))
        saved_its = mcmc_samples.shape[0]
        sample_indices = npr.randint(0, saved_its, how_many_samples_to_plot)
        mcmc_samples = mcmc_samples[sample_indices]

        # Plot from one decade below the smallest dose to two above the largest.
        x_range = np.logspace(
            int(np.log10(np.min(concs))) - 1,
            int(np.log10(np.max(concs))) + 2, num_pts)

        ax.set_xscale('log')
        ax.grid()
        ax.set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        ax.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                      fontsize=fsize)
        ax.set_ylim(0, 100)

        for sample in mcmc_samples:
            if model == 1:
                pic50, hill = sample[0], 1
            else:
                pic50, hill = sample[:2]
            ax.plot(x_range,
                    dr.dose_response_model(x_range, hill,
                                           dr.pic50_to_ic50(pic50)),
                    color='black', alpha=0.01)

        ax.plot(concs, responses, 'o', color='orange', ms=10,
                label="Expt data")
        ax.set_title(titles[model], fontsize=fsize)
        ax.legend(loc=2)

    fig.tight_layout()
    fig.savefig(all_figs_dir + '{}_{}_mcmc_samples.png'.format(drug, channel))
    # The original passed a third, unused `model` argument to format() here.
    fig.savefig(figs_dir + '{}_{}_mcmc_samples.pdf'.format(drug, channel))
    plt.close(fig)
    return None
def run(drug_channel):
    """Draw the six-panel "all predictions" figure for one drug/channel pair.

    Top row (A-C): sampled dose-response curves over the experimental data.
    Bottom row (D-F): histograms of predicted % block at each concentration
    in ``args.concs``.  Columns, left to right, follow the panel titles:
    hierarchical predicted future experiments, hierarchical inferred
    underlying effects, and single-level inferred effects.

    Reads the module-level ``args`` (num_expts, num_hist_samples,
    num_samples, concs, fix_hill) and ``colors``.  Saves a PNG and a PDF in
    the directory returned by ``dr.all_predictions_dir``.

    :param drug_channel: ``(drug, channel)`` pair.
    """
    drug, channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug, channel)
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    # Optionally select the chain fitted to only the first args.num_expts
    # experiments.  NOTE(review): experiment_numbers/experiments are not
    # truncated here, so every loaded experiment is still plotted -- confirm
    # that is intended.
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(
        drug, channel, num_expts)
    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    # Only rows from the last three quarters of the chain are sampled below.
    # Integer division is required: burn is used as a randint bound.
    burn = end / 4
    pic50_samples = np.zeros(args.num_hist_samples)
    hill_samples = np.zeros(args.num_hist_samples)
    rand_idx = npr.randint(burn, end, args.num_hist_samples)
    # For each sampled row, the first four columns (alpha, beta, mu, s)
    # parameterise a Fisk draw for Hill and a logistic draw for pIC50.
    for t in xrange(args.num_hist_samples):
        alpha, beta, mu, s = chain[rand_idx[t], :4]
        hill_samples[t] = st.fisk.rvs(c=beta, scale=alpha, loc=0)
        pic50_samples[t] = st.logistic.rvs(mu, s)
    num_pts = 40
    fig = plt.figure(figsize=(11, 7))

    # Panel A: curves from the Hill / pIC50 draws made above.
    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin, xmax, num_pts)
    ax1.set_xscale('log')
    ax1.set_ylim(0, 100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin, 10**xmax)
    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)
    # Vertical markers use colours from index 3 onwards.
    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)
    # NOTE(review): subset_idx is never read afterwards; the draw still
    # advances the global numpy RNG, so removing it changes later samples.
    subset_idx = npr.randint(0, args.num_hist_samples, args.num_samples)
    for i in xrange(
            args.num_samples
    ):  # only plot the first T of the H samples (should be fine because they're all randomly selected)
        ax1.plot(concs,
                 dr.dose_response_model(concs, hill_samples[i],
                                        dr.pic50_to_ic50(pic50_samples[i])),
                 color='black',
                 alpha=0.01)
    lfs = 9  # legend font size shared by all panels
    ax1.legend(loc=2, fontsize=lfs)

    # Panel D: histogram of % block at each marked concentration, using all
    # of the Hill / pIC50 draws.
    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0, 100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc, hill_samples,
                                        dr.pic50_to_ic50(pic50_samples)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc="best", fontsize=lfs)

    # Panel B: curves built directly from chain columns 0 (passed as Hill)
    # and 2 (passed as pIC50).
    ax3 = fig.add_subplot(232, sharey=ax1, sharex=ax1)
    ax3.grid()
    ax3.set_xscale('log')
    ax3.set_ylim(0, 100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin, 10**xmax)
    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:, 0],
                    experiments[expt][:, 1],
                    label='Expt {}'.format(expt + 1),
                    color=colors[expt],
                    s=100,
                    zorder=10)
    alpha_indices = npr.randint(burn, end, args.num_samples)
    alpha_samples = chain[alpha_indices, 0]
    mu_samples = chain[alpha_indices, 2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,
                    color=colors[3 + i],
                    lw=2,
                    label=r"{} $\mu$M".format(conc),
                    alpha=0.8)
    for i in xrange(args.num_samples):
        ax3.plot(concs,
                 dr.dose_response_model(concs, alpha_samples[i],
                                        dr.pic50_to_ic50(mu_samples[i])),
                 color='black',
                 alpha=0.01)
    ax3.legend(loc=2, fontsize=lfs)

    # Panel E: histogram counterpart of panel B, from a fresh draw of rows.
    ax4 = fig.add_subplot(235, sharey=ax2, sharex=ax2)
    ax4.set_xlim(0, 100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()
    hist_indices = npr.randint(burn, end, args.num_hist_samples)
    alphas = chain[hist_indices, 0]
    mus = chain[hist_indices, 2]
    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc, alphas, dr.pic50_to_ic50(mus)),
                 bins=50,
                 normed=True,
                 color=colors[3 + i],
                 alpha=0.8,
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')
    # Middle column shares its y axis with the left column, so hide the
    # duplicate tick labels.
    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)

    # now plot non-hierarchical
    num_params = 3
    temperature = 1
    if args.fix_hill:
        model = 1
    else:
        model = 2
    drug, channel, chain_file, figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(
        model, drug, channel, temperature)
    # From here on chain/end/burn refer to the single-level chain.
    chain = np.loadtxt(
        chain_file, usecols=range(num_params -
                                  1))  # not interested in log-target values right now
    end = chain.shape[0]
    burn = end / 4
    sample_indices = npr.randint(burn, end, args.num_samples)
    samples = chain[sample_indices, :]

    # Panel C: single-level curves; the two loaded columns are unpacked as
    # (pIC50, Hill).
    # NOTE(review): with args.fix_hill (model 1) confirm that column 1 of
    # the chain really holds Hill and not another parameter.
    ax5 = fig.add_subplot(233, sharey=ax1, sharex=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    ax5.set_xscale('log')
    ax5.set_ylim(0, 100)
    ax5.set_xlim(10**xmin, 10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    # Legend for panel E is only created here.
    ax4.legend(loc="best", fontsize=lfs)
    for expt in experiment_numbers:
        # Only the experiment numbered 1 carries a legend label so that
        # 'All expts' appears once.  NOTE(review): if experiment_numbers
        # does not contain 1, no data label is shown -- confirm.
        if expt == 1:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        label='All expts',
                        zorder=10)
        else:
            ax5.scatter(experiments[expt][:, 0],
                        experiments[expt][:, 1],
                        color='orange',
                        s=100,
                        zorder=10)
    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,
                    color=colors[3 + i],
                    alpha=0.8,
                    lw=2,
                    label=r"{} $\mu$M".format(conc))
    for i in xrange(args.num_samples):
        pic50, hill = samples[i, :]
        ax5.plot(concs,
                 dr.dose_response_model(concs, hill, dr.pic50_to_ic50(pic50)),
                 color='black',
                 alpha=0.01)
    ax5.legend(loc=2, fontsize=lfs)

    # Panel F: histogram counterpart of panel C, from a fresh draw of rows.
    sample_indices = npr.randint(burn, end, args.num_hist_samples)
    samples = chain[sample_indices, :]
    ax6 = fig.add_subplot(236, sharey=ax2, sharex=ax2)
    ax6.set_xlim(0, 100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc, samples[:, 1],
                                        dr.pic50_to_ic50(samples[:, 0])),
                 bins=50,
                 normed=True,
                 alpha=0.8,
                 color=colors[3 + i],
                 lw=0,
                 label=r"{} $\mu$M {}".format(conc, drug))
    ax6.set_title('F. Single-level inferred\neffects')
    ax6.legend(loc="best", fontsize=lfs)

    plot_dir = dr.all_predictions_dir(drug, channel)
    fig.tight_layout()
    png_file = plot_dir + '{}_{}_all_predictions_corrected.png'.format(
        drug, channel)
    print png_file
    fig.savefig(png_file)
    pdf_file = plot_dir + '{}_{}_all_predictions_corrected.pdf'.format(
        drug, channel)
    print pdf_file
    fig.savefig(
        pdf_file
    )  # uncomment to save as pdf, or change extension to whatever you want
    plt.close()
    print "Figures saved in", plot_dir
def run(drug, channel):
    """Plot experiment-level curves and marginal histograms from a hierarchical chain.

    The figure has three stacked panels: sampled dose-response curves for
    every experiment over the data, histograms of each experiment's pIC50
    samples, and histograms of each experiment's Hill samples.

    Chain layout as used here: columns 0-3 and column ``num_params - 2``
    are the top-level parameters; the last column is excluded (presumably
    the log-target -- confirm against the sampler); the columns in between
    alternate ``Hill_i, pIC50_i`` per experiment.

    Changes from the previous version:
      * the 4-colour palette is no longer overwritten by a 3-colour one,
        which raised IndexError with four experiments;
      * curves are drawn for every experiment in the chain, not a
        hard-coded three;
      * histogram transparency no longer divides by zero for one experiment;
      * integer division and print calls behave the same on Python 2 and 3.

    Reads the module-level ``args.num_expts``.  Saves a PNG in the
    hierarchical figures directory and then shows the figure (blocking).

    :param drug: drug name.
    :param channel: ion-channel name.
    """
    print("\n\n{} + {}\n\n".format(drug, channel))
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        drug, channel)
    if 0 < args.num_expts < num_expts:
        # Only the first args.num_expts experiments were used for this chain.
        num_expts = args.num_expts
        experiments = list(experiments[:num_expts])
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = (
        dr.hierarchical_output_dirs_and_chain_file(drug, channel, num_expts))
    chain = np.loadtxt(chain_file)
    end, num_params = chain.shape
    burn = end // 4  # rows before this index are never sampled or plotted

    top_param_indices = [0, 1, 2, 3, num_params - 2]
    mid_param_indices = [
        i for i in range(num_params - 1) if i not in top_param_indices
    ]
    # Two columns (Hill, pIC50) per experiment actually present in the chain.
    num_expts = len(mid_param_indices) // 2

    # qualitative and colourblind safe, apparently
    if num_expts <= 3:
        palette = ['#d7191c', '#fdae61', '#2c7bb6']
    elif num_expts == 4:
        palette = ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c']
    else:
        palette = [
            '#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
            '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a'
        ]
    num_colours = len(palette)

    num_curves = 50
    samples = chain[npr.randint(burn, end, num_curves), :]

    all_fig = plt.figure(figsize=(4, 8))
    curves = all_fig.add_subplot(311)
    curves.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    curves.set_ylabel(r'% {} block'.format(channel))
    curves.grid()
    curves.set_xscale('log')
    x_range = np.logspace(-4, 2, 201)
    for sample in samples:
        for i in range(num_expts):
            hill_col = mid_param_indices[2 * i]
            pic50_col = mid_param_indices[2 * i + 1]
            response = dr.dose_response_model(
                x_range, sample[hill_col], dr.pic50_to_ic50(sample[pic50_col]))
            curves.plot(x_range, response,
                        color=palette[i % num_colours], alpha=0.2)
    for i, expt in enumerate(experiments):
        # Cycle the palette so more data sets than colours cannot crash.
        curves.plot(expt[:, 0], expt[:, 1], 'o',
                    color=palette[i % num_colours], zorder=10, ms=10)
    curves.set_xlim(10**-4, 10**2)

    pic50s = all_fig.add_subplot(312)
    pic50s.grid()
    pic50s.set_ylabel('Probability density')
    pic50s.set_xlabel(r'$pIC50_i$')
    hills = all_fig.add_subplot(313)
    hills.grid()
    hills.set_ylabel('Probability density')
    hills.set_xlabel(r'$Hill_i$')

    # More experiments => more transparent histograms; guard the
    # single-experiment case, which used to divide by zero.
    alpha = 1. / max(num_expts - 1, 1)
    to_rgba = matplotlib.colors.ColorConverter().to_rgba
    for i, col in enumerate(mid_param_indices):
        expt_idx = i // 2
        color = to_rgba(palette[expt_idx % num_colours], alpha=alpha)
        # Even offsets are Hill columns, odd offsets are pIC50 columns.
        panel = hills if i % 2 == 0 else pic50s
        # NOTE(review): `normed` was removed in matplotlib 3.1 in favour of
        # `density`; kept because this script targets an older matplotlib.
        panel.hist(chain[burn:, col], normed=True, bins=40, color=color,
                   edgecolor='none',
                   label=r'$i = {}$'.format(expt_idx + 1))
    hills.legend(loc=1, fontsize=10)

    all_fig.tight_layout()
    all_fig.savefig(
        figs_dir +
        '{}_{}_hierarchical_curves_and_hists.png'.format(drug, channel))
    plt.show(block=True)
    print("Figures saved in {}".format(figs_dir))
    print("\n\n{} + {} done\n\n".format(drug, channel))
def do_plots(drug_channel):
    """Plot single-level MCMC prediction curves for models 1 and 2 side by side.

    Left panel: model 1 (Hill fixed at 1).  Right panel: model 2 (pIC50 and
    Hill both sampled).  Each panel shows ``num_curves`` faint curves drawn
    from random chain rows, the curve for the row with the largest value in
    the last chain column ("Max PD"), and the pooled experimental data.

    Relies on the module-level names ``num_pts``, ``num_curves`` and
    ``temperature``.  The PNG is saved relative to the working directory.

    :param drug_channel: ``(drug, channel)`` pair.
    :returns: ``None``.
    """
    top_drug, top_channel = drug_channel
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(
        top_drug, top_channel)

    # Pool all experiments into flat concentration / response arrays.
    pooled = [experiments[k] for k in range(num_expts)]
    concs = np.concatenate([np.array([])] + [e[:, 0] for e in pooled])
    responses = np.concatenate([np.array([])] + [e[:, 1] for e in pooled])

    # Smallest strictly positive and largest concentration over all
    # experiments, starting from sentinel bounds.
    xmin, xmax = 1000, -1000
    for expt in experiments:
        doses = expt[:, 0]
        lowest = np.min(doses)
        highest = np.max(doses)
        if 0 < lowest < xmin:
            xmin = lowest
        if highest > xmax:
            xmax = highest
    # Pad by two decades below and three above, as integer exponents.
    xmin = int(np.log10(xmin)) - 2
    xmax = int(np.log10(xmax)) + 3
    x = np.logspace(xmin, xmax, num_pts)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(9, 4),
                                   sharey=True, sharex=True)
    ax1.set_xscale('log')
    ax1.grid()
    ax2.grid()
    ax1.set_xlim(10**xmin, 10**xmax)
    ax1.set_ylim(0, 100)
    conc_label = r'{} concentration ($\mu$M)'.format(top_drug)
    ax1.set_xlabel(conc_label)
    ax2.set_xlabel(conc_label)
    ax1.set_ylabel(r'% {} block'.format(top_channel))

    # ---- Model 1: only pIC50 varies ----
    model = 1
    drug, channel, chain_file, _ = dr.nonhierarchical_chain_file_and_figs_dir(
        model, top_drug, top_channel, temperature)
    chain = np.loadtxt(chain_file)
    best_idx = np.argmax(chain[:, -1])
    best_pic50, _ = chain[best_idx, [0, 1]]
    saved_its, _ = chain.shape
    rand_idx = npr.randint(saved_its, size=num_curves)
    pic50s = chain[rand_idx, 0]
    ax1.set_title(r'Model 1: $pIC50 = {}$, fixed $Hill = 1$'.format(
        np.round(best_pic50, 2)))
    for sampled_pic50 in pic50s:
        ax1.plot(x,
                 dr.dose_response_model(x, 1.,
                                        dr.pic50_to_ic50(sampled_pic50)),
                 color='black', alpha=0.02)
    max_pd_curve = dr.dose_response_model(x, 1.,
                                          dr.pic50_to_ic50(best_pic50))
    ax1.plot(x, max_pd_curve, label='Max PD', lw=1.5, color='red')
    ax1.plot(concs, responses, "o", color='orange', ms=10, label='Data',
             zorder=10)
    # Opaque proxy line so the near-transparent sample curves get a
    # visible legend entry.
    sample_proxy = plt.Line2D((0, 1), (0, 0), color='k')
    handles, labels = ax1.get_legend_handles_labels()
    # One specific data set gets its legend in a different corner.
    special_case = drug == "Quinine" and channel == "Nav1.5-late"
    loc = 4 if special_case else 2
    ax1.legend(handles + [sample_proxy], labels + ["Samples"], loc=loc)

    # ---- Model 2: pIC50 and Hill both vary ----
    model = 2
    drug, channel, chain_file, _ = dr.nonhierarchical_chain_file_and_figs_dir(
        model, top_drug, top_channel, temperature)
    chain = np.loadtxt(chain_file)
    best_idx = np.argmax(chain[:, -1])
    best_pic50, best_hill, _ = chain[best_idx, [0, 1, 2]]
    ax2.set_title(r"Model 2: $pIC50={}$, Hill = {}".format(
        np.round(best_pic50, 2), np.round(best_hill, 2)))
    saved_its, _ = chain.shape
    rand_idx = npr.randint(saved_its, size=num_curves)
    pic50s = chain[rand_idx, 0]
    hills = chain[rand_idx, 1]
    for sampled_hill, sampled_pic50 in zip(hills, pic50s):
        ax2.plot(x,
                 dr.dose_response_model(x, sampled_hill,
                                        dr.pic50_to_ic50(sampled_pic50)),
                 color='black', alpha=0.02)
    max_pd_curve = dr.dose_response_model(x, best_hill,
                                          dr.pic50_to_ic50(best_pic50))
    ax2.plot(x, max_pd_curve, label='Max PD', lw=1.5, color='red')
    ax2.plot(concs, responses, "o", color='orange', ms=10, label='Data',
             zorder=10)
    handles, labels = ax2.get_legend_handles_labels()
    ax2.legend(handles + [sample_proxy], labels + ["Samples"], loc=loc)

    fig.tight_layout()
    fig.savefig("{}_{}_nonh_both_models_mcmc_prediction_curves.png".format(
        drug, channel))
    plt.close()
    return None
def run(drug_channel):
    """Draw the six-panel "all predictions" figure for one drug/channel pair.

    Top row (A-C): sampled dose-response curves over the experimental data.
    Bottom row (D-F): histograms of predicted % block at each concentration
    in ``args.concs``.  Columns, left to right, follow the panel titles:
    hierarchical predicted future experiments (sampled from saved CDF
    files), hierarchical inferred underlying effects, and single-level
    inferred effects.

    Reads the module-level ``args`` (num_expts, concs, fix_hill) and
    ``colors``.  Saves a PNG and a PDF in the directory returned by
    ``dr.all_predictions_dir``.

    :param drug_channel: ``(drug, channel)`` pair.
    """
    drug,channel = drug_channel
    print "\n\n{} + {}\n\n".format(drug,channel)
    num_expts, experiment_numbers, experiments = dr.load_crumb_data(drug,channel)
    # Optionally select the outputs fitted to only the first args.num_expts
    # experiments.  NOTE(review): experiment_numbers/experiments are not
    # truncated, so every loaded experiment is still plotted -- confirm.
    if (0 < args.num_expts < num_expts):
        num_expts = args.num_expts
    drug, channel, output_dir, chain_dir, figs_dir, chain_file = dr.hierarchical_output_dirs_and_chain_file(drug,channel,num_expts)
    hill_cdf_file, pic50_cdf_file = dr.hierarchical_posterior_predictive_cdf_files(drug,channel,num_expts)
    hill_cdf = np.loadtxt(hill_cdf_file)
    pic50_cdf = np.loadtxt(pic50_cdf_file)

    # Inverse-CDF sampling: uniform draws are interpolated with CDF column 1
    # as the x grid and column 0 as the returned value.
    num_samples = 2000
    unif_hill_samples = npr.rand(num_samples)
    unif_pic50_samples = npr.rand(num_samples)
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    fig = plt.figure(figsize=(11,7))

    # Panel A: curves from the CDF-sampled Hill / pIC50 values.
    ax1 = fig.add_subplot(231)
    ax1.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax1.set_xscale('log')
    ax1.set_ylim(0,100)
    ax1.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax1.set_ylabel(r'% {} block'.format(channel))
    ax1.set_title('A. Hierarchical predicted\nfuture experiments')
    ax1.set_xlim(10**xmin,10**xmax)
    for expt in experiment_numbers:
        ax1.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    # Vertical markers use colours from index 3 onwards.
    for i, conc in enumerate(args.concs):
        ax1.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax1.plot(concs,dr.dose_response_model(concs,hill_samples[i],dr.pic50_to_ic50(pic50_samples[i])),color='black',alpha=0.01)
    ax1.legend(loc=2,fontsize=10)

    # Panel D: a larger, fresh set of CDF samples for the histograms.
    num_hist_samples = 100000
    unif_hill_samples = npr.rand(num_hist_samples)
    unif_pic50_samples = npr.rand(num_hist_samples)
    hill_samples = np.interp(unif_hill_samples, hill_cdf[:,1], hill_cdf[:,0])
    pic50_samples = np.interp(unif_pic50_samples, pic50_cdf[:,1], pic50_cdf[:,0])
    ax2 = fig.add_subplot(234)
    ax2.set_xlim(0,100)
    ax2.set_xlabel(r'% {} block'.format(channel))
    ax2.set_ylabel(r'Probability density')
    ax2.grid()
    for i, conc in enumerate(args.concs):
        ax2.hist(dr.dose_response_model(conc,hill_samples,dr.pic50_to_ic50(pic50_samples)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax2.set_title('D. Hierarchical predicted\nfuture experiments')
    ax2.legend(loc=2,fontsize=10)

    # Panel B: curves built directly from chain columns 0 (passed as Hill)
    # and 2 (passed as pIC50).
    ax3 = fig.add_subplot(232,sharey=ax1)
    ax3.grid()
    xmin = -4
    xmax = 3
    concs = np.logspace(xmin,xmax,101)
    ax3.set_xscale('log')
    ax3.set_ylim(0,100)
    ax3.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax3.set_title('B. Hierarchical inferred\nunderlying effects')
    ax3.set_xlim(10**xmin,10**xmax)
    for expt in experiment_numbers:
        ax3.scatter(experiments[expt][:,0],experiments[expt][:,1],label='Expt {}'.format(expt+1),color=colors[expt],s=100,zorder=10)
    chain = np.loadtxt(chain_file)
    end = chain.shape[0]
    # Only rows from the last three quarters of the chain are sampled.
    # Integer division is required: burn is used as a randint bound.
    burn = end/4
    num_samples = 1000
    alpha_indices = npr.randint(burn,end,num_samples)
    alpha_samples = chain[alpha_indices,0]
    mu_samples = chain[alpha_indices,2]
    for i, conc in enumerate(args.concs):
        ax3.axvline(conc,color=colors[3+i],lw=2,label=r"{} $\mu$M".format(conc),alpha=0.8)
    for i in xrange(num_samples):
        ax3.plot(concs,dr.dose_response_model(concs,alpha_samples[i],dr.pic50_to_ic50(mu_samples[i])),color='black',alpha=0.01)
    ax3.legend(loc=2,fontsize=10)

    # Panel E: histogram counterpart of panel B, from a fresh draw of rows.
    # NOTE(review): unlike panels D and F, no legend is created for ax4.
    ax4 = fig.add_subplot(235,sharey=ax2)
    ax4.set_xlim(0,100)
    ax4.set_xlabel(r'% {} block'.format(channel))
    ax4.grid()
    num_hist_samples = 100000
    hist_indices = npr.randint(burn,end,num_hist_samples)
    alphas = chain[hist_indices,0]
    mus = chain[hist_indices,2]
    for i, conc in enumerate(args.concs):
        ax4.hist(dr.dose_response_model(conc,alphas,dr.pic50_to_ic50(mus)),bins=50,normed=True,color=colors[3+i],alpha=0.8,edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax4.set_title('E. Hierarchical inferred\nunderlying effects')
    # Middle column shares its y axis with the left column, so hide the
    # duplicate tick labels.
    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.setp(ax4.get_yticklabels(), visible=False)

    # now plot non-hierarchical
    num_params = 3
    drug,channel,chain_file,figs_dir = dr.nonhierarchical_chain_file_and_figs_dir(drug, channel, args.fix_hill)
    # From here on chain/end/burn refer to the single-level chain.
    chain = np.loadtxt(chain_file,usecols=range(num_params-1)) # not interested in log-target values right now
    end = chain.shape[0]
    burn = end/4
    num_samples = 1000
    sample_indices = npr.randint(burn,end,num_samples)
    samples = chain[sample_indices,:]

    # Panel C: single-level curves; column 0 is passed as Hill and column 1
    # as pIC50.  NOTE(review): confirm this column order matches the file
    # written by the single-level sampler.
    ax5 = fig.add_subplot(233,sharey=ax1)
    ax5.grid()
    plt.setp(ax5.get_yticklabels(), visible=False)
    xmin = -4
    xmax = 4  # one decade wider than panels A and B
    concs = np.logspace(xmin,xmax,101)
    ax5.set_xscale('log')
    ax5.set_ylim(0,100)
    ax5.set_xlim(10**xmin,10**xmax)
    ax5.set_xlabel(r'{} concentration ($\mu$M)'.format(drug))
    ax5.set_title('C. Single-level inferred\neffects')
    # NOTE(review): this legend call runs before anything is drawn on ax5
    # and is replaced by the ax5.legend call further down -- looks redundant.
    ax5.legend(fontsize=10)
    for expt in experiment_numbers:
        # Only the experiment numbered 1 carries a legend label so that
        # 'All expts' appears once.
        if expt==1:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,label='All expts',zorder=10)
        else:
            ax5.scatter(experiments[expt][:,0],experiments[expt][:,1],color='orange',s=100,zorder=10)
    for i, conc in enumerate(args.concs):
        ax5.axvline(conc,color=colors[3+i],alpha=0.8,lw=2,label=r"{} $\mu$M".format(conc))
    for i in xrange(num_samples):
        ax5.plot(concs,dr.dose_response_model(concs,samples[i,0],dr.pic50_to_ic50(samples[i,1])),color='black',alpha=0.01)
    ax5.legend(loc=2,fontsize=10)

    # Panel F: histogram counterpart of panel C, from a fresh draw of rows.
    # NOTE(review): no legend is created for ax6 either.
    num_hist_samples = 50000
    sample_indices = npr.randint(burn,end,num_hist_samples)
    samples = chain[sample_indices,:]
    ax6 = fig.add_subplot(236,sharey=ax2)
    ax6.set_xlim(0,100)
    ax6.set_xlabel(r'% {} block'.format(channel))
    plt.setp(ax6.get_yticklabels(), visible=False)
    ax6.grid()
    for i, conc in enumerate(args.concs):
        ax6.hist(dr.dose_response_model(conc,samples[:,0],dr.pic50_to_ic50(samples[:,1])),bins=50,normed=True,alpha=0.8,color=colors[3+i],edgecolor='none',label=r"{} $\mu$M {}".format(conc,drug))
    ax6.set_title('F. Single-level inferred\neffects')
    # Repeats the panel D legend call made earlier.
    ax2.legend(loc=2,fontsize=10)

    plot_dir = dr.all_predictions_dir(drug,channel)
    fig.tight_layout()
    fig.savefig(plot_dir+'{}_{}_all_predictions.png'.format(drug,channel))
    fig.savefig(plot_dir+'{}_{}_all_predictions.pdf'.format(drug,channel)) # uncomment to save as pdf, or change extension to whatever you want
    plt.close()
    print "Figures saved in", plot_dir
def plot_mcmc_samples(drug_channel):
    """Overlay sampled dose-response curves from each model's MCMC chain on the data.

    One figure with two stacked panels is produced: panel 1 for model 1
    (Hill fixed at 1, pIC50 read from chain column 0) and panel 2 for
    model 2 (pIC50 and Hill read from chain columns 0 and 1).  For each
    model, 1200 rows are drawn with replacement from the whole chain and
    plotted as faint black curves, then the pooled experimental points are
    drawn on top.

    Relies on the module-level names ``temperature`` and ``all_figs_dir``.

    :param drug_channel: ``(drug, channel)`` pair.
    :returns: ``None``; the figure is written as a PNG under
        ``all_figs_dir`` and as a PDF under the drug/channel figures dir.
    """
    drug, channel = drug_channel
    fig = plt.figure(figsize=(5, 8))
    axes = {1: fig.add_subplot(211), 2: fig.add_subplot(212)}
    # Titles depend only on the model, so they are fixed up front rather
    # than being re-assigned on every sample and leaked out of the loop.
    titles = {
        1: "$M_1$, fixed $Hill=1$, varying $pIC50$",
        2: "$M_2$, varying $pIC50$ and $Hill$",
    }
    num_models = 2
    how_many_samples_to_plot = 1200
    fsize = 14
    num_pts = 101

    for model in range(1, num_models + 1):
        ax = axes[model]
        dr.define_model(model)
        chain_file = dr.define_chain_file(model, drug, channel, temperature)
        # NOTE(review): the data and figs_dir look independent of `model`
        # and could probably be loaded once before the loop -- left here
        # because dr.define_model() may change module state; confirm.
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool every experiment into one flat set of (conc, response) points.
        concs = np.concatenate(
            [experiments[i][:, 0] for i in range(num_expts)])
        responses = np.concatenate(
            [experiments[i][:, 1] for i in range(num_expts)])

        mcmc_samples = np.loadtxt(chain_file, usecols=range(dr.num_params))
        saved_its = mcmc_samples.shape[0]
        sample_indices = npr.randint(0, saved_its, how_many_samples_to_plot)
        mcmc_samples = mcmc_samples[sample_indices]

        # Plot from one decade below the smallest dose to two above the largest.
        x_range = np.logspace(
            int(np.log10(np.min(concs))) - 1,
            int(np.log10(np.max(concs))) + 2, num_pts)

        ax.set_xscale('log')
        ax.grid()
        ax.set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        ax.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                      fontsize=fsize)
        ax.set_ylim(0, 100)

        for sample in mcmc_samples:
            if model == 1:
                pic50, hill = sample[0], 1
            else:
                pic50, hill = sample[:2]
            ax.plot(x_range,
                    dr.dose_response_model(x_range, hill,
                                           dr.pic50_to_ic50(pic50)),
                    color='black', alpha=0.01)

        ax.plot(concs, responses, 'o', color='orange', ms=10,
                label="Expt data")
        ax.set_title(titles[model], fontsize=fsize)
        ax.legend(loc=2)

    fig.tight_layout()
    fig.savefig(all_figs_dir + '{}_{}_mcmc_samples.png'.format(drug, channel))
    # The original passed a third, unused `model` argument to format() here.
    fig.savefig(figs_dir + '{}_{}_mcmc_samples.pdf'.format(drug, channel))
    plt.close(fig)
    return None
def do_plot(drug_channel):
    """Fit both dose-response models by CMA-ES and plot the best-fit curves.

    For each model in ``1..num_models`` the pooled data are published
    through the module-level ``concs`` / ``responses`` (read by
    ``sum_of_square_diffs``), the sum of squares is minimised with CMA-ES,
    and the fitted curve is drawn over the data in that model's panel.
    Model 2 starts its search from the (pIC50, Hill) found for model 1.

    Relies on the module-level names ``num_models`` and ``all_figs_dir``.

    :param drug_channel: ``(drug, channel)`` pair.
    """
    global concs, responses
    fig = plt.figure(figsize=(5, 8))
    axes = {1: fig.add_subplot(211), 2: fig.add_subplot(212)}
    fsize = 14

    for model in range(1, num_models + 1):
        panel = axes[model]
        dr.define_model(model)
        drug, channel = drug_channel
        num_expts, experiment_numbers, experiments = dr.load_crumb_data(
            drug, channel)
        figs_dir = dr.drug_channel_figs_dir(drug, channel)

        # Pool every experiment; these globals are what the objective reads.
        pooled = [experiments[k] for k in range(num_expts)]
        concs = np.concatenate([np.array([])] + [e[:, 0] for e in pooled])
        responses = np.concatenate([np.array([])] + [e[:, 1] for e in pooled])

        if model == 1:
            start_point, step_size = np.ones(2), 0.1
        elif model == 2:
            # Warm start from the previous model's result, with a tighter
            # initial step.
            start_point, step_size = np.copy([pic50, hill]), 0.01

        optimiser = cma.CMAEvolutionStrategy(start_point, step_size,
                                             cma.CMAOptions())
        while not optimiser.stop():
            candidates = optimiser.ask()
            scores = [sum_of_square_diffs(c, model) for c in candidates]
            optimiser.tell(candidates, scores)
            optimiser.disp()
        outcome = optimiser.result()
        ss = outcome[1]
        pic50, hill = outcome[0]
        if model == 1:
            # The second search dimension is ignored by the model-1
            # objective; Hill is fixed.
            hill = 1

        num_pts = 501
        lower_exponent = int(np.log10(np.min(concs))) - 1
        upper_exponent = int(np.log10(np.max(concs))) + 2
        x_range = np.logspace(lower_exponent, upper_exponent, num_pts)
        predicted = dr.dose_response_model(x_range, hill,
                                           dr.pic50_to_ic50(pic50))

        panel.grid()
        panel.set_xscale('log')
        panel.set_ylim(0, 100)
        panel.set_ylabel(r"% {} block".format(channel), fontsize=fsize)
        panel.set_xlabel(r"{} concentration ($\mu$M)".format(drug),
                         fontsize=fsize)
        panel.plot(x_range, predicted, color='blue', lw=2, label="Best fit")
        panel.plot(concs, responses, 'o', color='orange', ms=10,
                   label="Expt data")
        panel.legend(loc=2)
        heading = "$M_{}, pIC50 = {}, Hill = {}, SS = {}$".format(
            model, round(pic50, 2), round(hill, 2), round(ss, 2))
        panel.set_title(heading, fontsize=fsize)

    fig.tight_layout()
    fig.savefig(all_figs_dir + "{}_{}_best_fits.png".format(drug, channel))
    fig.savefig(figs_dir + "{}_{}_best_fit.pdf".format(drug, channel))
    plt.close()
def sum_of_square_diffs(_params, doses, responses):
    """Return the sum of squared differences between model and measured responses.

    ``_params`` must unpack to exactly ``(pIC50, hill)``; no constraint or
    rescaling is applied to either value.
    """
    pic50_value, hill_coeff = _params
    predicted = dr.dose_response_model(doses, hill_coeff,
                                       dr.pic50_to_ic50(pic50_value))
    residuals = predicted - responses
    return np.sum(residuals**2)
# could technically save 50% of the space for Model 1 by not bothering to save Hill=1 in every sample... with open(txt_file, "w") as outfile: outfile.write( "# {} (pIC50,Hill) samples from single-level MCMC (model {}) for {} + {}\n" .format(args.num_samples, args.model, drug, channel)) np.savetxt(outfile, chain) fig, ax = plt.subplots(1, 1, figsize=(5, 4)) ax.grid() ax.set_xlabel("{} concentration ($\mu$M)".format(drug)) ax.set_ylabel("% {} block".format(channel)) ax.set_xscale("log") x = np.logspace(min_x, max_x, num_x_pts) for t in xrange(args.num_samples): pic50, hill = chain[t, :] predicted_response_curve = dr.dose_response_model( x, hill, dr.pic50_to_ic50(pic50)) ax.plot(x, predicted_response_curve, color='black', alpha=alpha) if args.plot_data: ax.plot(concs, responses, 'o', color='orange', ms=8, zorder=10) fig.tight_layout() fig.savefig(png_file) print "\nSaved {}\n".format(samples_png) if args.save_pdf: fig.savefig(samples_pdf) print "\nSaved {}\n".format(samples_pdf) plt.close()
def run_single_level(drug_channel): drug, channel = drug_channel num_expts, experiment_numbers, experiments = dr.load_crumb_data( drug, channel) drug, channel, chain_file, images_dir = dr.nonhierarchical_chain_file_and_figs_dir( drug, channel) num_params = 3 # hill, pic50, mu concs = np.array([]) responses = np.array([]) for i in xrange(num_expts): concs = np.concatenate((concs, experiments[i][:, 0])) responses = np.concatenate((responses, experiments[i][:, 1])) print experiments print concs print responses # uniform prior intervals hill_prior = [0, 10] pic50_prior = [-1, 20] sigma_prior = [1e-3, 50] prior_lowers = np.array([hill_prior[0], pic50_prior[0], sigma_prior[0]]) prior_uppers = np.array([hill_prior[1], pic50_prior[1], sigma_prior[1]]) # for reproducible results, otherwise select a new random seed seed = 1 npr.seed(seed) start = time.time() x0 = np.array( [1., 2.5] ) # not fitting sigma by CMA-ES, can maximise log-likelihood wrt sigma analytically sigma0 = 0.1 opts = cma.CMAOptions() opts['seed'] = seed es = cma.CMAEvolutionStrategy(x0, sigma0, opts) while not es.stop(): X = es.ask() es.tell(X, [sum_of_square_diffs(x, concs, responses) for x in X]) es.disp() res = es.result() hill_cur = res[0][0]**2 pic50_cur = res[0][1]**2 - 1 sigma_cur = initial_sigma(len(responses), res[1]) proposal_scale = 0.01 theta_cur = np.array([hill_cur, pic50_cur, sigma_cur]) mean_estimate = np.copy(theta_cur) cov_estimate = proposal_scale * np.diag(np.copy(np.abs(theta_cur))) cmaes_ll = log_likelihood_single(responses, concs, theta_cur) best_fit_fig = plt.figure(figsize=(5, 4)) best_fit_ax = best_fit_fig.add_subplot(111) best_fit_ax.set_xscale('log') best_fit_ax.grid() plot_lower_lim = int(np.log10(np.min(concs))) - 1 plot_upper_lim = int(np.log10(np.max(concs))) + 2 best_fit_ax.set_xlim(10**plot_lower_lim, 10**plot_upper_lim) best_fit_ax.set_ylim(0, 100) num_pts = 1001 x_range = np.logspace(plot_lower_lim, plot_upper_lim, num_pts) best_fit_curve = dr.dose_response_model(x_range, 
hill_cur, dr.pic50_to_ic50(pic50_cur)) best_fit_ax.plot(x_range, best_fit_curve, label='Best fit', lw=2) best_fit_ax.set_ylabel('% {} block'.format(channel)) best_fit_ax.set_xlabel(r'{} concentration ($\mu$M)'.format(drug)) best_fit_ax.set_title('Hill = {}, pIC50 = {}'.format( np.round(hill_cur, 2), np.round(pic50_cur, 2))) best_fit_ax.scatter(concs, responses, marker="o", color='orange', s=100, label='Data', zorder=10) best_fit_ax.legend(loc=2) best_fit_fig.tight_layout() best_fit_fig.savefig(images_dir + '{}_{}_CMA-ES_best_fit.png'.format(drug, channel)) best_fit_fig.savefig(images_dir + '{}_{}_CMA-ES_best_fit.png'.format(drug, channel)) plt.close() #sys.exit() # uncomment if you only want to plot the best fit # let MCMC look around for a bit before adaptive covariance matrix # same rule (100*dimension) as in hierarchical case when_to_adapt = 100 * num_params log_target_cur = log_likelihood_single(responses, concs, theta_cur) print "initial log_target_cur =", log_target_cur # effectively step size, scales covariance matrix loga = 0. # what fraction of proposed samples are being accepted into the chain acceptance = 0. # what fraction of samples we WANT accepted into the chain # loga updates itself to try to make this dream come true target_acceptance = 0.25 total_iterations = args.iterations thinning = args.thinning assert (total_iterations % thinning == 0) # how often to print a little status message status_when = total_iterations / 20 saved_iterations = total_iterations / thinning + 1 # also want to store log-target value at each iteration chain = np.zeros((saved_iterations, num_params + 1)) chain[0, :] = np.concatenate((np.copy(theta_cur), [log_target_cur])) print chain[0] print "concs:", concs print "responses:", responses # MCMC! 
t = 1 start = time.time() while t <= total_iterations: theta_star = npr.multivariate_normal(theta_cur, np.exp(loga) * cov_estimate) accepted = 0 if np.all(prior_lowers < theta_star) and np.all( theta_star < prior_uppers): log_target_star = log_likelihood_single(responses, concs, theta_star) accept_prob = npr.rand() if (np.log(accept_prob) < log_target_star - log_target_cur): theta_cur = theta_star log_target_cur = log_target_star accepted = 1 acceptance = ((t - 1.) * acceptance + accepted) / t if (t > when_to_adapt): s = t - when_to_adapt gamma_s = 1 / (s + 1)**0.6 temp_covariance_bit = np.array([theta_cur - mean_estimate]) cov_estimate = (1 - gamma_s) * cov_estimate + gamma_s * np.dot( np.transpose(temp_covariance_bit), temp_covariance_bit) mean_estimate = (1 - gamma_s) * mean_estimate + gamma_s * theta_cur loga += gamma_s * (accepted - target_acceptance) if (t % thinning == 0): chain[t / thinning, :] = np.concatenate( (np.copy(theta_cur), [log_target_cur])) if (t % status_when == 0): print "{} / {}".format(t / status_when, total_iterations / status_when) time_taken_so_far = time.time() - start estimated_time_left = time_taken_so_far / t * (total_iterations - t) print "Time taken: {} s = {} min".format( np.round(time_taken_so_far, 1), np.round(time_taken_so_far / 60, 2)) print "acceptance = {}".format(np.round(acceptance, 5)) print "Estimated time remaining: {} s = {} min".format( np.round(estimated_time_left, 1), np.round(estimated_time_left / 60, 2)) t += 1 print "\nTime taken to do {} MCMC iterations: {} s\n".format( total_iterations, time.time() - start) print "Final iteration:", chain[-1, :], "\n" with open(chain_file, 'w') as outfile: outfile.write( '# Nonhierarchical MCMC output for {} + {}: (Hill,pIC50,sigma,log-target)\n' .format(drug, channel)) np.savetxt(outfile, chain) try: assert (len(chain[:, 0]) == saved_iterations) except AssertionError: print "len(chain[:,0])!=saved_iterations" sys.exit() burn_fraction = args.burn_in_fraction burn = 
saved_iterations / burn_fraction best_ll_index = np.argmax(chain[:, num_params]) best_ll_row = chain[best_ll_index, :] print "Best log-likelihood:", "\n", best_ll_row figs = [] axs = [] # plot all marginal posterior distributions for i in range(num_params): labels = ['Hill', 'pIC50', r'$\sigma$'] file_labels = ['Hill', 'pIC50', 'sigma'] figs.append(plt.figure()) axs.append([]) axs[i].append(figs[i].add_subplot(211)) axs[i][0].hist(chain[burn:, i], bins=40, normed=True) axs[i][0].legend() axs[i][0].set_title("MCMC marginal distributions") axs[i].append(figs[i].add_subplot(212, sharex=axs[i][0])) axs[i][1].plot(chain[burn:, i], range(burn, saved_iterations)) axs[i][1].invert_yaxis() axs[i][1].set_xlabel(labels[i]) axs[i][1].set_ylabel('Saved MCMC iteration') figs[i].tight_layout() figs[i].savefig( images_dir + '{}_{}_{}_marginal.png'.format(drug, channel, file_labels[i])) plt.close() # plot log-target path fig2 = plt.figure() ax3 = fig2.add_subplot(111) ax3.plot(range(saved_iterations), chain[:, -1]) ax3.set_xlabel('MCMC iteration') ax3.set_ylabel('log-target') fig2.tight_layout() fig2.savefig(images_dir + 'log_target.png') plt.close() # plot scatterplot matrix of posterior(s) labels = ['Hill', 'pIC50', r'$\sigma$'] colormin, colormax = 1e9, 0 norm = matplotlib.colors.Normalize(vmin=5, vmax=10) hidden_labels = [] count = 0 # there's probably a better way to do this # I plot all the histograms to normalize the colours, in an attempt to give a better comparison between the pairwise plots while count < 2: axes = {} matrix_fig = plt.figure(figsize=(3 * num_params, 3 * num_params)) for i in range(num_params): for j in range(i + 1): ij = str(i) + str(j) subplot_position = num_params * i + j + 1 if i == j: axes[ij] = matrix_fig.add_subplot(num_params, num_params, subplot_position) axes[ij].hist(chain[burn:, i], bins=50, normed=True, color='blue') elif j == 0: # this column shares x-axis with top-left axes[ij] = matrix_fig.add_subplot(num_params, num_params, 
subplot_position, sharex=axes["00"]) counts, xedges, yedges, Image = axes[ij].hist2d( chain[burn:, j], chain[burn:, i], cmap='hot_r', bins=50, norm=norm) maxcounts = np.amax(counts) if maxcounts > colormax: colormax = maxcounts mincounts = np.amin(counts) if mincounts < colormin: colormin = mincounts else: axes[ij] = matrix_fig.add_subplot( num_params, num_params, subplot_position, sharex=axes[str(j) + str(j)], sharey=axes[str(i) + "0"]) counts, xedges, yedges, Image = axes[ij].hist2d( chain[burn:, j], chain[burn:, i], cmap='hot_r', bins=50, norm=norm) maxcounts = np.amax(counts) if maxcounts > colormax: colormax = maxcounts mincounts = np.amin(counts) if mincounts < colormin: colormin = mincounts if i != num_params - 1: hidden_labels.append(axes[ij].get_xticklabels()) if j != 0: hidden_labels.append(axes[ij].get_yticklabels()) if i == num_params - 1: axes[str(i) + str(j)].set_xlabel(labels[j]) if j == 0: axes[str(i) + str(j)].set_ylabel(labels[i]) plt.xticks(rotation=30) norm = matplotlib.colors.Normalize(vmin=colormin, vmax=colormax) count += 1 plt.setp(hidden_labels, visible=False) matrix_fig.tight_layout() matrix_fig.savefig(images_dir + "{}_{}_scatterplot_matrix.png".format(drug, channel)) #matrix_fig.savefig(images_dir+"{}_{}_scatterplot_matrix.pdf".format(drug,channel)) plt.close() print "\n\n{} + {} complete!\n\n".format(drug, channel)