def get_eval(lr=0.01, n_episodes=50, is_train=False, savefig=False):
    print('qlearning_nn evaluating...')

    # Create the output directory if it does not exist yet.
    base_dir = './results/qlearning_nn'
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    log_file = os.path.join(base_dir, 'qlearning_nn.log')
    logger = logging(log_file)  # project-specific logging helper (callable)

    results_file = os.path.join(base_dir, 'qlearning_nn.csv')

    # Reuse cached results unless retraining or figure output is requested.
    if os.path.exists(results_file) and not is_train and not savefig:
        results = pd.read_csv(results_file)
        results = results.sort_values(by=['noisy', 'problem_id'])
        return results

    # Otherwise start from a clean slate.
    if os.path.exists(results_file):
        os.remove(results_file)
    if os.path.exists(log_file):
        os.remove(log_file)

    # Load a previously trained agent if available, otherwise train one.
    pkl_file = os.path.join(
        base_dir, 'qlearning_nn_lr={}_episodes={}.pkl'.format(lr, n_episodes))
    if os.path.exists(pkl_file):
        with open(pkl_file, 'rb') as f:
            q_learning_nn = pickle.load(f)
    else:
        q_learning_nn = train(lr=lr, n_episodes=n_episodes)

    # Evaluate the agent on each environment.
    results = pd.DataFrame([], columns=[
        'problem_id', 'noisy', 'action',
        'Total_rewards', 'avg_reward_per_action',
    ])
    for problem_id, noisy, env in get_env():
        states, rewards, actions = implement(
            env, q_learning_nn, 1, discount_factor=0.95)
        result = {
            'problem_id': problem_id,
            'noisy': noisy,
            'Total_rewards': sum(rewards),
            'avg_reward_per_action': sum(rewards) / len(actions),
        }
        results = results.append(pd.DataFrame(result, index=[0]),
                                  ignore_index=True)
        logger('    ' + str(result))
        logger(actions)

        if savefig:
            get_fig(states, rewards)
            pic_name = os.path.join(
                base_dir,
                'problem_id={} noisy={}.jpg'.format(problem_id, noisy))
            plt.savefig(dpi=300, fname=pic_name)
            plt.close()

        env.close()

    results = results.sort_values(by=['noisy', 'problem_id'])
    results.to_csv(results_file, index=False)
    return results
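# Usage sketch for get_eval (assumptions: the helpers it relies on -- train,
# get_env, implement, get_fig, and the logging() factory -- are defined in
# this module, and _demo_get_eval is a hypothetical name added only for
# illustration; the hyperparameters are not tuned values).
def _demo_get_eval():
    # Force retraining and figure output instead of reusing the cached CSV.
    results = get_eval(lr=0.01, n_episodes=50, is_train=True, savefig=True)
    # The returned frame is sorted by ('noisy', 'problem_id').
    print(results[['problem_id', 'noisy', 'avg_reward_per_action']])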
def plot_batch(df, batch):
    # Plot 50uM.
    df_50uM = df[df.conc == -3]

    # One-sided t-tests of each compound against the DMSO control.
    if batch.startswith('Ala'):
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16' ]:
            df_comp = df_50uM[df_50uM.comp == comp]
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO' ]
    elif batch == 'AlaB':
        order = [ 'IKK16', 'K252a', 'RIF', 'DMSO' ]
    else:
        return

    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    #plt.ylim([ 10, 300000 ])
    if not batch.startswith('Ala'):
        plt.yscale('log')
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response.
    comps = sorted(set(df.comp))
    concentrations = sorted(set(df.conc))
    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]
        plt.subplot(1, 5, cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset, color='black',)
        plt.title(comp)
        if batch.startswith('Ala'):
            plt.ylim([ 0., 1.3 ])
        else:
            plt.ylim([ 10, 1000000 ])
            plt.yscale('log')
        plt.xticks(list(range(-3, -6, -1)),
                   [ '50', '25', '10', ])#'1', '0.1' ])
    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
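# Usage sketch for plot_batch (assumptions: the fluorescence readings live in
# a long-format DataFrame with columns 'comp', 'conc', 'fluo', and 'control',
# where conc == -3 encodes the 50 uM condition and a figures/ directory
# already exists; the values below are made up for illustration and
# _demo_plot_batch is a hypothetical name).
def _demo_plot_batch():
    import numpy as np
    import pandas as pd
    df = pd.DataFrame({
        'comp': [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO' ] * 6,
        'conc': [ -3 ] * 10 + [ -4 ] * 10 + [ -5 ] * 10,
        'fluo': np.random.uniform(0., 1.3, 30),
        'control': [ False, False, False, True, True ] * 6,
    })
    plot_batch(df, 'AlaA')  # writes figures/tb_culture_50uM_AlaA.svg, etc.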
def visualize_heatmap(chem_prot, suffix=''):
    plt.figure()
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(chem_prot, cmap=cmap)
    mkdir_p('figures/')
    if suffix == '':
        plt.savefig('figures/heatmap.png', dpi=300)
    else:
        plt.savefig('figures/heatmap_{}.png'.format(suffix), dpi=300)
    plt.close()
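# Usage sketch for visualize_heatmap (assumption: chem_prot is a 2D array or
# DataFrame of chemical-by-protein scores; the random matrix and the
# _demo_heatmap name are illustrative only).
def _demo_heatmap():
    import numpy as np
    chem_prot = np.random.randn(20, 10)  # 20 chemicals x 10 proteins
    visualize_heatmap(chem_prot, suffix='demo')  # -> figures/heatmap_demo.png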
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    # Copy before clipping so the caller's predictions are not modified
    # (slicing a numpy array returns a view, not a copy).
    y_unk_pred = np.copy(y_unk_pred)
    y_unk_pred[y_unk_pred > 10000] = 10000
    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5,
                c=-acquisition, cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    # Copy before clipping so the caller's predictions are not modified.
    y_pred = np.copy(y_pred)
    y_pred[y_pred < 0] = 0
    y_pred[y_pred > 10000] = 10000
    plt.figure()
    plt.scatter(y_pred, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
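# Usage sketch for score_scatter and acquisition_scatter above (assumptions:
# predictions, true scores, variances, and acquisition values are parallel 1D
# numpy arrays and a figures/ directory exists; the synthetic data, the
# stand-in acquisition score, and the _demo_score_plots name are illustrative
# only).
def _demo_score_plots():
    import numpy as np
    y = np.random.uniform(0, 10000, 300)          # "true" scores
    y_pred = y + np.random.normal(0, 500, 300)    # noisy predictions
    var_pred = np.random.uniform(0, 3, 300)       # predictive variances
    acquisition = -(y_pred + var_pred)            # stand-in acquisition score
    score_scatter(y_pred, y, var_pred, 'gp', prefix='demo_')
    acquisition_scatter(y_pred, var_pred, acquisition, 'gp')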
def plot_values(df, score_fn):
    models = ['mlper1', 'sparsehybrid', 'gp', 'real']

    plt.figure(figsize=(10, 4))
    for midx, model in enumerate(models):
        if model == 'gp':
            color = '#3e5c71'
        elif model == 'sparsehybrid':
            color = '#2d574e'
        elif model == 'mlper1':
            color = '#a12424'
        elif model == 'real':
            color = '#A9A9A9'
        else:
            raise ValueError('Invalid model {}'.format(model))

        plt.subplot(1, len(models), midx + 1)
        df_subset = df[df.model == model]
        compounds = np.array(df_subset.compound_)
        if model == 'real':
            order = sorted(compounds)
        else:
            order = compounds[np.argsort(-df_subset.affinity)]
        sns.barplot(data=df_subset, x='compound_', y='affinity',
                    color=color, order=order)
        if score_fn == 'rdock':
            plt.ylim([0, -40])
        else:
            plt.ylim([0, -12])
        plt.xticks(rotation=45)
    plt.savefig('figures/design_docking_{}.svg'.format(score_fn))
    plt.close()

    print('Score function: {}'.format(score_fn))
    print('GP vs MLP: {}'.format(ttest_ind(
        df[df.model == 'gp'].affinity,
        df[df.model == 'mlper1'].affinity,
    )))
    print('Hybrid vs MLP: {}'.format(ttest_ind(
        df[df.model == 'sparsehybrid'].affinity,
        df[df.model == 'mlper1'].affinity,
    )))
    print('')
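# Usage sketch for plot_values (assumptions: df has one row per designed
# compound with columns 'model', 'compound_', and 'affinity' holding docking
# scores, more negative being better; the synthetic frame, the 'dock' score
# function name, and _demo_plot_values are illustrative only).
def _demo_plot_values():
    import numpy as np
    import pandas as pd
    rows = []
    for model in [ 'mlper1', 'sparsehybrid', 'gp', 'real' ]:
        for i in range(10):
            rows.append({
                'model': model,
                'compound_': 'cpd{}'.format(i),
                'affinity': np.random.uniform(-12, 0),
            })
    plot_values(pd.DataFrame(rows), 'dock')  # -> figures/design_docking_dock.svg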
# NOTE: fragment -- the enclosing function, the loop headers, and the
# condition guarding the `continue` are not shown here. The inner loop
# appears to skip already-seen ZINC IDs and collect (order, Kd) pairs;
# each panel is a barplot of measured Kd values.
                continue
            seen.add(zinc)
            order_list.append((order, Kd))

        order_list = [
            order for order, _ in sorted(order_list, key=lambda x: x[1])
        ]

        plt.subplot(1, 3, bidx + 1)
        sns.barplot(
            x='order', y='Kdpoint', data=df_subset,
            color=palette[bidx], order=order_list,
            ci=95, capsize=0.4, errcolor='#888888',
        )
        sns.swarmplot(
            x='order', y='Kdpoint', data=df_subset,
            color='black', order=order_list,
        )
        plt.ylim([-100, 10100])

    plt.savefig('figures/prediction_barplot_{}.svg'.format(model))
    plt.close()
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    chem_idx_obs = sorted(set([ i for i, _ in idx_obs ]))
    chem_idx_unk = sorted(set([ i for i, _ in idx_unk ]))

    feature_obs = np.array([ chem2feature[chems[i]] for i in chem_idx_obs ])
    feature_unk = np.array([ chem2feature[chems[i]] for i in chem_idx_unk ])

    # Correlate predictive variance with the distance to the nearest
    # observed compound in feature space.
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])

    print('Distance Spearman r = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson rho = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    X = np.vstack([ feature_obs, feature_unk ])
    labels = np.concatenate([
        np.zeros(len(chem_idx_obs)), np.ones(len(chem_idx_unk))
    ])

    sidx = np.argsort(-var_unk_pred)

    # Embed the chemical features with PCA, UMAP, and t-SNE.
    from fbpca import pca
    U, s, Vt = pca(X, k=3)
    X_pca = U * s

    from umap import UMAP
    um = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    )
    X_umap = um.fit_transform(X)

    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(n_components=2, n_jobs=20)
    X_tsne = tsne.fit_transform(X)

    if prot_target is None:
        suffix = ''
    else:
        suffix = '_' + prot_target

    for name, coords in zip(
            [ 'pca', 'umap', 'tsne' ],
            [ X_pca, X_umap, X_tsne ],
    ):
        # Observed (orange crosses) versus unknown (blue) compounds.
        plt.figure()
        sns.scatterplot(x=coords[labels == 1, 0], y=coords[labels == 1, 1],
                        color='blue', alpha=0.1)
        plt.scatter(x=coords[labels == 0, 0], y=coords[labels == 0, 1],
                    color='orange', alpha=1.0, marker='x', linewidths=10)
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'
                    .format(name, regress_type, suffix), dpi=300)
        plt.close()

        # Unknown compounds colored by the rank of the predicted variance.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0], y=coords[labels == 1, 1],
                    c=ss.rankdata(var_unk_pred), alpha=0.1, cmap='coolwarm')
        plt.savefig('figures/latent_scatter_{}_var_{}{}.png'
                    .format(name, regress_type, suffix), dpi=300)
        plt.close()

        # Unknown compounds colored by acquisition score.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0], y=coords[labels == 1, 1],
                    c=-acquisition, alpha=0.1, cmap='hot')
        plt.savefig('figures/latent_scatter_{}_acq_{}{}.png'
                    .format(name, regress_type, suffix), dpi=300)
        plt.close()
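# Usage sketch for latent_scatter (assumptions: it is called with keyword
# arguments naming the chemical feature dictionary and the observed/unknown
# (chem, prot) index pairs, and fbpca, umap-learn, and MulticoreTSNE are
# installed; the shapes, names, and _demo_latent_scatter are illustrative).
def _demo_latent_scatter():
    import numpy as np
    chems = [ 'chem{}'.format(i) for i in range(50) ]
    chem2feature = { c: list(np.random.randn(10)) for c in chems }
    idx_obs = [ (i, 0) for i in range(20) ]        # observed (chem, prot) pairs
    idx_unk = [ (i, 0) for i in range(20, 50) ]    # unknown (chem, prot) pairs
    n_unk = len(idx_unk)
    latent_scatter(
        np.random.uniform(0, 1, n_unk),   # var_unk_pred
        np.random.uniform(0, 1, n_unk),   # y_unk_pred
        np.random.uniform(0, 1, n_unk),   # acquisition
        chems=chems, chem2feature=chem2feature,
        idx_obs=idx_obs, idx_unk=idx_unk,
        regress_type='gp', prot_target=None,
    )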
def parse_log(regress_type, experiment, **kwargs):
    log_fname = ('iterate_davis2011kinase_{}_{}.log'
                 .format(regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    # Parse the acquired (chem, prot) indices and Kds from the log.
    with open(log_fname) as f:
        while True:
            line = f.readline()
            if not line:
                break

            if not line.startswith('2019') and not line.startswith('2020'):
                continue
            if not ' | ' in line:
                continue
            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                if not iteration in iter_to_Kds:
                    iter_to_Kds[iteration] = []
                if not iteration in iter_to_idxs:
                    iter_to_idxs[iteration] = []
                continue

            elif line.startswith('\tAcquire '):
                fields = line.strip().split()
                Kd = float(fields[-1])
                iter_to_Kds[iteration].append(Kd)
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                iter_to_idxs[iteration].append((chem_idx, prot_idx))
                continue

    assert(iter_to_Kds.keys() == iter_to_idxs.keys())

    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kd_iter.append(np.mean(iter_to_Kds[iteration]))
        Kd_iter_max.append(max(iter_to_Kds[iteration]))
        Kd_iter_min.append(min(iter_to_Kds[iteration]))
        all_Kds += list(iter_to_Kds[iteration])
        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            break
    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(iterations, Kd_iter)
    plt.plot(iterations, Kd_iter)
    plt.fill_between(iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([ regress_type, experiment ]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # Early return: the differential entropy analysis below is disabled.
    return

    # Plot differential entropy of acquired samples over iterations.
    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([ regress_type, experiment ]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()
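# Usage sketch for parse_log (assumptions: a log named
# iterate_davis2011kinase_<regress_type>_<experiment>.log, written by the
# iteration script with timestamped ' | '-separated lines, exists in the
# working directory; the argument values and _demo_parse_log are illustrative
# only; because of the early return above, only the Kd-per-iteration plot is
# produced).
def _demo_parse_log():
    parse_log('gp', 'perprot')
    # -> figures/Kd_over_iterations_gp_perprot.png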