def plot_clusters(coords, clusters, s=1):
    """Scatter-plot 2-D coordinates colored by integer cluster label.

    Parameters
    ----------
    coords : array of shape (n_cells, 2); columns are x and y.
    clusters : integer array of shape (n_cells,); labels index a fixed
        27-color palette (cycled if there are more clusters than colors).
    s : marker size forwarded to ``plt.scatter``.

    Raises
    ------
    ValueError
        If ``coords`` and ``clusters`` disagree on the number of cells.
        (The original wrote the mismatch to stderr and then tripped an
        ``assert`` on the same condition; asserts are stripped under
        ``python -O``, so raise explicitly instead.)
    """
    if coords.shape[0] != clusters.shape[0]:
        raise ValueError('Mismatch: {} cells, {} labels'
                         .format(coords.shape[0], clusters.shape[0]))
    # One color per cluster label; cycle() lets the label count exceed
    # the palette size without an IndexError.
    colors = np.array(
        list(islice(cycle([
            '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
            '#984ea3', '#999999', '#e41a1c', '#dede00', '#ffe119',
            '#e6194b', '#ffbea3', '#911eb4', '#46f0f0', '#f032e6',
            '#d2f53c', '#008080', '#e6beff', '#aa6e28', '#800000',
            '#aaffc3', '#808000', '#ffd8b1', '#000080', '#808080',
            '#fabebe', '#a3f4ff'
        ]), int(max(clusters) + 1)))
    )
    plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1], c=colors[clusters], s=s)
def plot_batch(df, batch):
    """Plot 50 uM bar/swarm comparisons and dose-response curves for a batch.

    Only batches 'AlaA' and 'AlaB' have a compound ordering defined; any
    other batch name returns without plotting.

    Parameters
    ----------
    df : DataFrame with columns 'comp', 'conc', 'fluo', 'control'.
    batch : batch identifier; used to select compound order and to name
        the output files.

    Side effects
    ------------
    Writes figures/tb_culture_50uM_<batch>.svg and
    figures/tb_culture_<batch>.svg; prints one-sided t-test results.
    """
    # The 50 uM condition is encoded as conc == -3.
    df_50uM = df[df.conc == -3]

    if batch.startswith('Ala'):
        # Compare each compound's fluorescence to the DMSO control at 50 uM.
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in ['K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16']:
            df_comp = df_50uM[df_50uM.comp == comp]
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            # Fold the two-sided P into a one-sided P in the direction of
            # decreased fluorescence (t < 0).
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = ['K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO']
    elif batch == 'AlaB':
        order = ['IKK16', 'K252a', 'RIF', 'DMSO']
    else:
        # No compound ordering defined for this batch: nothing to plot.
        return

    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    #plt.ylim([ 10, 300000 ])
    # Only 'AlaA'/'AlaB' reach this point, so the original's
    # `if not batch.startswith('Ala'): plt.yscale('log')` was unreachable
    # and has been removed. Unused locals `comps`/`concentrations` were
    # likewise dropped.
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response.
    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]
        plt.subplot(1, 5, cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset, color='black',)
        plt.title(comp)
        # Ala batches use a fixed linear fluorescence range; the log-scale
        # alternative for non-Ala batches was unreachable here and removed.
        plt.ylim([ 0., 1.3 ])
        # Lower doses ('1', '0.1') intentionally omitted, as in original.
        plt.xticks(list(range(-3, -6, -1)), [ '50', '25', '10', ])
    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
def visualize_heatmap(chem_prot, suffix=''):
    """Render chem_prot as a red/blue diverging heatmap and save it.

    Writes figures/heatmap.png, or figures/heatmap_<suffix>.png when a
    non-empty suffix is given, creating figures/ first if necessary.
    """
    plt.figure()
    diverging = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(chem_prot, cmap=diverging)
    mkdir_p('figures/')
    if suffix == '':
        out_path = 'figures/heatmap.png'
    else:
        out_path = 'figures/heatmap_{}.png'.format(suffix)
    plt.savefig(out_path, dpi=300)
    plt.close()
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    """Scatter predicted score vs. variance, colored by acquisition value.

    Saves figures/acquisition_unknown_<regress_type>.png at 200 dpi.

    Parameters
    ----------
    y_unk_pred : array of predicted scores (clipped at 10000 for display).
    var_unk_pred : array of predictive variances (y axis).
    acquisition : array of acquisition values; negated for the colormap.
    regress_type : regressor name, used in the title and filename.
    """
    # BUG FIX: `y_unk_pred[:]` returns a numpy *view*, so the clipping
    # below mutated the caller's array in place. Copy instead.
    y_unk_pred = np.copy(y_unk_pred)
    y_unk_pred[y_unk_pred > 10000] = 10000  # clip extreme scores for display
    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5,
                c=-acquisition, cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    """Scatter predicted score vs. variance, colored by normalized truth.

    Saves figures/variance_vs_pred_<prefix>regressors<regress_type>.png.

    Parameters
    ----------
    y_pred : array of predicted scores (clipped to [0, 10000] for display).
    y : array of ground-truth values; min-max normalized for the colormap.
    var_pred : array of predictive variances (y axis).
    regress_type : regressor name, used in the filename.
    prefix : optional filename prefix.
    """
    # BUG FIX: `y_pred[:]` returns a numpy *view*, so the clipping below
    # mutated the caller's array in place. Copy instead.
    y_pred = np.copy(y_pred)
    y_pred[y_pred < 0] = 0
    y_pred[y_pred > 10000] = 10000
    plt.figure()
    plt.scatter(y_pred, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
def plot_values(df, score_fn):
    """Bar-plot docking affinities per model and report pairwise t-tests.

    Parameters
    ----------
    df : DataFrame with columns 'model', 'compound_', 'affinity'.
    score_fn : docking score function name ('rdock' selects a wider
        y-range); also names the output file
        figures/design_docking_<score_fn>.svg.

    Raises
    ------
    ValueError for a model outside the four known ones (defensive; the
    list below is hard-coded).
    """
    models = ['mlper1', 'sparsehybrid', 'gp', 'real']

    plt.figure(figsize=(10, 4))
    for midx, model in enumerate(models):
        if model == 'gp':
            color = '#3e5c71'
        elif model == 'sparsehybrid':
            color = '#2d574e'
        elif model == 'mlper1':
            color = '#a12424'
        elif model == 'real':
            color = '#A9A9A9'
        else:
            # BUG FIX: original raised ValueError('Invalid model'.format(model))
            # — the format string had no placeholder, so the offending model
            # name was never included in the message.
            raise ValueError('Invalid model: {}'.format(model))
        plt.subplot(1, len(models), midx + 1)
        df_subset = df[df.model == model]
        compounds = np.array(df_subset.compound_)
        if model == 'real':
            order = sorted(compounds)
        else:
            # Strongest (most negative) affinity first.
            order = compounds[np.argsort(-df_subset.affinity)]
        sns.barplot(data=df_subset, x='compound_', y='affinity',
                    color=color, order=order)
        if score_fn == 'rdock':
            plt.ylim([0, -40])
        else:
            plt.ylim([0, -12])
        plt.xticks(rotation=45)
    plt.savefig('figures/design_docking_{}.svg'.format(score_fn))
    plt.close()

    print('Score function: {}'.format(score_fn))
    print('GP vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'gp'].affinity,
            df[df.model == 'mlper1'].affinity,
        )))
    print('Hybrid vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'sparsehybrid'].affinity,
            df[df.model == 'mlper1'].affinity,
        )))
    print('')
def plot_mapping(curr_ds, curr_ref, ds_ind, ref_ind):
    """t-SNE embed two datasets and draw lines between matched points.

    Embeds curr_ds and curr_ref separately with the same TSNE object; the
    dataset embedding is shifted by +100 on the y axis so the two clouds
    don't overlap. ds_ind/ref_ind are paired row indices whose
    correspondences are drawn as translucent blue segments. Shows the
    figure interactively.
    """
    tsne = TSNE(n_iter=400, verbose=VERBOSE, random_state=69)

    tsne.fit(curr_ds)
    plt.figure()
    coords_ds = tsne.embedding_[:, :]
    coords_ds[:, 1] += 100  # vertical offset separates the two clouds
    plt.scatter(coords_ds[:, 0], coords_ds[:, 1])

    tsne.fit(curr_ref)
    coords_ref = tsne.embedding_[:, :]
    plt.scatter(coords_ref[:, 0], coords_ref[:, 1])

    # Build a single polyline with None breaks so one plot() call draws
    # every correspondence segment.
    seg_x, seg_y = [], []
    for ds_i, ref_i in zip(ds_ind, ref_ind):
        seg_x.extend([coords_ds[ds_i, 0], coords_ref[ref_i, 0], None])
        seg_y.extend([coords_ds[ds_i, 1], coords_ref[ref_i, 1], None])
    plt.plot(seg_x, seg_y, 'b-', alpha=0.3)

    plt.show()
continue if t >= 1600000 + a: break d[line[:1]]['x'].append(t-a) d[line[:1]]['y'].append(k) s['x'].append(t-a) s['y'].append(n) for i in ['J']: d[i] = sample(d[i],1000) s = sample(s) print "loaded data" fig = plt.figure() from matplotlib.ticker import EngFormatter formatter = EngFormatter(places=1) textsize = 11 mode = "all" if mode == 'all': fig.set_size_inches(20,4) p = 211 i = 'J' ax1 = fig.add_subplot(p) ax1.set_ylabel('#Keys Moved') formatter = EngFormatter(places=1)
'chem', 'prot', 'pred_Kd', 'Kd', 'Kdpoint', ]) models = sorted(set(df.model)) betas = sorted(set(df.beta)) for model in models: if model == 'Sparse Hybrid': palette = sns.color_palette('ch:2.5,-.2,dark=.3', len(betas)) else: palette = list(reversed(sns.color_palette('Blues_d', len(betas)))) plt.figure() for bidx, beta in enumerate(betas): df_subset = df[(df.model == model) & (df.beta == beta)] seen, order_list = set(), [] for zinc, order, Kd in zip(df_subset.zincid, df_subset.order, df_subset.Kd): if zinc in seen: continue seen.add(zinc) order_list.append((order, Kd)) order_list = [ order for order, _ in sorted(order_list, key=lambda x: x[1]) ]
#% Load model model = load_model(filepath + 'unet_exp_' + str(exp) + '.h5', compile=False) area = 11 # Prediction ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction( model, image_array, image_ref, final_mask, mask_ts_, patch_size, area) # Metrics cm = confusion_matrix(ref_final, pre_final) metrics = compute_metrics(ref_final, pre_final) print('Confusion matrix \n', cm) print('Accuracy: ', metrics[0]) print('F1score: ', metrics[1]) print('Recall: ', metrics[2]) print('Precision: ', metrics[3]) # Alarm area total = (cm[1, 1] + cm[0, 1]) / len(ref_final) * 100 print('Area to be analyzed', total) print('training time', end_training) print('test time', time_ts) #%% Show the results # prediction of the whole image fig1 = plt.figure('whole prediction') plt.imshow(prob_recontructed) # Show the test tiles fig2 = plt.figure('prediction of test set') plt.imshow(prob_recontructed * mask_ts)
for line in fileinput.input(f+"/control.queries"): if "loadFreqs" in line: loads.append(line) if len(loads) < 2: print "coudln't find loads for " + f continue loads_d = [] for i in loads: loads_d.append(get_num_dict(i)) chord_loads.append(get_50_percent(loads_d[0])) vserver_loads.append(get_50_percent(loads_d[1])) x_values.append(next(get_numbers(f))) plt.figure().set_size_inches(6.5,5) plt.xlabel("#Nodes") plt.ylabel("% of nodes storing 50% of data") from matplotlib.ticker import EngFormatter formatter = EngFormatter(places=0) plt.gca().xaxis.set_major_formatter(formatter) plt.ylim(0,0.5) plt.xlim(0,1000000) out_file = "intro_lb_chord.pdf" d1 = prepare(x_values,chord_loads) d2 = prepare(x_values,vserver_loads)
# ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction(model, image_array, image_ref, final_mask, mask_ts_, patch_size, area)

# Metrics
# Flatten the stacks of test patches into 1-D label vectors so they can
# be compared element-wise by confusion_matrix.
true_labels = np.reshape(patches_test_ref, (patches_test_ref.shape[0]
                                            * patches_test_ref.shape[1]
                                            * patches_test_ref.shape[2]))
predicted_labels = np.reshape(patches_pred, (patches_pred.shape[0]
                                             * patches_pred.shape[1]
                                             * patches_pred.shape[2]))
cm = confusion_matrix(true_labels, predicted_labels)
metrics = compute_metrics(true_labels, predicted_labels)
print('Confusion matrix \n', cm)
print('Accuracy: ', metrics[0])
print('F1score: ', metrics[1])
print('Recall: ', metrics[2])
print('Precision: ', metrics[3])

# Alarm area: percentage of pixels predicted positive (TP + FP).
# BUG FIX: the original divided by len(true_label) — an undefined name
# that raised NameError; the flattened vector is `true_labels`.
total = (cm[1, 1] + cm[0, 1]) / len(true_labels) * 100
print('Area to be analyzed', total)
print('training time', end_training)
print('test time', time_ts)

#%% Show the results
# prediction of the whole image
fig1 = plt.figure('whole prediction')
plt.imshow(prob_recontructed)

# Show the test tiles
# fig2 = plt.figure('prediction of test set')
# plt.imshow(prob_recontructed*mask_ts)
y_true = masks.reshape( masks.shape[0] * masks.shape[1] * masks.shape[2] * masks.shape[3], 1) y_scores = np.where(y_scores > 0.5, 1, 0) y_true = np.where(y_true > 0.5, 1, 0) import os os.mkdir('./output') output_folder = 'output/' #Area under the ROC curve fpr, tpr, thresholds = roc_curve((y_true), y_scores) AUC_ROC = roc_auc_score(y_true, y_scores) print("\nArea under the ROC curve: " + str(AUC_ROC)) roc_curve = plt.figure() plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC) plt.title('ROC curve') plt.xlabel("FPR (False Positive Rate)") plt.ylabel("TPR (True Positive Rate)") plt.legend(loc="lower right") plt.savefig(output_folder + "ROC.png") #Precision-recall curve precision, recall, thresholds = precision_recall_curve(y_true, y_scores) precision = np.fliplr([precision])[0] recall = np.fliplr([recall])[0] AUC_prec_rec = np.trapz(precision, recall) print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec)) prec_rec_curve = plt.figure() plt.plot(recall,
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    """Visualize observed vs. unknown chemicals in 2-D latent spaces.

    Projects the stacked chemical feature matrix with PCA, UMAP and t-SNE,
    then saves three scatter plots per projection under figures/:
    observed-vs-unknown overlay (*_ypred_*), unknown points colored by
    predicted-variance rank (*_var_*), and by acquisition value (*_acq_*).
    Also prints the Spearman/Pearson correlation between each unknown
    chemical's distance to its nearest observed neighbor and its
    predicted variance.

    Required kwargs: 'chems', 'chem2feature', 'idx_obs', 'idx_unk',
    'regress_type', 'prot_target'.

    NOTE(review): `y_unk_pred` and the local `sidx` are never used below.
    """
    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    # Unique chemical indices appearing in the observed/unknown pair lists
    # (each element of idx_obs/idx_unk is a (chem_idx, prot_idx) pair).
    chem_idx_obs = sorted(set([i for i, _ in idx_obs]))
    chem_idx_unk = sorted(set([i for i, _ in idx_unk]))

    feature_obs = np.array([chem2feature[chems[i]] for i in chem_idx_obs])
    feature_unk = np.array([chem2feature[chems[i]] for i in chem_idx_unk])

    # Distance from each unknown chemical to its nearest observed
    # neighbor, correlated against the predicted variance.
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])
    print('Distance Spearman r = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson rho = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    # Stack observed above unknown; labels: 0 = observed, 1 = unknown.
    X = np.vstack([feature_obs, feature_unk])
    labels = np.concatenate(
        [np.zeros(len(chem_idx_obs)), np.ones(len(chem_idx_unk))])

    sidx = np.argsort(-var_unk_pred)  # NOTE(review): unused below.

    # Three 2-D projections of the same feature matrix.
    from fbpca import pca
    U, s, Vt = pca(
        X,
        k=3,
    )
    X_pca = U * s
    from umap import UMAP
    um = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    )
    X_umap = um.fit_transform(X)
    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(
        n_components=2,
        n_jobs=20,
    )
    X_tsne = tsne.fit_transform(X)

    # Filename suffix distinguishes per-protein-target runs.
    if prot_target is None:
        suffix = ''
    else:
        suffix = '_' + prot_target

    for name, coords in zip(
            ['pca', 'umap', 'tsne'],
            [X_pca, X_umap, X_tsne],
    ):
        # Unknown points (blue, translucent) with observed overlaid
        # as opaque orange crosses.
        plt.figure()
        sns.scatterplot(
            x=coords[labels == 1, 0],
            y=coords[labels == 1, 1],
            color='blue',
            alpha=0.1,
        )
        plt.scatter(
            x=coords[labels == 0, 0],
            y=coords[labels == 0, 1],
            color='orange',
            alpha=1.0,
            marker='x',
            linewidths=10,
        )
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'.format(
            name, regress_type, suffix), dpi=300)
        plt.close()

        # Unknown points colored by rank of predicted variance.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0], y=coords[labels == 1, 1],
                    c=ss.rankdata(var_unk_pred), alpha=0.1, cmap='coolwarm')
        plt.savefig('figures/latent_scatter_{}_var_{}{}.png'.format(
            name, regress_type, suffix), dpi=300)
        plt.close()

        # Unknown points colored by (negated) acquisition value.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0], y=coords[labels == 1, 1],
                    c=-acquisition, alpha=0.1, cmap='hot')
        plt.savefig('figures/latent_scatter_{}_acq_{}{}.png'.format(
            name, regress_type, suffix), dpi=300)
        plt.close()
def parse_log(regress_type, experiment, **kwargs):
    """Parse an acquisition log and plot Kd statistics per iteration.

    Reads iterate_davis2011kinase_<regress_type>_<experiment>.log,
    collecting, for each 'Iteration' block, the Kd values and the
    (chem_idx, prot_idx) pairs from '\\tAcquire' lines. Saves
    figures/Kd_over_iterations_<regress_type>_<experiment>.png.

    The unconditional `return` after the Kd plot deliberately disables
    the differential-entropy section below it; that code is currently
    dead but kept for reference (it would need kwargs 'chems', 'prots',
    'chem2feature', 'prot2feature').
    """
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:
        while True:
            line = f.readline()
            if not line:
                break

            # Only timestamped log lines are parsed; the date prefix check
            # is hard-coded to 2019/2020 log files.
            if not line.startswith('2019') and not line.startswith('2020'):
                continue
            if not ' | ' in line:
                continue
            # Keep only the message after the 'timestamp | ' separator.
            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                # 'Iteration N' starts a new block; N is the last token.
                iteration = int(line.strip().split()[-1])
                if not iteration in iter_to_Kds:
                    iter_to_Kds[iteration] = []
                if not iteration in iter_to_idxs:
                    iter_to_idxs[iteration] = []
                continue

            elif line.startswith('\tAcquire '):
                # Format (from the parsing below):
                # '\tAcquire (chem_idx, prot_idx) ... Kd', Kd last.
                fields = line.strip().split()
                Kd = float(fields[-1])
                iter_to_Kds[iteration].append(Kd)
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                iter_to_idxs[iteration].append((chem_idx, prot_idx))
                continue

    assert (iter_to_Kds.keys() == iter_to_idxs.keys())

    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kd_iter.append(np.mean(iter_to_Kds[iteration]))
        Kd_iter_max.append(max(iter_to_Kds[iteration]))
        Kd_iter_min.append(min(iter_to_Kds[iteration]))
        all_Kds += list(iter_to_Kds[iteration])
        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            # NOTE(review): this break leaves Kd_iter/min/max shorter than
            # `iterations`, so the scatter/fill_between calls below get
            # mismatched lengths for 'perprot' logs with >5 iterations —
            # verify against real logs.
            break
    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(iterations, Kd_iter)
    plt.plot(iterations, Kd_iter)
    plt.fill_between(iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # Early exit: everything below is intentionally disabled (dead code).
    return

    # Plot differential entropy of acquired samples over iterations.
    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        # Accumulate chem+prot feature vectors acquired up to this
        # iteration, then fit a single Gaussian and record its entropy.
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            # Entropy undefined for fewer than two samples.
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())
    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()