コード例 #1
0
ファイル: scanorama.py プロジェクト: trichelab/scanorama
def plot_clusters(coords, clusters, s=1):
    """Scatter-plot 2-D coordinates colored by integer cluster label.

    coords   -- array of shape (n_cells, >=2); columns 0 and 1 are plotted.
    clusters -- one integer label per cell, shape (n_cells,).
    s        -- marker size forwarded to plt.scatter.
    """
    n_cells = coords.shape[0]
    n_labels = clusters.shape[0]
    if n_cells != n_labels:
        # Report the mismatch on stderr before the assertion below aborts.
        sys.stderr.write(
            'Mismatch: {} cells, {} labels\n'.format(n_cells, n_labels)
        )
    assert(n_cells == n_labels)

    # Repeat a fixed 27-color palette until there is one color per cluster id.
    palette = [
        '#377eb8', '#ff7f00', '#4daf4a',
        '#f781bf', '#a65628', '#984ea3',
        '#999999', '#e41a1c', '#dede00',
        '#ffe119', '#e6194b', '#ffbea3',
        '#911eb4', '#46f0f0', '#f032e6',
        '#d2f53c', '#008080', '#e6beff',
        '#aa6e28', '#800000', '#aaffc3',
        '#808000', '#ffd8b1', '#000080',
        '#808080', '#fabebe', '#a3f4ff'
    ]
    n_colors = int(max(clusters) + 1)
    colors = np.array(list(islice(cycle(palette), n_colors)))

    plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1], c=colors[clusters], s=s)
コード例 #2
0
def plot_batch(df, batch):
    """Plot the 50 uM bar/swarm comparison and per-compound dose-response.

    df    -- tidy DataFrame with columns comp, conc, fluo, control.
    batch -- batch name; only 'AlaA' and 'AlaB' are plotted (anything else
             returns after the t-test printout).

    Saves figures/tb_culture_50uM_<batch>.svg and
    figures/tb_culture_<batch>.svg.
    """

    # Plot 50uM.

    # conc is log-encoded; -3 corresponds to the 50 uM condition.
    df_50uM = df[df.conc == -3]

    if batch.startswith('Ala'):
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16' ]:
            df_comp = df_50uM[df_50uM.comp == comp]
            # One-sided test for compound < DMSO: halve the two-sided P
            # when t is negative, otherwise take the complementary tail.
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO' ]
    elif batch == 'AlaB':
        order = [ 'IKK16', 'K252a', 'RIF', 'DMSO' ]
    else:
        return

    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    #plt.ylim([ 10, 300000 ])
    if not batch.startswith('Ala'):
        plt.yscale('log')
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response.

    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]

        # One panel per compound. Use len(order) instead of the original
        # hard-coded 5 so a 4-compound batch does not leave an empty slot.
        plt.subplot(1, len(order), cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset,
                        color='black',)
        plt.title(comp)
        if batch.startswith('Ala'):
            plt.ylim([ 0., 1.3 ])
        else:
            plt.ylim([ 10, 1000000 ])
            plt.yscale('log')
        plt.xticks(list(range(-3, -6, -1)),
                   [ '50', '25', '10', ])#'1', '0.1' ])

    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
コード例 #3
0
def visualize_heatmap(chem_prot, suffix=''):
    """Draw chem_prot as a diverging heatmap and save it under figures/."""
    plt.figure()
    # Red/blue diverging colormap (seaborn hues 220 and 10).
    palette = sns.diverging_palette(220, 10, as_cmap=True)
    sns.heatmap(chem_prot, cmap=palette)
    mkdir_p('figures/')
    # An empty suffix falls back to the plain heatmap.png filename.
    if suffix == '':
        out_fname = 'figures/heatmap.png'
    else:
        out_fname = 'figures/heatmap_{}.png'.format(suffix)
    plt.savefig(out_fname, dpi=300)
    plt.close()
コード例 #4
0
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    """Scatter predicted score vs. predictive variance, colored by
    (negated) acquisition value.

    Saves figures/acquisition_unknown_<regress_type>.png at 200 dpi.
    """
    # Clip huge predictions for plot readability. Use an explicit copy:
    # the original `y_unk_pred[:]` returns a *view* for numpy arrays, so
    # the in-place clipping silently mutated the caller's array.
    y_unk_pred = y_unk_pred.copy()
    y_unk_pred[y_unk_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
コード例 #5
0
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    """Scatter predicted score vs. variance, colored by normalized truth.

    Saves figures/variance_vs_pred_<prefix>regressors<regress_type>.png.
    """
    # Clamp predictions to [0, 10000] for plotting. Copy explicitly:
    # `y_pred[:]` is a *view* for numpy arrays, so the original in-place
    # clamping mutated the caller's array.
    y_pred = y_pred.copy()
    y_pred[y_pred < 0] = 0
    y_pred[y_pred > 10000] = 10000

    plt.figure()
    # Color by min-max-normalized ground truth.
    plt.scatter(y_pred, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
コード例 #6
0
def plot_values(df, score_fn):
    """Bar-plot per-model docking affinities of designed compounds.

    df       -- DataFrame with columns model, compound_, affinity.
    score_fn -- docking score function name ('rdock' or other); controls
                the y-axis range and the output file name.

    Saves figures/design_docking_<score_fn>.svg and prints pairwise
    t-tests between the model affinities.
    """
    models = ['mlper1', 'sparsehybrid', 'gp', 'real']

    # Fixed color per model; replaces the original if/elif chain.
    model_colors = {
        'gp': '#3e5c71',
        'sparsehybrid': '#2d574e',
        'mlper1': '#a12424',
        'real': '#A9A9A9',
    }

    plt.figure(figsize=(10, 4))

    for midx, model in enumerate(models):
        if model not in model_colors:
            # Bug fix: the original message was 'Invalid model'.format(model)
            # — no placeholder, so the offending name was silently dropped.
            raise ValueError('Invalid model {}'.format(model))
        color = model_colors[model]

        plt.subplot(1, len(models), midx + 1)
        df_subset = df[df.model == model]
        compounds = np.array(df_subset.compound_)
        if model == 'real':
            order = sorted(compounds)
        else:
            # Strongest (most negative) affinity first.
            order = compounds[np.argsort(-df_subset.affinity)]
        sns.barplot(data=df_subset,
                    x='compound_',
                    y='affinity',
                    color=color,
                    order=order)
        # Affinities are negative; inverted limits draw stronger scores up.
        if score_fn == 'rdock':
            plt.ylim([0, -40])
        else:
            plt.ylim([0, -12])
        plt.xticks(rotation=45)

    plt.savefig('figures/design_docking_{}.svg'.format(score_fn))
    plt.close()

    print('Score function: {}'.format(score_fn))
    print('GP vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'gp'].affinity,
            df[df.model == 'mlper1'].affinity,
        )))
    print('Hybrid vs MLP: {}'.format(
        ttest_ind(
            df[df.model == 'sparsehybrid'].affinity,
            df[df.model == 'mlper1'].affinity,
        )))
    print('')
コード例 #7
0
def plot_mapping(curr_ds, curr_ref, ds_ind, ref_ind):
    """t-SNE embed two datasets and draw lines between matched points."""
    tsne = TSNE(n_iter=400, verbose=VERBOSE, random_state=69)

    # Embed the query dataset; shift it up by 100 so the two point
    # clouds do not overlap in the shared axes.
    tsne.fit(curr_ds)
    plt.figure()
    coords_ds = tsne.embedding_[:, :]
    coords_ds[:, 1] += 100
    plt.scatter(coords_ds[:, 0], coords_ds[:, 1])

    # Re-fitting replaces embedding_, so the first slice stays intact.
    tsne.fit(curr_ref)
    coords_ref = tsne.embedding_[:, :]
    plt.scatter(coords_ref[:, 0], coords_ref[:, 1])

    # Build one polyline for all matches; a None entry breaks the line
    # between consecutive segments.
    xs, ys = [], []
    for ds_i, ref_i in zip(ds_ind, ref_ind):
        xs.extend([coords_ds[ds_i, 0], coords_ref[ref_i, 0], None])
        ys.extend([coords_ds[ds_i, 1], coords_ref[ref_i, 1], None])
    plt.plot(xs, ys, 'b-', alpha=0.3)
    plt.show()
コード例 #8
0
                continue
            if t >= 1600000 + a:
                break

            d[line[:1]]['x'].append(t-a)
            d[line[:1]]['y'].append(k)
            s['x'].append(t-a)
            s['y'].append(n)

    for i in ['J']:
        d[i] = sample(d[i],1000)

    s = sample(s)
    print "loaded data"

    fig = plt.figure()

    from matplotlib.ticker import EngFormatter
    formatter = EngFormatter(places=1)

    textsize = 11

    mode = "all"
    if mode == 'all':
        fig.set_size_inches(20,4)

        p = 211
        i = 'J'
        ax1 = fig.add_subplot(p)
        ax1.set_ylabel('#Keys Moved')
        formatter = EngFormatter(places=1)
コード例 #9
0
                          'chem',
                          'prot',
                          'pred_Kd',
                          'Kd',
                          'Kdpoint',
                      ])

    models = sorted(set(df.model))
    betas = sorted(set(df.beta))

    for model in models:
        if model == 'Sparse Hybrid':
            palette = sns.color_palette('ch:2.5,-.2,dark=.3', len(betas))
        else:
            palette = list(reversed(sns.color_palette('Blues_d', len(betas))))
        plt.figure()

        for bidx, beta in enumerate(betas):
            df_subset = df[(df.model == model) & (df.beta == beta)]

            seen, order_list = set(), []
            for zinc, order, Kd in zip(df_subset.zincid, df_subset.order,
                                       df_subset.Kd):
                if zinc in seen:
                    continue
                seen.add(zinc)
                order_list.append((order, Kd))

            order_list = [
                order for order, _ in sorted(order_list, key=lambda x: x[1])
            ]
コード例 #10
0
#% Load model
# NOTE(review): script fragment — `filepath`, `exp`, `prediction`,
# `compute_metrics`, `end_training` and the input arrays are defined
# elsewhere in the original script; verify before reuse.
model = load_model(filepath + 'unet_exp_' + str(exp) + '.h5', compile=False)
area = 11  # threshold forwarded to prediction() — presumably a min patch area; TODO confirm
# Prediction
ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction(
    model, image_array, image_ref, final_mask, mask_ts_, patch_size, area)

# Metrics
# Per-pixel confusion matrix and derived scores on the flattened labels.
cm = confusion_matrix(ref_final, pre_final)
metrics = compute_metrics(ref_final, pre_final)  # project helper: [acc, f1, recall, precision]
print('Confusion  matrix \n', cm)
print('Accuracy: ', metrics[0])
print('F1score: ', metrics[1])
print('Recall: ', metrics[2])
print('Precision: ', metrics[3])

# Alarm area
# Percentage of pixels predicted positive (TP + FP) over all pixels.
total = (cm[1, 1] + cm[0, 1]) / len(ref_final) * 100
print('Area to be analyzed', total)

print('training time', end_training)
print('test time', time_ts)

#%% Show the results
# prediction of the whole image
fig1 = plt.figure('whole prediction')
plt.imshow(prob_recontructed)
# Show the test tiles
fig2 = plt.figure('prediction of test set')
plt.imshow(prob_recontructed * mask_ts)
コード例 #11
0
            for line in fileinput.input(f+"/control.queries"):
                if "loadFreqs" in line:
                    loads.append(line)
            if len(loads) < 2:
                print "coudln't find loads for " + f
                continue
            loads_d = []
            for i in loads:
                loads_d.append(get_num_dict(i))

            chord_loads.append(get_50_percent(loads_d[0]))
            vserver_loads.append(get_50_percent(loads_d[1]))

            x_values.append(next(get_numbers(f)))

    plt.figure().set_size_inches(6.5,5)
    plt.xlabel("#Nodes")
    plt.ylabel("% of nodes storing 50% of data")

    from matplotlib.ticker import EngFormatter
    formatter = EngFormatter(places=0)
    plt.gca().xaxis.set_major_formatter(formatter)

    plt.ylim(0,0.5)
    plt.xlim(0,1000000)

    out_file = "intro_lb_chord.pdf"

    d1 = prepare(x_values,chord_loads)
    d2 = prepare(x_values,vserver_loads)
コード例 #12
0
# ref_final, pre_final, prob_recontructed, ref_reconstructed, mask_no_considered_, mask_ts, time_ts = prediction(model, image_array, image_ref, final_mask, mask_ts_, patch_size, area)

# Metrics
# Flatten the (n_patches, h, w) reference and prediction stacks into 1-D
# label vectors so they can be compared element-wise.
true_labels = np.reshape(patches_test_ref, (patches_test_ref.shape[0]* patches_test_ref.shape[1]*patches_test_ref.shape[2]))

predicted_labels = np.reshape(patches_pred, (patches_pred.shape[0]* patches_pred.shape[1]*patches_pred.shape[2]))

cm = confusion_matrix(true_labels, predicted_labels)
metrics = compute_metrics(true_labels, predicted_labels)  # project helper: [acc, f1, recall, precision]
print('Confusion  matrix \n', cm)
print('Accuracy: ', metrics[0])
print('F1score: ', metrics[1])
print('Recall: ', metrics[2])
print('Precision: ', metrics[3])

# Alarm area: percentage of pixels predicted positive (TP + FP).
# Bug fix: the original read `true_label` (undefined — NameError); the
# flattened vector defined above is `true_labels`.
total = (cm[1,1]+cm[0,1])/len(true_labels)*100
print('Area to be analyzed',total)

print('training time', end_training)
print('test time', time_ts)

#%% Show the results
# prediction of the whole image
fig1 = plt.figure('whole prediction')
plt.imshow(prob_recontructed)

# Show the test tiles
# fig2 = plt.figure('prediction of test set')
# plt.imshow(prob_recontructed*mask_ts)
コード例 #13
0
# Flatten the 4-D mask stack into a single column of per-pixel labels.
y_true = masks.reshape(
    masks.shape[0] * masks.shape[1] * masks.shape[2] * masks.shape[3], 1)

# Binarize both predictions and ground truth at a 0.5 threshold.
y_scores = np.where(y_scores > 0.5, 1, 0)
y_true = np.where(y_true > 0.5, 1, 0)

import os
# Bug fix: os.mkdir('./output') raised FileExistsError on every re-run;
# makedirs with exist_ok=True is idempotent.
os.makedirs('./output', exist_ok=True)
output_folder = 'output/'

#Area under the ROC curve
fpr, tpr, thresholds = roc_curve((y_true), y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
print("\nArea under the ROC curve: " + str(AUC_ROC))
# NOTE(review): this rebinding shadows the roc_curve() function used just
# above — any later call to roc_curve() would fail. Left unchanged in case
# downstream code references this variable name.
roc_curve = plt.figure()
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(output_folder + "ROC.png")

#Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
# fliplr reverses the arrays so recall is ascending before integration.
precision = np.fliplr([precision])[0]
recall = np.fliplr([recall])[0]
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall,
コード例 #14
0
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    """Visualize observed vs. unknown compounds in chemical feature space.

    Embeds the chemical features with PCA, UMAP and t-SNE, then for each
    embedding saves three scatter plots under figures/: observed-vs-unknown
    overlay, unknown points colored by variance rank, and unknown points
    colored by (negated) acquisition.

    Required kwargs: chems, chem2feature, idx_obs, idx_unk, regress_type,
    prot_target (None for no protein-specific suffix).

    Also prints Spearman/Pearson correlations between each unknown
    compound's distance to its nearest observed neighbor and its
    predicted variance.
    """
    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    # Unique chemical indices on each side of the observed/unknown split.
    chem_idx_obs = sorted(set([i for i, _ in idx_obs]))
    chem_idx_unk = sorted(set([i for i, _ in idx_unk]))

    feature_obs = np.array([chem2feature[chems[i]] for i in chem_idx_obs])
    feature_unk = np.array([chem2feature[chems[i]] for i in chem_idx_unk])

    # Does distance to the nearest observed compound track the model's
    # predictive variance?
    from sklearn.neighbors import NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])
    print('Distance Spearman r = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson rho = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    # Stack observed then unknown features; labels: 0 = observed, 1 = unknown.
    X = np.vstack([feature_obs, feature_unk])
    labels = np.concatenate(
        [np.zeros(len(chem_idx_obs)),
         np.ones(len(chem_idx_unk))])
    # (Removed unused `sidx = np.argsort(-var_unk_pred)` from the original.)

    from fbpca import pca
    U, s, Vt = pca(
        X,
        k=3,
    )
    X_pca = U * s

    from umap import UMAP
    um = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    )
    X_umap = um.fit_transform(X)

    from MulticoreTSNE import MulticoreTSNE as TSNE
    tsne = TSNE(
        n_components=2,
        n_jobs=20,
    )
    X_tsne = tsne.fit_transform(X)

    if prot_target is None:
        suffix = ''
    else:
        suffix = '_' + prot_target

    for name, coords in zip(
        ['pca', 'umap', 'tsne'],
        [X_pca, X_umap, X_tsne],
    ):
        # Unknown compounds (blue, translucent) with observed overlaid
        # as opaque orange crosses.
        plt.figure()
        sns.scatterplot(
            x=coords[labels == 1, 0],
            y=coords[labels == 1, 1],
            color='blue',
            alpha=0.1,
        )
        plt.scatter(
            x=coords[labels == 0, 0],
            y=coords[labels == 0, 1],
            color='orange',
            alpha=1.0,
            marker='x',
            linewidths=10,
        )
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()

        # Unknown compounds colored by variance rank.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0],
                    y=coords[labels == 1, 1],
                    c=ss.rankdata(var_unk_pred),
                    alpha=0.1,
                    cmap='coolwarm')
        plt.savefig('figures/latent_scatter_{}_var_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()

        # Unknown compounds colored by negated acquisition value.
        plt.figure()
        plt.scatter(x=coords[labels == 1, 0],
                    y=coords[labels == 1, 1],
                    c=-acquisition,
                    alpha=0.1,
                    cmap='hot')
        plt.savefig('figures/latent_scatter_{}_acq_{}{}.png'.format(
            name, regress_type, suffix),
                    dpi=300)
        plt.close()
コード例 #15
0
ファイル: parse_log.py プロジェクト: tjustorm/uncertainty
def parse_log(regress_type, experiment, **kwargs):
    """Parse an iterate_davis2011kinase log and plot Kd over iterations.

    Scans iterate_davis2011kinase_<regress_type>_<experiment>.log for
    'Iteration N' and 'Acquire (i, j) ... Kd' records, groups acquired Kds
    and (chem_idx, prot_idx) pairs by iteration, then saves a scatter/line
    plot with a min/max band to figures/.

    kwargs (chems, prots, chem2feature, prot2feature) are only consulted
    by the differential-entropy analysis, which is currently disabled by
    an early return.
    """
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:
        for line in f:
            # Payload lines look like '2019-... | <message>'.
            if not line.startswith('2019') and not line.startswith('2020'):
                continue
            if ' | ' not in line:
                continue

            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                iter_to_Kds.setdefault(iteration, [])
                iter_to_idxs.setdefault(iteration, [])

            elif line.startswith('\tAcquire '):
                # Fields: Acquire (<chem_idx>, <prot_idx>) ... <Kd>
                fields = line.strip().split()

                Kd = float(fields[-1])
                iter_to_Kds[iteration].append(Kd)

                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                iter_to_idxs[iteration].append((chem_idx, prot_idx))

    assert (iter_to_Kds.keys() == iter_to_idxs.keys())
    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.

    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kd_iter.append(np.mean(iter_to_Kds[iteration]))
        Kd_iter_max.append(max(iter_to_Kds[iteration]))
        Kd_iter_min.append(min(iter_to_Kds[iteration]))
        all_Kds += list(iter_to_Kds[iteration])

        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            # Per-protein runs only summarize the first five iterations.
            break

    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(iterations, Kd_iter)
    plt.plot(iterations, Kd_iter)
    # Shade the min-max Kd range around the per-iteration mean.
    plt.fill_between(iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # NOTE(review): deliberate early return — the differential-entropy
    # analysis below is disabled dead code, kept for reference.
    return

    # Plot differential entropy of acquired samples over iterations.

    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            # Entropy is undefined for fewer than two samples.
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()