def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    y_unk_pred = y_unk_pred[:]
    y_unk_pred[y_unk_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    y_pred = y_pred[:]
    y_pred[y_pred < 0] = 0
    y_pred[y_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_pred, var_pred, alpha=0.3,
                c=(y - y.min()) / (y.max() - y.min()))
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
                    loads.append(line)
            if len(loads) < 2:
                print "coudln't find loads for " + f
                continue
            loads_d = []
            for i in loads:
                loads_d.append(get_num_dict(i))

            chord_loads.append(get_50_percent(loads_d[0]))
            vserver_loads.append(get_50_percent(loads_d[1]))

            x_values.append(next(get_numbers(f)))

    plt.figure().set_size_inches(6.5,5)
    plt.xlabel("#Nodes")
    plt.ylabel("% of nodes storing 50% of data")

    from matplotlib.ticker import EngFormatter
    formatter = EngFormatter(places=0)
    plt.gca().xaxis.set_major_formatter(formatter)

    plt.ylim(0,0.5)
    plt.xlim(0,1000000)

    out_file = "intro_lb_chord.pdf"

    d1 = prepare(x_values,chord_loads)
    d2 = prepare(x_values,vserver_loads)

    d1['label'] = 'Neighbor Replication'
    d1['linestyle'] = 'dashed'
y_scores = np.where(y_scores > 0.5, 1, 0)
y_true = np.where(y_true > 0.5, 1, 0)

import os
os.mkdir('./output')
output_folder = 'output/'

#Area under the ROC curve
fpr, tpr, thresholds = roc_curve((y_true), y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
print("\nArea under the ROC curve: " + str(AUC_ROC))
roc_curve = plt.figure()
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(output_folder + "ROC.png")

#Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
precision = np.fliplr([precision])[0]
recall = np.fliplr([recall])[0]
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall,
         precision,
         '-',
         label='Area Under the Curve (AUC = %0.4f)' % AUC_prec_rec)
plt.title('Precision - Recall curve')
Exemple #5
0
    datasets, genes_list, n_cells = load_names(data_names)
    #datasets, genes = merge_datasets(datasets, genes_list)
    #datasets_dimred, genes = process_data(datasets, genes, hvg=hvg)
    datasets, genes = correct(datasets, genes_list)
    X = np.concatenate(datasets)
    X[X < 0] = 0

    cell_labels = (
        open('data/cell_labels/pancreas_cluster.txt').read().rstrip().split())
    er_idx = [i for i, cl in enumerate(cell_labels) if cl == 'beta_er']
    beta_idx = [i for i, cl in enumerate(cell_labels) if cl == 'beta']

    gadd_idx = list(genes).index('GADD45A')
    herp_idx = list(genes).index('HERPUD1')

    plt.figure()
    plt.boxplot([X[er_idx, gadd_idx], X[beta_idx, gadd_idx]], showmeans=True)
    plt.title('GADD45A (p < {})'.format(
        ttest_ind(X[er_idx, gadd_idx], X[beta_idx, gadd_idx])[1]))
    plt.xticks([1, 2], ['beta_er', 'beta'])
    plt.ylabel('Scaled gene expression')
    plt.savefig('er_stress_GADD45A.svg')

    plt.figure()
    plt.boxplot([X[er_idx, herp_idx], X[beta_idx, herp_idx]], showmeans=True)
    plt.title('HERPUD1 (p < {})'.format(
        ttest_ind(X[er_idx, herp_idx], X[beta_idx, herp_idx])[1]))
    plt.xticks([1, 2], ['beta_er', 'beta'])
    plt.ylabel('Scaled gene expression')
    plt.savefig('er_stress_HERPUD1.svg')
            for line in fileinput.input(f+"/control.messagecost.weighted"):
                ns = get_numbers(line)
                count += list(ns)[-1]

            nsize = next(get_numbers(f))
            if "chord" in f:
                chord_values.append(count/nsize)
                x_values1.append(nsize)
            else:
                best_values.append(count/nsize)
                x_values2.append(nsize)


    plt.figure().set_size_inches(6.5,5)
    plt.xlabel("#Nodes")
    plt.ylabel("Per-node Replication Maintenance Cost (KB)")

    from matplotlib.ticker import EngFormatter
    formatter = EngFormatter(places=0)
    plt.gca().xaxis.set_major_formatter(formatter)

    plt.yscale('log')
    plt.xlim(0,1000000)

    out_file = "intro_rep_ma.pdf"

    d1 = prepare(x_values1,chord_values)
    d2 = prepare(x_values2,best_values)

    d1['label'] = 'Neighbor Replication'
    d1['linestyle'] = 'dashed'
Exemple #7
0
if __name__ == '__main__':
    labels = np.array(open('data/cell_labels/all.txt').read().rstrip().split())

    # Scanorama.
    X = np.loadtxt('data/panorama_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_pan = sil(X[idx, :], labels[idx])
    print(np.median(sil_pan))

    # scran MNN.
    X = np.loadtxt('data/mnn_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_mnn = sil(X[idx, :], labels[idx])
    print(np.median(sil_mnn))

    # Seurat CCA.
    X = np.loadtxt('data/cca_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_cca = sil(X[idx, :], labels[idx])
    print(np.median(sil_cca))

    print(ttest_ind(sil_pan, sil_mnn))
    print(ttest_ind(sil_pan, sil_cca))

    plt.figure()
    plt.boxplot([sil_pan, sil_mnn, sil_cca], showmeans=True)
    plt.title('Distributions of Silhouette Coefficients')
    plt.xticks([1, 2, 3], ['Scanorama', 'scran MNN', 'Seurat CCA'])
    plt.ylabel('Silhouette Coefficient')
    plt.savefig('silhouette.svg')
Exemple #8
0
    datasets = [normalize(ds, axis=1) for ds in datasets]

    tsne = TSNE(n_iter=400, perplexity=100, verbose=2, random_state=69)

    tsne.fit(np.concatenate(datasets[1:]))
    plot_clusters(tsne.embedding_, np.concatenate(clusters[1:]), s=500)
    plt.title('Uncorrected data')
    plt.savefig('simulation_uncorrected.svg')

    # Assemble datasets.
    assembled = assemble(datasets[1:], verbose=1, sigma=1, knn=10, approx=True)
    tsne.fit(datasets[1])
    plot_clusters(tsne.embedding_, clusters[1], s=500)
    plt.title('Dataset 1')
    plt.xlabel('t-SNE 1')
    plt.ylabel('t-SNE 2')
    plt.savefig('simulation_ds1.svg')

    tsne.fit(datasets[2])
    plot_clusters(tsne.embedding_, clusters[2], s=500)
    plt.title('Dataset 2')
    plt.xlabel('t-SNE 1')
    plt.ylabel('t-SNE 2')
    plt.savefig('simulation_ds2.svg')

    tsne.fit(datasets[3])
    plot_clusters(tsne.embedding_, clusters[3], s=500)
    plt.title('Dataset 3')
    plt.xlabel('t-SNE 1')
    plt.ylabel('t-SNE 2')
    plt.savefig('simulation_ds3.svg')
                print "coudln't find loads for " + f
                continue
            monitor_d = []
            for i in monitor:
                monitor_d.append(get_num_dict(i))

            t_d = expand_num_dict(monitor_d[0]) 
            chord_monitor.append(float(sum(t_d))/len(t_d))
            t_d = expand_num_dict(monitor_d[1]) 
            vserver_monitor.append(float(sum(t_d))/len(t_d))

            x_values.append(next(get_numbers(f)))

    plt.figure().set_size_inches(6.5,5)
    plt.xlabel("#Nodes")
    plt.ylabel("#Monitored Nodes")

    from matplotlib.ticker import EngFormatter
    formatter = EngFormatter(places=0)
    plt.gca().xaxis.set_major_formatter(formatter)

    plt.xlim(0,1000000)

    out_file = "intro_mon_chord.pdf"

    d1 = prepare(x_values,chord_monitor)
    d2 = prepare(x_values,vserver_monitor)

    d1['label'] = 'Neighbor Replication'
    d1['linestyle'] = 'dashed'
    d2['label'] = "Virtual Servers"