예제 #1
0
        # No source labels are provided, generate them via NMF clustering
        nmf_labels = None
        nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
        nmf_labels.add_cell_filter(cell_filter_fun)
        nmf_labels.add_gene_filter(gene_filter_fun)
        nmf_labels.set_data_transformation(data_transf_fun)
        nmf_labels.apply(k=k, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1, max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
        labels = nmf_labels.cluster_labels

    # Use perfect number of latent states for nmf and sc3
    src_labels = np.array(labels, dtype=np.int)
    src_lbl_set = np.unique(src_labels)
    k_now = src_lbl_set.size

    nmf = None
    nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k_now, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1, max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)

    # --------------------------------------------------
    # 3.2. EVALUATE CLUSTER ASSIGNMENT
    # --------------------------------------------------
    print('\nUnsupervised evaluation:')
    accs[0, i] = unsupervised_acc_kta(nmf.pp_data, nmf.cluster_labels, kernel='linear')
    print '  -KTA (linear)     : ', accs[0, i]
    print('\nSupervised evaluation:')
    accs[1, i] = metrics.adjusted_rand_score(labels[nmf.remain_cell_inds], nmf.cluster_labels)
    print '  -ARI: ', accs[1, i]
예제 #2
0
# Pre-allocate result containers and bookkeeping for the experiment.
# One slot per repetition (judging by the names these hold the best mixing
# index and its ARI -- TODO confirm against the code that fills them).
res_opt_mix_ind = np.zeros((reps, 1))
res_opt_mix_aris = np.zeros((reps, 1))
# Progress counter: starts at 1, with one experiment per (method, repetition).
exp_counter = 1
num_exps = len(methods) * reps
# One row per accuracy function, one column per method.
accs = np.zeros((len(acc_funcs), len(methods)))
# n_trg rows and one column per method; the *_reps variant adds a trailing
# axis of length reps.
trg_labels = np.zeros((n_trg, len(methods)))
trg_labels_reps = np.zeros((n_trg, len(methods), reps))

# Use perfect number of latent states for nmf and sc3:
# the source cluster count is the number of distinct source labels, the
# target cluster count is taken directly from num_cluster.
src_lbl_set = np.unique(src_labels)
n_trg_cluster = num_cluster
n_src_cluster = src_lbl_set.size

# Train the source model once, before the loops below.
# The second positional argument is a running index over the first axis of
# data_source (presumably standing in for gene ids -- TODO confirm).
source_nmf = NmfClustering_initW(data_source,
                                 np.arange(data_source.shape[0]),
                                 num_cluster=n_src_cluster,
                                 labels=src_labels)
source_nmf.apply(k=n_src_cluster,
                 alpha=nmf_alpha,
                 l1=nmf_l1,
                 max_iter=nmf_max_iter,
                 rel_err=nmf_rel_err)

# Score the source clustering with the adjusted Rand index (only ARI is
# computed here). The reference labels are restricted to remain_cell_inds so
# both arguments refer to the same cells.
source_aris = metrics.adjusted_rand_score(
    src_labels[source_nmf.remain_cell_inds], source_nmf.cluster_labels)
print('SOURCE ARI = ', source_aris)

# Multiple replications loop
# MTL/DA mixing parameter loop
for r in range(reps):
예제 #3
0
            # Repetition loop for the current (s, g, c) setting. `r` is not
            # advanced in the visible part of the loop body; the increment is
            # expected further down, outside this excerpt.
            r = 0
            while r < reps:
                # 1. Generate scRNA data: ten times as many cells as the
                #    source and target sets need together.
                # NOTE(review): `10. * (...)` passes a float as num_cells --
                # confirm generate_toy_data accepts a non-integer count.
                data, labels = generate_toy_data(num_genes=genes[g], num_cells=10. * (n_trg + n_src[s]), cluster_spec=cluster_spec, gamma_rate=gamma_rate)
                # 2. Split source and target according to specified mode/setting
                src, trg, src_labels, trg_labels = split_source_target(data, labels, target_ncells=n_trg, source_ncells=n_src[s], mode=splitting_mode, common=common[c], source_clusters=None, noise_target=False, noise_sd=0.1, cluster_spec=cluster_spec)
                # Builtin `int` replaces `np.int`, which was only an alias for
                # it and no longer exists in NumPy >= 1.24.
                trg_labels = np.array(trg_labels, dtype=int)
                src_labels = np.array(src_labels, dtype=int)
                # 3.a. Subsampling order for target: one random permutation per
                #      repetition, so every subsample below is a prefix of it.
                inds = np.random.permutation(trg_labels.size)
                # 3.b. Use perfect number of latent states for nmf and sc3
                src_lbl_set = np.unique(src_labels)
                n_trg_cluster = np.unique(trg_labels).size
                n_src_cluster = src_lbl_set.size
                # 3.c. Train source once per repetition
                source_nmf = NmfClustering_initW(src, np.arange(src.shape[0]), num_cluster=n_src_cluster, labels=src_labels)
                source_nmf.apply(k=n_src_cluster,  alpha=nmf_alpha, l1=nmf_l1, max_iter=nmf_max_iter, rel_err=nmf_rel_err)
                # Evaluate source clustering.
                # NOTE(review): the score uses all of src_labels, which
                # assumes source_nmf dropped no cells -- confirm, or index
                # the labels by the cells the model retained.
                source_aris[s,g,c,r] = metrics.adjusted_rand_score(src_labels, source_nmf.cluster_labels)
                print('ITER(', r, '): SOURCE ARI = ', source_aris[s,g,c,r])

                # 3.d. Target data subsampling loop
                for i in range(len(percs)):
                    # Number of target cells in this subsample (truncated
                    # towards zero by int()).
                    n_trg_perc = int(n_trg * percs[i])
                    # Take the first n_trg_perc permuted columns of trg and
                    # their labels; copies keep trg and trg_labels untouched.
                    p_trg = trg[:, inds[:n_trg_perc]].copy()
                    p_trg_labels = trg_labels[inds[:n_trg_perc]].copy()
                    # 4. MTL/DA mixing parameter loop
                    res_desc = []
                    for m in range(len(methods)):
                        # Run method
                        print(('Running experiment {0} of {1}: repetition {2} - {3} source cells, {4} genes, {5} common clusters, {6} target cells and the {7}th method'.format(exp_counter, num_exps, r+1, n_src[s], genes[g], common[c],n_trg_perc, m+1)))