Example #1
0
# Sweep every (target cluster count, mixture weight) combination.
# The indices i and j stay bound next to the values they refer to.
for i, trg_k in enumerate(num_cluster):
    for j, mix in enumerate(mixtures):
        # Report the values used in this iteration.
        print('Iteration k={0} mix={1}'.format(trg_k, mix))

        # --------------------------------------------------
        # 3.1. SETUP SOURCE DATA NMF CLUSTERING
        # --------------------------------------------------
        # A source clustering is only configured when source data exists;
        # it is configured here but not applied in this section.
        src_clustering = None
        if src_data is not None:
            src_clustering = NmfClustering(src_data, src_gene_ids, num_cluster=arguments.src_k)
            src_clustering.add_cell_filter(src_cell_filter_fun)
            src_clustering.add_gene_filter(src_gene_filter_fun)
            src_clustering.set_data_transformation(src_data_transf_fun)

        # --------------------------------------------------
        # 3.2. SETUP TARGET DATA CLUSTERING
        # --------------------------------------------------
        # The method name is compared by value (==). An identity test (`is`)
        # against a string literal depends on interning and can silently
        # skip these branches.
        if arguments.method == 'NMF' and src_data is not None:
            print('Transfer learning method is NMF.')
            trg_clustering = DaNmfClustering(src_clustering, trg_data, trg_gene_ids, num_cluster=trg_k)
            trg_clustering.add_cell_filter(trg_cell_filter_fun)
            trg_clustering.add_gene_filter(trg_gene_filter_fun)
            trg_clustering.set_data_transformation(trg_data_transf_fun)
            trg_clustering.apply(mix=mix, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                                 max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)

        if arguments.method == 'NMF' and src_data is None:
            print('Single task clustering method is NMF.')
Example #2
0
# One row of scores per entry in accs_names, one column per cluster count.
accs_names = ['KTA (linear)',  'ARI']
accs = np.zeros((2, len(num_cluster)))

for i, k in enumerate(num_cluster):
    print('Iteration {0}, num-cluster={1}'.format(i, k))
    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    if labels is None:
        # No source labels are provided, generate them via NMF clustering
        # NOTE(review): `labels` is overwritten below, so this branch runs for
        # at most one value of k; later iterations reuse those labels.
        # Confirm that this is intended.
        nmf_labels = None  # drop any previous instance before building a new one
        nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
        nmf_labels.add_cell_filter(cell_filter_fun)
        nmf_labels.add_gene_filter(gene_filter_fun)
        nmf_labels.set_data_transformation(data_transf_fun)
        nmf_labels.apply(k=k, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                         max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
        labels = nmf_labels.cluster_labels

    # Use perfect number of latent states for nmf and sc3
    # The builtin int is used as dtype; the np.int alias was removed from NumPy.
    src_labels = np.array(labels, dtype=int)
    src_lbl_set = np.unique(src_labels)
    k_now = src_lbl_set.size

    nmf = None  # release the previous iteration's model before building the next
    nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k_now, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
              max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
Example #3
0
                              num_expr_genes=min_expr_genes,
                              non_zero_threshold=non_zero_threshold)
    gene_filter_fun = partial(sc.gene_filter,
                              perc_consensus_genes=perc_consensus_genes,
                              non_zero_threshold=non_zero_threshold)
    data_transf_fun = sc.data_transformation_log2

# Generating labels from complete dataset
# A parenthesised single-argument print behaves the same on Python 2 and 3.
print("Train complete data")
complete_nmf = None  # drop any previous instance before building a new one
# The gene ids passed here are simply the row indices 0..data.shape[0]-1.
complete_nmf = NmfClustering(data,
                             np.arange(data.shape[0]),
                             num_cluster=num_cluster)
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(k=num_cluster,
                   alpha=nmf_alpha,
                   l1=nmf_l1,
                   max_iter=nmf_max_iter,
                   rel_err=nmf_rel_err)
# Get labels
desc, target_nmf, trg_lbls_pred, mixed_data = method_sc3_filter(
    complete_nmf,
    data, [],
    cell_filter=cell_filter_fun,
    gene_filter=gene_filter_fun,
    transformation=data_transf_fun,
    mix=0.0,
    metric='euclidean',
    use_da_dists=False,
Example #4
0
                              non_zero_threshold=non_zero_threshold)
    gene_filter_fun = partial(sc.gene_filter,
                              perc_consensus_genes=perc_consensus_genes,
                              non_zero_threshold=non_zero_threshold)
    data_transf_fun = sc.data_transformation_log2

# Cluster the complete dataset with NMF and take its cluster labels.
print("Train complete data")
complete_nmf = None
# The gene ids passed here are simply the row indices 0..data.shape[0]-1.
complete_nmf = NmfClustering(
    data, np.arange(data.shape[0]), num_cluster=num_cluster, labels=[])
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(
    k=num_cluster, alpha=nmf_alpha, l1=nmf_l1,
    max_iter=nmf_max_iter, rel_err=nmf_rel_err)

# Read the labels back and report each distinct label with its count.
labels = complete_nmf.cluster_labels
label_names, label_counts = np.unique(labels, return_counts=True)
print("Labels: ", label_names)
print("Counts: ", label_counts)

# Restrict data to the columns listed in complete_nmf.remain_cell_inds.
data = data[:, complete_nmf.remain_cell_inds]
Example #5
0
    'Silhouette (spearman)', 'ARI'
]
# Five score rows, one column per cluster count.
accs = np.zeros((5, len(num_cluster)))

for i, k in enumerate(num_cluster):
    # {1} refers to k, so the cluster count is shown rather than the
    # iteration index a second time.
    print('Iteration {0}, num-cluster={1}'.format(i, k))

    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    nmf = None  # release the previous iteration's model before building the next
    nmf = NmfClustering(data, gene_ids, num_cluster=k)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k,
              alpha=arguments.nmf_alpha,
              l1=arguments.nmf_l1,
              max_iter=arguments.nmf_max_iter,
              rel_err=arguments.nmf_rel_err)

    # --------------------------------------------------
    # 3.2. EVALUATE CLUSTER ASSIGNMENT
    # --------------------------------------------------
    print('\nUnsupervised evaluation:')
    # Row 0 holds the linear-kernel KTA score for this cluster count.
    accs[0, i] = unsupervised_acc_kta(nmf.pp_data,
                                      nmf.cluster_labels,
                                      kernel='linear')
    accs[1, i] = unsupervised_acc_silhouette(nmf.pp_data,
                                             nmf.cluster_labels,
Example #6
0
                mode=1,
                target_ncells=n_trg,
                source_ncells=n_src[s])

        # The builtin int is used as dtype; the np.int alias was removed from NumPy.
        src_labels = np.array(src_labels, dtype=int)
        #src_labels_SC3 = np.array(src_labels_SC3, dtype=int)

        # 3.c. train source once per repetition
        # The print calls below take a single pre-formatted string so that they
        # produce the same output on Python 2 and Python 3.
        print("Train source data of rep {0}".format(r + 1))
        source_nmf = None  # release the previous model before building the next
        source_nmf = NmfClustering(src,
                                   np.arange(src.shape[0]),
                                   num_cluster=num_cluster)
        source_nmf.add_cell_filter(cell_filter_fun)
        source_nmf.add_gene_filter(gene_filter_fun)
        source_nmf.set_data_transformation(data_transf_fun)
        source_nmf.apply(k=num_cluster,
                         alpha=nmf_alpha,
                         l1=nmf_l1,
                         max_iter=nmf_max_iter,
                         rel_err=nmf_rel_err)

        # Calculate ARIs and KTAs
        # The reference labels are indexed with remain_cell_inds before being
        # compared with the predicted cluster labels.
        source_aris[s, r] = metrics.adjusted_rand_score(
            src_labels[source_nmf.remain_cell_inds], source_nmf.cluster_labels)

        print('SOURCE ARI Labels NMF, Method NMF =  {0}'.format(source_aris[s, r]))

        # Advance the repetition counter.
        r += 1

# Report the mean over all entries of source_aris. A single pre-formatted
# string prints the same on Python 2 and Python 3.
print('Mean ARIs {0}'.format(np.mean(source_aris)))