Example #1
0
# Labels of the evaluation scores; the leading axis of `accs` has one slot
# per label.
accs_names = ['Calinski-Harabaz', 'Silhouette (euc)', 'Silhouette (corr)', 'Silhouette (jacc)', 'ARI']
# Score grid laid out as [score, mixture index, cluster-count index].
accs = np.zeros((len(accs_names), len(mixtures), len(num_cluster)))

# Sweep every (target cluster count, mixture value) combination; `i` and `j`
# remain available as positions into `accs`.
for i, trg_k in enumerate(num_cluster):
    for j, mix in enumerate(mixtures):
        # Format explicitly: without .format() the bare template text
        # 'k={0} mix={1}' would be printed instead of the actual values.
        print('Iteration k={0} mix={1}'.format(trg_k, mix))

        # --------------------------------------------------
        # 3.1. SETUP SOURCE DATA NMF CLUSTERING
        # --------------------------------------------------
        # Stays None when no source data was supplied; the transfer-learning
        # branch below is guarded by the same check.
        src_clustering = None
        if src_data is not None:
            src_clustering = NmfClustering(src_data, src_gene_ids, num_cluster=arguments.src_k)
            src_clustering.add_cell_filter(src_cell_filter_fun)
            src_clustering.add_gene_filter(src_gene_filter_fun)
            src_clustering.set_data_transformation(src_data_transf_fun)

        # --------------------------------------------------
        # 3.2. SETUP TARGET DATA CLUSTERING
        # --------------------------------------------------
        # Compare by value: `is` tests object identity, which is not
        # guaranteed for two equal strings, so the branch could be skipped
        # even though the method really is 'NMF'.
        if arguments.method == 'NMF' and src_data is not None:
            print('Transfer learning method is NMF.')
            trg_clustering = DaNmfClustering(src_clustering, trg_data, trg_gene_ids, num_cluster=trg_k)
            trg_clustering.add_cell_filter(trg_cell_filter_fun)
            trg_clustering.add_gene_filter(trg_gene_filter_fun)
            trg_clustering.set_data_transformation(trg_data_transf_fun)
            trg_clustering.apply(mix=mix, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                                 max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
Example #2
0
# Parse the comma-separated cluster counts into a real list of ints.
# A list (rather than a lazy `map` object) is needed because the result is
# measured with len() and iterated with its position below. Builtin `int` is
# what the removed `np.int` alias referred to.
num_cluster = [int(token) for token in arguments.cluster_range.split(",")]

# Labels of the evaluation scores; `accs` holds one row per label and one
# column per requested cluster count.
accs_names = ['KTA (linear)',  'ARI']
accs = np.zeros((len(accs_names), len(num_cluster)))

for i, k in enumerate(num_cluster):
    print('Iteration {0}, num-cluster={1}'.format(i, k))
    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    # NOTE(review): once `labels` has been filled in here it is no longer
    # None, so later iterations reuse the labels generated for the first k
    # -- confirm this is intended.
    if labels is None:
        # No source labels are provided, generate them via NMF clustering
        nmf_labels = None
        nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
        nmf_labels.add_cell_filter(cell_filter_fun)
        nmf_labels.add_gene_filter(gene_filter_fun)
        nmf_labels.set_data_transformation(data_transf_fun)
        nmf_labels.apply(k=k, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1, max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
        labels = nmf_labels.cluster_labels

    # Use perfect number of latent states for nmf and sc3
    # (i.e. the number of distinct label values, not the loop's k).
    src_labels = np.array(labels, dtype=int)
    src_lbl_set = np.unique(src_labels)
    k_now = src_lbl_set.size

    # Drop the reference to the previous iteration's model before building
    # the next one (presumably to release its memory early).
    nmf = None
    nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
Example #3
0
    # Cell and gene filter and transformation within the procedure
    # `partial` (presumably functools.partial -- import not visible here)
    # pre-binds the threshold keyword arguments, leaving callables that are
    # later registered on the clustering object via add_cell_filter /
    # add_gene_filter.
    cell_filter_fun = partial(sc.cell_filter,
                              num_expr_genes=min_expr_genes,
                              non_zero_threshold=non_zero_threshold)
    gene_filter_fun = partial(sc.gene_filter,
                              perc_consensus_genes=perc_consensus_genes,
                              non_zero_threshold=non_zero_threshold)
    # Passed unbound: the transformation takes no extra settings here.
    data_transf_fun = sc.data_transformation_log2

# Generating labels from complete dataset
# Single parenthesised string: prints the same text under Python 2 and
# Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print("Train complete data")
# Drop any earlier reference before constructing the model.
complete_nmf = None
# Row indices 0..data.shape[0]-1 are passed in the second positional slot
# (presumably the gene ids -- confirm against NmfClustering).
complete_nmf = NmfClustering(data,
                             np.arange(data.shape[0]),
                             num_cluster=num_cluster)
# Register the preprocessing callables, then run the factorisation.
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(k=num_cluster,
                   alpha=nmf_alpha,
                   l1=nmf_l1,
                   max_iter=nmf_max_iter,
                   rel_err=nmf_rel_err)
# Get labels
desc, target_nmf, trg_lbls_pred, mixed_data = method_sc3_filter(
    complete_nmf,
    data, [],
    cell_filter=cell_filter_fun,
    gene_filter=gene_filter_fun,
    transformation=data_transf_fun,
    mix=0.0,
Example #4
0
# Preprocess the matrix once up front: filter cells (columns), then genes
# (rows), then apply the log2 transformation.
cell_inds = sc.cell_filter(data, num_expr_genes=min_expr_genes, non_zero_threshold=non_zero_threshold)
data = data[:, cell_inds]
# labels = labels[cell_inds]
gene_inds = sc.gene_filter(data, perc_consensus_genes=perc_consensus_genes, non_zero_threshold=non_zero_threshold)
data = data[gene_inds, :]
data = sc.data_transformation_log2(data)
# Callables handed to the clustering objects below. The data is already
# preprocessed at this point, so no transformation is applied again.
# NOTE(review): the thresholds 0 / 1 / -1 look chosen so that the filters
# remove nothing further -- confirm against sc.cell_filter / sc.gene_filter.
cell_filter_fun = partial(sc.cell_filter, num_expr_genes=0, non_zero_threshold=-1)
gene_filter_fun = partial(sc.gene_filter, perc_consensus_genes=1, non_zero_threshold=-1)
data_transf_fun = sc.no_data_transformation
# All messages below go through str.format inside a single parenthesised
# argument so they print identically on Python 2 and Python 3. The two
# spaces before each placeholder reproduce the separator the former
# `print "text: ", value` statements emitted.
print("data dimensions after preprocessing: genes x cells:  {0}".format(data.shape))

# Generating labels from complete dataset
print("Train complete data")
# Drop any earlier reference before constructing the model.
complete_nmf = None
complete_nmf = NmfClustering(data, np.arange(data.shape[0]), num_cluster=num_cluster)
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(k=num_cluster, alpha=nmf_alpha, l1=nmf_l1, max_iter=nmf_max_iter, rel_err=nmf_rel_err)

# Get NMF labels
labels_NMF = complete_nmf.cluster_labels
label_names, label_counts = np.unique(labels_NMF, return_counts=True)
print("Labels NMF:  {0}".format(label_names))
print("Counts NMF:  {0}".format(label_counts))

# Get SC3 labels
# Only the predicted labels (third return value) are used below.
desc, target_nmf, trg_lbls_pred, mixed_data = method_sc3_filter(
    complete_nmf, data, [],
    cell_filter=cell_filter_fun,
    gene_filter=gene_filter_fun,
    transformation=data_transf_fun,
    mix=0.0,
    metric='euclidean',
    use_da_dists=False,
    n_trg_cluster=num_cluster)
labels_SC3 = trg_lbls_pred
label_names, label_counts = np.unique(labels_SC3, return_counts=True)
print("Labels SC3:  {0}".format(label_names))
Example #5
0
# Labels of the evaluation scores; `accs` holds one row per label and one
# column per requested cluster count.
accs_names = [
    'KTA (linear)', 'Silhouette (euc)', 'Silhouette (pearson)',
    'Silhouette (spearman)', 'ARI'
]
accs = np.zeros((len(accs_names), len(num_cluster)))

for i, k in enumerate(num_cluster):
    # The second placeholder must be {1}; with {0} twice the iteration index
    # was printed in place of the cluster count.
    print('Iteration {0}, num-cluster={1}'.format(i, k))

    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    # Drop the reference to the previous iteration's model before building
    # the next one (presumably to release its memory early).
    nmf = None
    nmf = NmfClustering(data, gene_ids, num_cluster=k)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k,
              alpha=arguments.nmf_alpha,
              l1=arguments.nmf_l1,
              max_iter=arguments.nmf_max_iter,
              rel_err=arguments.nmf_rel_err)

    # --------------------------------------------------
    # 3.2. EVALUATE CLUSTER ASSIGNMENT
    # --------------------------------------------------
    print('\nUnsupervised evaluation:')
    # Row 0 corresponds to the first entry of accs_names, 'KTA (linear)'.
    accs[0, i] = unsupervised_acc_kta(nmf.pp_data,
                                      nmf.cluster_labels,
                                      kernel='linear')
Example #6
0
                data,
                labels,
                mode=1,
                target_ncells=n_trg,
                source_ncells=n_src[s])

        # Integer array so the labels can be index-selected further down.
        # Builtin `int` is what the removed `np.int` alias referred to.
        src_labels = np.array(src_labels, dtype=int)
        #src_labels_SC3 = np.array(src_labels_SC3, dtype=np.int)

        # 3.c. train source once per repetition
        # Parenthesised single argument: identical output on Python 2 and 3.
        print("Train source data of rep {0}".format(r + 1))
        # Drop the reference to the previous model before building the next
        # one (presumably to release its memory early).
        source_nmf = None
        source_nmf = NmfClustering(src,
                                   np.arange(src.shape[0]),
                                   num_cluster=num_cluster)
        source_nmf.add_cell_filter(cell_filter_fun)
        source_nmf.add_gene_filter(gene_filter_fun)
        source_nmf.set_data_transformation(data_transf_fun)
        source_nmf.apply(k=num_cluster,
                         alpha=nmf_alpha,
                         l1=nmf_l1,
                         max_iter=nmf_max_iter,
                         rel_err=nmf_rel_err)

        # Calculate ARIs and KTAs
        # Reference labels are restricted to `remain_cell_inds` (presumably
        # the cells that survived filtering) so both arguments line up.
        source_aris[s, r] = metrics.adjusted_rand_score(
            src_labels[source_nmf.remain_cell_inds], source_nmf.cluster_labels)

        # Two spaces before the placeholder reproduce the separator the
        # former `print 'text = ', value` statement emitted.
        print('SOURCE ARI Labels NMF, Method NMF =  {0}'.format(source_aris[s, r]))

        # Repetition counter is advanced manually by the enclosing loop.
        r += 1