Beispiel #1
0
# Zero-initialised array of shape (2, len(num_cluster)): one column per
# candidate cluster count.
accs = np.zeros((2, len(num_cluster)))

for i, k in enumerate(num_cluster):
    print('Iteration {0}, num-cluster={1}'.format(i, k))
    # --------------------------------------------------
    # 3.1. SETUP SOURCE DATA NMF CLUSTERING
    # --------------------------------------------------
    if labels is None:
        # No source labels are provided, generate them via NMF clustering.
        # NOTE(review): `labels` is rebound at the end of this branch, so
        # unless it is reset further down the loop (not visible here) this
        # branch only runs for the first k and later iterations reuse those
        # labels - confirm this is intended.
        # Rebinding to None first drops the reference to any previous
        # clustering object before the next one is constructed.
        nmf_labels = None
        nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
        nmf_labels.add_cell_filter(cell_filter_fun)
        nmf_labels.add_gene_filter(gene_filter_fun)
        nmf_labels.set_data_transformation(data_transf_fun)
        nmf_labels.apply(k=k,
                         alpha=arguments.nmf_alpha,
                         l1=arguments.nmf_l1,
                         max_iter=arguments.nmf_max_iter,
                         rel_err=arguments.nmf_rel_err)
        labels = nmf_labels.cluster_labels

    # Use perfect number of latent states for nmf and sc3:
    # k_now is the number of distinct labels actually present.
    # `np.int` was only an alias of the builtin `int` and has been removed
    # from NumPy (1.24); the builtin gives the same dtype.
    src_labels = np.array(labels, dtype=int)
    src_lbl_set = np.unique(src_labels)
    k_now = src_lbl_set.size

    # Drop the reference to the previous iteration's model before building
    # the next one.
    nmf = None
    nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
    nmf.add_cell_filter(cell_filter_fun)
    nmf.add_gene_filter(gene_filter_fun)
    nmf.set_data_transformation(data_transf_fun)
    nmf.apply(k=k_now,
              alpha=arguments.nmf_alpha,
              l1=arguments.nmf_l1,
              max_iter=arguments.nmf_max_iter,
              rel_err=arguments.nmf_rel_err)
    # --------------------------------------------------
Beispiel #2
0
                              perc_consensus_genes=perc_consensus_genes,
                              non_zero_threshold=non_zero_threshold)
    data_transf_fun = sc.data_transformation_log2

# Generating labels from complete dataset
# print() with a single string argument behaves identically on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print("Train complete data")
complete_nmf = None
# The row indices 0..data.shape[0]-1 are passed as the second (ids) argument.
complete_nmf = NmfClustering(data,
                             np.arange(data.shape[0]),
                             num_cluster=num_cluster)
complete_nmf.add_cell_filter(cell_filter_fun)
complete_nmf.add_gene_filter(gene_filter_fun)
complete_nmf.set_data_transformation(data_transf_fun)
complete_nmf.apply(k=num_cluster,
                   alpha=nmf_alpha,
                   l1=nmf_l1,
                   max_iter=nmf_max_iter,
                   rel_err=nmf_rel_err)
# Get labels: run the SC3-style filter method on the fitted model with an
# empty list as third argument and mix=0.0. Of the four returned values only
# trg_lbls_pred is used in the code visible here.
desc, target_nmf, trg_lbls_pred, mixed_data = method_sc3_filter(
    complete_nmf,
    data, [],
    cell_filter=cell_filter_fun,
    gene_filter=gene_filter_fun,
    transformation=data_transf_fun,
    mix=0.0,
    metric='euclidean',
    use_da_dists=False,
    n_trg_cluster=num_cluster)
labels = trg_lbls_pred
# Distinct predicted labels and how many times each one occurs.
label_names, label_counts = np.unique(labels, return_counts=True)
Beispiel #3
0
    data_src = data_transformation_log2(data_src)

    # Load Target data
    data_trg = np.loadtxt(path_trg)
    # `np.str` was only an alias of the builtin `str` and has been removed
    # from NumPy (1.24); the builtin gives the same dtype.
    gene_ids_trg = np.loadtxt(path_geneids_trg, dtype=str)
    # Delete non-unique genes
    data_trg, gene_ids_trg = delete_nonunique_genes(data_trg, gene_ids_trg)
    # Both selections are computed on the same, not yet subsetted matrix so
    # that neither filter sees the effect of the other.
    # Apply cell filter
    valid_cells = cell_filter(data_trg)
    # Apply gene filter
    valid_genes = gene_filter(data_trg)

    # Create filtered data: cells are selected along axis 1, genes along
    # axis 0, and the gene ids are subsetted with the same gene selection.
    data_trg = data_trg[:, valid_cells]
    data_trg = data_trg[valid_genes, :]
    gene_ids_trg = gene_ids_trg[valid_genes]
    # Log transform data
    data_trg = data_transformation_log2(data_trg)

    # train source and test performance
    # Build an NmfClustering on the source data and fit it with
    # n_source_cluster clusters (at most 100 iterations, rel_err=1e-3).
    source_nmf = NmfClustering(data_src, gene_ids_src, num_cluster=n_source_cluster)
    source_nmf.apply(k=n_source_cluster, max_iter=100, rel_err=1e-3)

    # Number of repetitions can be changed in line 153 of utils.py
    # The target clustering is constructed from the fitted source model and
    # is handed a copy of data_trg rather than the local array itself.
    target_nmf = DaNmfClustering(source_nmf, data_trg.copy(), gene_ids_trg, num_cluster=n_target_cluster)
    target_nmf.apply(k=n_target_cluster, calc_transferability=True)
    # NOTE(review): presumably calc_transferability=True makes the attribute
    # below available on target_nmf - confirm against DaNmfClustering.
    # target_nmf.transferability_pvalue

    # np.savez(fname, source_ari=source_ari, target_ari=target_ari, n_mix=n_mix, n_source=n_source, n_target=n_target, n_source_cluster=n_source_cluster,
    # n_target_cluster=n_target_cluster)
Beispiel #4
0
def method_nmf(src, src_labels, trg, trg_labels, n_src_cluster, n_trg_cluster):
    """Cluster the target data with NmfClustering and return its labels.

    The clustering is built from ``trg`` with the row indices
    ``0 .. trg.shape[0] - 1`` as ids and ``n_trg_cluster`` clusters, then
    fitted via ``apply()`` with that method's default arguments.

    ``src``, ``src_labels``, ``trg_labels`` and ``n_src_cluster`` are not
    used here (presumably kept so the signature matches other ``method_*``
    functions - verify against callers).

    Returns:
        A 3-tuple ``('NMF', cluster_labels, None)`` where ``cluster_labels``
        is the ``cluster_labels`` attribute of the fitted clustering object.
    """
    clustering = NmfClustering(
        trg,
        np.arange(trg.shape[0]),
        num_cluster=n_trg_cluster,
    )
    clustering.apply()
    predicted = clustering.cluster_labels
    return 'NMF', predicted, None