Example #1
0
def method_da_nmf(src, src_labels, trg, trg_labels, n_src_cluster, n_trg_cluster, mix=0.0):
    """Cluster target data with DaNmfClustering on top of a source NmfClustering.

    :param src: source data matrix; only ``src.shape[0]`` is read here before
        it is handed to ``NmfClustering``
    :param src_labels: unused by this function (kept for a uniform method signature)
    :param trg: target data matrix; only ``trg.shape[0]`` is read here before
        it is handed to ``DaNmfClustering``
    :param trg_labels: unused by this function (kept for a uniform method signature)
    :param n_src_cluster: ``num_cluster`` passed to the source ``NmfClustering``
    :param n_trg_cluster: ``num_cluster`` passed to the target ``DaNmfClustering``
    :param mix: mixing value forwarded to ``DaNmfClustering.apply``
    :return: tuple ``(description, cluster_labels, reject)`` where the last two
        are read from the fitted ``DaNmfClustering`` instance
    """
    # No identifiers are supplied by the caller, so plain row indices are passed
    # in the id argument of both clustering objects.
    src_clustering = NmfClustering(src, np.arange(src.shape[0]), num_cluster=n_src_cluster)
    cp = DaNmfClustering(src_clustering, trg, np.arange(trg.shape[0]), num_cluster=n_trg_cluster)
    cp.apply(mix=mix)
    lbls = cp.cluster_labels
    # `np.int` was only an alias of the builtin `int` and has been removed from
    # NumPy (1.24); the builtin truncates towards zero in exactly the same way.
    desc = 'DA-NMF-Mix {0}'.format(int(mix*100.))
    return desc, lbls, cp.reject
Example #2
0
def da_nmf_distances(data, gene_ids, src, metric='euclidean', mixture=0.5):
    """Blend plain distances with distances computed on a DA-NMF reconstruction.

    :param data: target data, passed to ``distances`` and ``DaNmfClustering``
    :param gene_ids: ids forwarded to ``DaNmfClustering`` together with ``data``
    :param src: first argument of ``DaNmfClustering`` (presumably a fitted
        source clustering - verify against the caller)
    :param metric: metric name forwarded to ``distances``
    :param mixture: weight of the NMF-based distances; ``0.0`` returns the
        plain distances without running any NMF, ``1.0`` uses only the
        NMF-based distances
    :return: ``mixture * nmf_dist + (1 - mixture) * plain_dist``
    :raises Exception: if the NMF-based distances are all (numerically) zero
        and ``mixture == 1.0``, i.e. the result would carry no information
    """
    if mixture == 0.0:
        return distances(data, [], metric=metric)

    cp = DaNmfClustering(src, data, gene_ids, num_cluster=3)
    cp.apply(mix=0.0)
    # Only W and H2 of the intermediate model are needed for the reconstruction.
    W, _, H2 = cp.intermediate_model
    # convex combination of vanilla distance and nmf distance
    dist1 = distances(data, [], metric=metric)
    dist2 = distances(W.dot(H2), [], metric=metric)
    # normalize distance: rescale the nmf distances to the range of the vanilla ones
    max_dist2 = np.max(dist2)
    if max_dist2 < 1e-10:
        if mixture == 1.0:
            raise Exception('Distances are all zero and mixture=1.0. Seems that source and target'
                            ' data do not go well together.')
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('Warning! Max distance is 0.0.')
    else:
        max_dist1 = np.max(dist1)
        print('Max dists before normalization:  {0} {1}'.format(max_dist1, max_dist2))
        dist2 *= max_dist1 / max_dist2
    return mixture*dist2 + (1.-mixture)*dist1
Example #3
0
        # --------------------------------------------------
        # 3.1. MIX TARGET & SOURCE DATA
        # --------------------------------------------------
        # The archive entry 'src' holds a pickled clustering object (its methods
        # are called below), so NumPy >= 1.16.3 needs allow_pickle=True to load it.
        # NOTE(review): unpickling executes arbitrary code - only load archives
        # produced by this pipeline.
        src_data = np.load(arguments.src_fname, allow_pickle=True)
        src_nmf = src_data['src'][()]  # [()] unwraps the 0-d object array
        print(type(src_nmf))

        # Reset the stored filters/transformation and install pass-through
        # versions: keep every cell (column), every gene (row), data unchanged.
        src_nmf.cell_filter_list = list()
        src_nmf.gene_filter_list = list()
        src_nmf.add_cell_filter(lambda x: np.arange(x.shape[1]).tolist())
        src_nmf.add_gene_filter(lambda x: np.arange(x.shape[0]).tolist())
        src_nmf.set_data_transformation(lambda x: x)

        da_nmf = DaNmfClustering(src_nmf, data, gene_ids, k)
        da_nmf.add_cell_filter(cell_filter_fun)
        da_nmf.add_gene_filter(gene_filter_fun)
        da_nmf.set_data_transformation(data_transf_fun)
        # Transferability is only requested for the first (i, j) combination.
        calc_transf = (i == 0 and j == 0)
        mix_data, _, _ = da_nmf.get_mixed_data(
            k=k,
            mix=mix,
            reject_ratio=0.,
            calc_transferability=calc_transf,
            max_iter=2000)
        mix_gene_ids = da_nmf.common_ids
        if calc_transf:
            # The last entry of `reject` is a pair; its second element is stored.
            _, accs_trans[j, i] = da_nmf.reject[-1]
Example #4
0
        # --------------------------------------------------
        # 3.1. SETUP SOURCE DATA NMF CLUSTERING
        # --------------------------------------------------
        # Only configured here (filters + transformation); `apply` is not
        # called on the source clustering in this section.
        src_clustering = None
        if src_data is not None:
            src_clustering = NmfClustering(src_data, src_gene_ids, num_cluster=arguments.src_k)
            src_clustering.add_cell_filter(src_cell_filter_fun)
            src_clustering.add_gene_filter(src_gene_filter_fun)
            src_clustering.set_data_transformation(src_data_transf_fun)

        # --------------------------------------------------
        # 3.2. SETUP TARGET DATA CLUSTERING
        # --------------------------------------------------
        # Strings must be compared with `==`: `is` tests object identity, which
        # only matches by accident of interning and fails for values parsed
        # from the command line.
        if arguments.method == 'NMF' and src_data is not None:
            print('Transfer learning method is NMF.')
            trg_clustering = DaNmfClustering(src_clustering, trg_data, trg_gene_ids, num_cluster=trg_k)
            trg_clustering.add_cell_filter(trg_cell_filter_fun)
            trg_clustering.add_gene_filter(trg_gene_filter_fun)
            trg_clustering.set_data_transformation(trg_data_transf_fun)
            trg_clustering.apply(mix=mix, alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                                 max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)

        # Without source data there is nothing to transfer from: plain NMF.
        if arguments.method == 'NMF' and src_data is None:
            print('Single task clustering method is NMF.')
            trg_clustering = NmfClustering(trg_data, trg_gene_ids, num_cluster=trg_k)
            trg_clustering.add_cell_filter(trg_cell_filter_fun)
            trg_clustering.add_gene_filter(trg_gene_filter_fun)
            trg_clustering.set_data_transformation(trg_data_transf_fun)
            trg_clustering.apply(alpha=arguments.nmf_alpha, l1=arguments.nmf_l1,
                                 max_iter=arguments.nmf_max_iter, rel_err=arguments.nmf_rel_err)
Example #5
0
    data_src = data_transformation_log2(data_src)

    # Load Target data
    data_trg = np.loadtxt(path_trg)
    # `np.str` was only an alias of the builtin `str` and has been removed
    # from NumPy (1.24); the builtin yields the same dtype.
    gene_ids_trg = np.loadtxt(path_geneids_trg, dtype=str)
    # Delete non-unique genes
    data_trg, gene_ids_trg = delete_nonunique_genes(data_trg, gene_ids_trg)
    # Both filters are evaluated on the same, still unfiltered matrix so that
    # neither selection depends on the other.
    valid_cells = cell_filter(data_trg)
    valid_genes = gene_filter(data_trg)

    # Create filtered data (cells are columns, genes are rows)
    data_trg = data_trg[:, valid_cells]
    data_trg = data_trg[valid_genes, :]
    gene_ids_trg = gene_ids_trg[valid_genes]
    # Log transform data
    data_trg = data_transformation_log2(data_trg)

    # train source and test performance
    source_nmf = NmfClustering(data_src, gene_ids_src, num_cluster=n_source_cluster)
    source_nmf.apply(k=n_source_cluster, max_iter=100, rel_err=1e-3)

    # Number of repetitions can be changed in line 153 of utils.py
    # A copy of the target matrix is handed over, leaving `data_trg` itself
    # untouched by the clustering object.
    target_nmf = DaNmfClustering(source_nmf, data_trg.copy(), gene_ids_trg, num_cluster=n_target_cluster)
    target_nmf.apply(k=n_target_cluster, calc_transferability=True)
    # target_nmf.transferability_pvalue

    # np.savez(fname, source_ari=source_ari, target_ari=target_ari, n_mix=n_mix, n_source=n_source, n_target=n_target, n_source_cluster=n_source_cluster,
    # n_target_cluster=n_target_cluster)
Example #6
0
        for i in range(len(n_target)):
            # Fraction of the `split_1` cells that should end up in the target set.
            target_perc = n_target[i] / float(split_1.size)
            ttt = StratifiedShuffleSplit(n_splits=2, test_size=target_perc)
            # The loop is only used to draw the splits; the indices of the LAST
            # split remain bound afterwards and are the ones used below.
            for split_11, split_22 in ttt.split(data.pp_data[:, split_1].T, data.cluster_labels[split_1]):
                # Single-argument print(...) emits the same text on Python 2 and 3.
                print('{0} {1}'.format(split_11.size, split_22.size))

            # shuffle the gene ids for testing
            perm_inds = np.random.permutation(data.pp_data.shape[0])
            target_gene_ids = gene_ids[perm_inds].copy()
            target_data = data.pp_data[:, split_1[split_22]]
            # Rows are permuted with the same indices as the ids so they stay aligned.
            target_data = target_data[perm_inds, :]
            target_labels = data.cluster_labels[split_1[split_22]]

            for m in range(len(n_mix)):
                target_nmf = DaNmfClustering(source_nmf, target_data.copy(), target_gene_ids, num_cluster=n_target_cluster)
                # target_nmf.apply(k=n_target_cluster, mix=n_mix[m], calc_transferability=False)

                mixed_data, rec_trg_data, _ = target_nmf.get_mixed_data(mix=n_mix[m], use_H2=True, calc_transferability=False)
                W, H, H2 = target_nmf.intermediate_model

                # PCA component range is tied to the number of target cells (4% - 7%).
                # `np.int` was only an alias of the builtin `int` and has been
                # removed from NumPy (1.24).
                num_cells = target_data.shape[1]
                max_pca_comp = np.ceil(num_cells*0.07).astype(int)
                min_pca_comp = np.floor(num_cells*0.04).astype(int)
                # Configure the SC3 pipeline on the mixed data, step by step.
                sc3_mix = SC3Clustering(mixed_data, target_gene_ids, pc_range=[min_pca_comp, max_pca_comp], sub_sample=True, consensus_mode=0)
                sc3_mix.add_distance_calculation(partial(sc.distances, metric='euclidean'))
                sc3_mix.add_dimred_calculation(partial(sc.transformations, components=max_pca_comp, method='pca'))
                sc3_mix.add_intermediate_clustering(partial(sc.intermediate_kmeans_clustering, k=n_target_cluster))
                sc3_mix.set_build_consensus_matrix(sc.build_consensus_matrix)
                sc3_mix.set_consensus_clustering(partial(sc.consensus_clustering, n_components=n_target_cluster))
Example #7
0
        # --------------------------------------------------
        # The archive entry 'src' holds a pickled clustering object (its methods
        # are called below), so NumPy >= 1.16.3 needs allow_pickle=True to load it.
        # NOTE(review): unpickling executes arbitrary code - only load archives
        # produced by this pipeline.
        src_data = np.load(arguments.src_fname, allow_pickle=True)
        src_nmf = src_data['src'][()]  # [()] unwraps the 0-d object array
        src_nmf.cell_filter_list = list()
        src_nmf.gene_filter_list = list()
        # source data is already filtered and transformed, so pass-through
        # filters (keep all columns / all rows) and an identity transformation
        # are installed instead.
        src_nmf.add_cell_filter(lambda x: np.arange(x.shape[1]).tolist())
        src_nmf.add_gene_filter(lambda x: np.arange(x.shape[0]).tolist())
        src_nmf.set_data_transformation(lambda x: x)

        # Copies are handed over so that `data` and `gene_ids` stay untouched
        # (presumably the clustering may modify its inputs in place; the
        # original carried debug prints of np.max(data) around this call -
        # TODO confirm).
        da_nmf = DaNmfClustering(src_nmf, data.copy(), gene_ids.copy(), k)
        da_nmf.add_cell_filter(cell_filter_fun)
        da_nmf.add_gene_filter(gene_filter_fun)
        da_nmf.set_data_transformation(data_transf_fun)
        mixed_data, _, _ = da_nmf.get_mixed_data(mix=mix, calc_transferability=False)
        # NOTE(review): an earlier revision used `da_nmf.common_ids` here -
        # confirm that `gene_ids` is the id set matching the rows of `mixed_data`.
        mix_gene_ids = da_nmf.gene_ids


        # --------------------------------------------------
        # 3.2. TARGET DATA CLUSTERING