# No source labels are provided, so generate them via NMF clustering.
# NOTE(review): the name is rebound to None before the new instance is built,
# which drops any previous instance first (presumably to release memory
# between runs) - confirm before removing.
nmf_labels = None
nmf_labels = NmfClustering(data, gene_ids, num_cluster=k, labels=[])
nmf_labels.add_cell_filter(cell_filter_fun)
nmf_labels.add_gene_filter(gene_filter_fun)
nmf_labels.set_data_transformation(data_transf_fun)
nmf_labels.apply(
    k=k,
    alpha=arguments.nmf_alpha,
    l1=arguments.nmf_l1,
    max_iter=arguments.nmf_max_iter,
    rel_err=arguments.nmf_rel_err,
)
labels = nmf_labels.cluster_labels

# Use perfect number of latent states for nmf and sc3: one latent state per
# distinct generated label.
# `np.int` was merely an alias of the builtin `int` and has been removed from
# NumPy, so the builtin is used directly; the resulting dtype is unchanged.
src_labels = np.array(labels, dtype=int)
src_lbl_set = np.unique(src_labels)
k_now = src_lbl_set.size

# Re-fit with the generated labels and the matching number of clusters.
nmf = None
nmf = NmfClustering_initW(data, gene_ids, labels=labels, num_cluster=k_now)
nmf.add_cell_filter(cell_filter_fun)
nmf.add_gene_filter(gene_filter_fun)
nmf.set_data_transformation(data_transf_fun)
nmf.apply(
    k=k_now,
    alpha=arguments.nmf_alpha,
    l1=arguments.nmf_l1,
    max_iter=arguments.nmf_max_iter,
    rel_err=arguments.nmf_rel_err,
)

# --------------------------------------------------
# 3.2. EVALUATE CLUSTER ASSIGNMENT
# --------------------------------------------------
# Each result line is printed as ONE pre-formatted string so the output is the
# same whether `print` is the Python 2 statement or the Python 3 function
# (the former print statements were a syntax error on Python 3).
print('\nUnsupervised evaluation:')
accs[0, i] = unsupervised_acc_kta(nmf.pp_data, nmf.cluster_labels, kernel='linear')
print(' -KTA (linear) :  {0!s}'.format(accs[0, i]))

print('\nSupervised evaluation:')
# Only the cells listed in `remain_cell_inds` are compared against the
# cluster assignment.
accs[1, i] = metrics.adjusted_rand_score(labels[nmf.remain_cell_inds], nmf.cluster_labels)
print(' -ARI:  {0!s}'.format(accs[1, i]))
res_opt_mix_ind = np.zeros((reps, 1)) res_opt_mix_aris = np.zeros((reps, 1)) exp_counter = 1 num_exps = len(methods) * reps accs = np.zeros((len(acc_funcs), len(methods))) trg_labels = np.zeros((n_trg, len(methods))) trg_labels_reps = np.zeros((n_trg, len(methods), reps)) # Use perfect number of latent states for nmf and sc3 src_lbl_set = np.unique(src_labels) n_trg_cluster = num_cluster n_src_cluster = src_lbl_set.size ## Train source once source_nmf = NmfClustering_initW(data_source, np.arange(data_source.shape[0]), num_cluster=n_src_cluster, labels=src_labels) source_nmf.apply(k=n_src_cluster, alpha=nmf_alpha, l1=nmf_l1, max_iter=nmf_max_iter, rel_err=nmf_rel_err) ## Calculate ARIs and KTAs source_aris = metrics.adjusted_rand_score( src_labels[source_nmf.remain_cell_inds], source_nmf.cluster_labels) print('SOURCE ARI = ', source_aris) # Multiple replications loop # MTL/DA mixing parameter loop for r in range(reps):
# Repetition loop driven by a manual counter.
# NOTE(review): the increment of `r` is not visible in this excerpt - confirm
# it happens further down in the loop body.
r = 0
while r < reps:
    # 1. Generate scRNA data
    # NOTE(review): `num_cells` is passed as a float (10. * ...); presumably
    # more cells are generated than needed so the split can sample from them.
    data, labels = generate_toy_data(
        num_genes=genes[g],
        num_cells=10. * (n_trg + n_src[s]),
        cluster_spec=cluster_spec,
        gamma_rate=gamma_rate,
    )

    # 2. Split source and target according to specified mode/setting
    src, trg, src_labels, trg_labels = split_source_target(
        data,
        labels,
        target_ncells=n_trg,
        source_ncells=n_src[s],
        mode=splitting_mode,
        common=common[c],
        source_clusters=None,
        noise_target=False,
        noise_sd=0.1,
        cluster_spec=cluster_spec,
    )
    # `np.int` was merely an alias of the builtin `int` and has been removed
    # from NumPy, so the builtin is used directly; the dtype is unchanged.
    trg_labels = np.array(trg_labels, dtype=int)
    src_labels = np.array(src_labels, dtype=int)

    # 3.a. Subsampling order for target
    inds = np.random.permutation(trg_labels.size)

    # 3.b. Use perfect number of latent states for nmf and sc3
    src_lbl_set = np.unique(src_labels)
    n_trg_cluster = np.unique(trg_labels).size
    n_src_cluster = src_lbl_set.size

    # 3.c. Train source once per repetition
    source_nmf = NmfClustering_initW(
        src,
        np.arange(src.shape[0]),
        num_cluster=n_src_cluster,
        labels=src_labels,
    )
    source_nmf.apply(
        k=n_src_cluster,
        alpha=nmf_alpha,
        l1=nmf_l1,
        max_iter=nmf_max_iter,
        rel_err=nmf_rel_err,
    )

    # Evaluate source clustering
    source_aris[s, g, c, r] = metrics.adjusted_rand_score(src_labels, source_nmf.cluster_labels)
    print('ITER(', r, '): SOURCE ARI = ', source_aris[s, g, c, r])

    # 3.d. Target data subsampling loop
    for i in range(len(percs)):
        # Number of target cells kept for this percentage (truncated).
        n_trg_perc = int(n_trg * percs[i])
        # Take the first `n_trg_perc` entries of the shuffled order; cells are
        # selected along the second axis of `trg`.
        p_trg = trg[:, inds[:n_trg_perc]].copy()
        p_trg_labels = trg_labels[inds[:n_trg_perc]].copy()

        # 4. MTL/DA mixing parameter loop
        res_desc = []
        for m in range(len(methods)):
            # Run method
            print(
                'Running experiment {0} of {1}: repetition {2} - {3} source cells, '
                '{4} genes, {5} common clusters, {6} target cells and the {7}th method'.format(
                    exp_counter, num_exps, r + 1, n_src[s], genes[g], common[c], n_trg_perc, m + 1
                )
            )