exp_counter = 1 num_exps = len(n_src) * reps * len(percs) * len(methods) # Run jobs for s in range(len(n_src)): accs = np.zeros((len(acc_funcs), reps, len(percs), len(methods))) accs_desc = list() opt_mix_ind = np.zeros((reps, len(percs))) opt_mix_aris = np.zeros((reps, len(percs))) num_strat = np.zeros((reps, len(percs), len(methods))) res_desc = [] r = 0 while r < reps: # Split data in source and target randomly (mode =1) or randomly stratified (mode = 2) src, trg, src_labels, trg_labels = split_source_target( data, labels, mode=1, target_ncells=n_trg, source_ncells=n_src[s]) trg_labels = np.array(trg_labels, dtype=np.int) src_labels = np.array(src_labels, dtype=np.int) # 3.a. Subsampling order for target inds = np.random.permutation(trg_labels.size) # 3.b. Use perfect number of latent states for nmf and sc3 src_lbl_set = np.unique(src_labels) n_trg_cluster = np.unique(trg_labels).size n_src_cluster = src_lbl_set.size # 3.c. train source once per repetition print "Train source data of rep {0}".format(r + 1) source_nmf = None
if len(target_ncells_range) == 0: target_ncells_range = [args.target_ncells] for sidx, source_ncells in enumerate(source_ncells_range): for tidx, target_ncells in enumerate(target_ncells_range): print( '\nSplit artificial single-cell RNA-seq data in target and source data.' ) data_source, data_target, true_labels_source, true_labels_target = \ split_source_target( data, labels, target_ncells = target_ncells, source_ncells = source_ncells, source_clusters = source_clusters, noise_target = args.noise_target, noise_sd = args.noise_sd, mode = args.splitting_mode, common = args.common ) print 'Target data dimension: ', data_target.shape print 'Source data dimension: ', data_source.shape # 3. GENERATE GENE AND CELL NAMES gene_ids = np.arange(args.num_genes) # 4. SAVE RESULTS print('Saving target data to \'{0}\'.'.format(args.fout_target_data)) np.savetxt(os.path.splitext(args.fout_target_data)[0] + "_T" + str(tidx + 1) + "_" + str(target_ncells) + "_S" +
if len(source_ncells_range) == 0: source_ncells_range = [args.source_ncells] if len(target_ncells_range) == 0: target_ncells_range = [args.target_ncells] for sidx, source_ncells in enumerate(source_ncells_range): for tidx, target_ncells in enumerate(target_ncells_range): print('\nSplit artificial single-cell RNA-seq data in target and source data.') data_source, data_target, true_labels_source, true_labels_target = \ split_source_target( data, labels, target_ncells = target_ncells, source_ncells = source_ncells, source_clusters = source_clusters, noise_target = args.noise_target, noise_sd = args.noise_sd, mode = args.splitting_mode ) print 'Target data dimension: ', data_target.shape print 'Source data dimension: ', data_source.shape # 3. GENERATE GENE AND CELL NAMES gene_ids = np.arange(args.num_genes) # 4. SAVE RESULTS print('Saving target data to \'{0}\'.'.format(args.fout_target_data)) np.savetxt( os.path.splitext(args.fout_target_data)[0] + "_T" + str(tidx+1) + "_" + str(target_ncells) +
def _flatten_cluster_spec(spec):
    """Return the leaves of an arbitrarily nested list, left to right.

    Anything that is not a list counts as a single leaf. Empty (sub-)lists
    contribute no leaves.
    """
    if not isinstance(spec, list):
        return [spec]
    leaves = []
    for item in spec:
        leaves.extend(_flatten_cluster_spec(item))
    return leaves


def experiment_loop(fname, methods, acc_funcs, n_src=800, n_trg=800, n_genes=1000, mode=2, reps=10,
                    cluster_mode=False, cluster_spec=[1, 2, 3, [4, 5], [6, [7, 8]]],
                    percs=[0.1, 0.4, 0.8]):
    """Run every method on repeatedly generated toy data and save the scores.

    For each repetition a fresh data set is generated with
    ``generate_toy_data`` and split into source and target parts with
    ``split_source_target``. For each entry of ``percs`` a target subsample is
    drawn (using one random permutation per repetition), every method is run
    on it, and every accuracy function scores the result. All scores and the
    experiment settings are written to ``fname`` with ``np.savez``.

    Args:
        fname: Output path handed to ``np.savez``.
        methods: Sequence of callables, each invoked as
            ``method(src, src_labels, trg, trg_labels, n_src_cluster=...,
            n_trg_cluster=...)`` and expected to return
            ``(description, predicted_labels, reject)``.
        acc_funcs: Sequence of callables, each invoked as
            ``acc(trg, trg_labels, predicted_labels, reject, strat_lbl_inds)``
            and expected to return ``(score, description)``.
        n_src: Number of source cells requested from the split.
        n_trg: Number of target cells requested from the split.
        n_genes: Number of genes passed to the data generator.
        mode: Splitting mode forwarded to ``split_source_target``.
        reps: Number of repetitions.
        cluster_mode: If true, each entry of ``percs`` is used as the number
            of target clusters handed to the methods and all ``n_trg`` target
            cells are used. Otherwise each entry is the fraction of ``n_trg``
            target cells to subsample.
        cluster_spec: Nested list describing the cluster hierarchy; forwarded
            unchanged to ``generate_toy_data``. Only its leaf count is used
            here (for the log message).
        percs: Fractions (or cluster counts, see ``cluster_mode``) to iterate.

    Returns:
        None. Results are written to ``fname``.
    """
    n_cluster = len(_flatten_cluster_spec(cluster_spec))
    # Single pre-formatted string so the output is the same on Python 2 and 3
    # (the old print statement emitted a separating space after the text).
    print('Number of cluster is  {0}'.format(n_cluster))

    accs = np.zeros((len(acc_funcs), reps, len(percs), len(methods)))
    accs_desc = [''] * len(acc_funcs)

    # NOTE(review): num_strat is allocated and saved but never filled in here.
    num_strat = np.zeros((reps, len(percs), len(methods)))
    res_desc = []
    for r in range(reps):
        # 1. Generate scRNA data (twice as many cells as needed for the split)
        data, labels = generate_toy_data(num_genes=n_genes,
                                         num_cells=2. * (n_trg + n_src),
                                         cluster_spec=cluster_spec)
        # 2. Split source and target according to specified mode/setting
        src, trg, src_labels, trg_labels = split_source_target(
            data, labels,
            target_ncells=n_trg, source_ncells=n_src,
            mode=mode, source_clusters=None,
            noise_target=False, noise_sd=0.1)

        # np.int was only an alias of the builtin int and no longer exists in
        # current NumPy releases.
        trg_labels = np.array(trg_labels, dtype=int)
        src_labels = np.array(src_labels, dtype=int)

        # 3.a. Subsampling order for target
        inds = np.random.permutation(trg_labels.size)

        # 3.b. Use perfect number of latent states for nmf and sc3
        src_lbl_set = np.unique(src_labels)
        known_src_labels = set(src_lbl_set.tolist())
        n_trg_cluster = np.unique(trg_labels).size
        n_src_cluster = src_lbl_set.size

        # 3.c. Target data subsampling loop
        for i, perc in enumerate(percs):
            if cluster_mode:
                # percs holds cluster counts; the override persists for the
                # remaining iterations of this repetition.
                n_trg_cluster = perc
                n_selected = n_trg
            else:
                n_selected = int(n_trg * perc)
            selected = inds[:n_selected]
            p_trg = trg[:, selected].copy()
            p_trg_labels = trg_labels[selected].copy()

            # Positions of target cells whose label also occurs in the source.
            # This depends only on the subsample, so compute it once per
            # subsample instead of once per method.
            strat_lbl_inds = [n for n, lbl in enumerate(p_trg_labels.tolist())
                              if lbl in known_src_labels]

            # 4. MTL/DA mixing parameter loop
            res_desc = list()
            for m, method in enumerate(methods):
                desc, lbls, reject = method(src.copy(), src_labels.copy(),
                                            p_trg.copy(), p_trg_labels.copy(),
                                            n_src_cluster=n_src_cluster,
                                            n_trg_cluster=n_trg_cluster)
                res_desc.append(desc)

                # evaluation (each method gets its own copy of the index list)
                method_strat_inds = list(strat_lbl_inds)
                for f, acc_func in enumerate(acc_funcs):
                    accs[f, r, i, m], acc_desc = acc_func(
                        p_trg.copy(), p_trg_labels.copy(), lbls.copy(),
                        reject, method_strat_inds)
                    accs_desc[f] = acc_desc

    # save the result and then plot
    np.savez(fname, methods=methods, acc_funcs=acc_funcs, accs=accs, accs_desc=accs_desc,
             percs=percs, reps=reps, n_genes=n_genes, n_src=n_src, n_trg=n_trg,
             desc=res_desc, mode=mode, num_strat=num_strat, cluster_mode=cluster_mode)
    print('Done.')
num_strat = np.zeros((reps, len(percs), len(methods))) res_desc = [] r = 0 while r < reps: # 1. Generate scRNA data data, labels = generate_toy_data(num_genes=genes[g], num_cells=10. * (n_trg + n_src[s]), cluster_spec=cluster_spec) # 2. Split source and target according to specified mode/setting src, trg, src_labels, trg_labels = split_source_target( data, labels, target_ncells=n_trg, source_ncells=n_src[s], mode=7, source_clusters=None, noise_target=False, noise_sd=0.1, common=common[c], cluster_spec=cluster_spec) trg_labels = np.array(trg_labels, dtype=np.int) src_labels = np.array(src_labels, dtype=np.int) # 3.a. Subsampling order for target inds = np.random.permutation(trg_labels.size) # 3.b. Use perfect number of latent states for nmf and sc3 src_lbl_set = np.unique(src_labels) n_trg_cluster = np.unique(trg_labels).size n_src_cluster = src_lbl_set.size # 3.c. train source once per repetition source_nmf = NmfClustering(src,