Beispiel #1
0
    ## Name the output directory after the feature-selection setup: runs with
    ## neither a selection target nor a selection method share "no_feature";
    ## any other run encodes method, feature count and target in the name.
    feature_selection_off = (args.select_on is None
                             and args.select_method is None)
    if feature_selection_off:
        run_label = "no_feature"
    else:
        run_label = args.select_method + '_' + \
                str(args.n_features) + '_on_' + args.select_on
    result_dir = result_prefix + os.sep + run_label
    os.makedirs(result_dir, exist_ok=True)

    ## load_adata returns a flag plus the train/test objects; when the flag is
    ## falsy the data is loaded and processed from scratch below.
    load_ind, train_adata, test_adata = load_adata(result_dir)
    if not load_ind:
        train_adata, test_adata = dataloading_utils.load_Pancreas_adata(
            data_dir, result_dir, args=args)

        ## whether to purify reference dataset: the first tag found in
        ## args.data_source wins, and "" is passed when neither is present
        purify_tags = (("purify_dist", "distance"), ("purify_SVM", "SVM"))
        purify_method = next(
            (method for tag, method in purify_tags
             if tag in args.data_source),
            "")

        train_adata, test_adata = dataloading_utils.process_loaded_data(
            train_adata,
            test_adata,
            result_dir,
            args=args,
            purify_method=purify_method)
        print("Train anndata: \n", train_adata)
        print("Test anndata: \n", test_adata)

    method_utils.run_pipeline(args, train_adata, test_adata, data_dir,
                              result_dir)
Beispiel #2
0
        print("Test anndata: \n", test_adata)

    if args.downsample:  ## add shuffled cells
        ## Shuffle the training cells once (seeded with args.sample_seed) and
        ## run the pipeline on growing prefixes of that shuffled order, one
        ## result sub-directory per prefix size.
        if args.downsample_size <= 0:
            raise ValueError("downsample_size must be a positive integer")

        train_cells = train_adata.obs_names.tolist()
        random.seed(args.sample_seed)
        random.shuffle(train_cells)  ## shuffle original cell list
        original_train_adata = train_adata.copy()[train_cells]
        n_cells = original_train_adata.shape[0]

        ## Prefix sizes: every multiple of downsample_size strictly below
        ## n_cells, followed by n_cells itself. Listing the sizes explicitly
        ## avoids running the full-size subset twice (and overwriting the same
        ## directory) when n_cells is an exact multiple of downsample_size.
        sample_sizes = list(
            range(args.downsample_size, n_cells, args.downsample_size))
        sample_sizes.append(n_cells)

        for sampled_number in sample_sizes:
            train_adata = original_train_adata[:sampled_number]
            sampled_result_dir = result_dir + os.sep + str(sampled_number)
            os.makedirs(sampled_result_dir, exist_ok=True)
            method_utils.run_pipeline(args, train_adata, test_adata, data_dir,
                                      sampled_result_dir)
    else:  ## add shuffled individuals
        if args.data_source == "mousebrain_crossdataset_inds":  ## a combined version
            pFC_samples = [
                x for x in train_adata.obs["Sample"].tolist() if x != 'nan'
            ]
            allen_samples = [
                x
                for x in train_adata.obs["external_donor_name_label"].tolist()
                if x != 'nan'
            ]
            train_adata.obs["ind"] = pFC_samples + allen_samples

        original_train_adata = train_adata.copy()
        train_inds = list(set(original_train_adata.obs['ind']))
        random.seed(args.sample_seed)