import argparse
import numpy as np
import pandas as pd
import cna
import paths

# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--dset')
parser.add_argument('--simname')
parser.add_argument('--nsim', type=int, help='number of replicates to simulate')
parser.add_argument('--index', type=int)
parser.add_argument('--method')
args = parser.parse_args()
print('\n\n****')
print(args)
print('****\n\n')

# read data
data = cna.read(paths.tbru_h5ad + args.dset + '.h5ad')
sampleXmeta = data.samplem

# simulate phenotype: for each replicate, pick a causal batch and set the phenotype
# to a noisy indicator of membership in that batch
np.random.seed(args.index)
causal_batch = np.random.choice(sampleXmeta.batch.unique(), replace=True, size=args.nsim)
Ys = np.array([(sampleXmeta.batch.values == cb).astype(np.float64) for cb in causal_batch])
Ys += 0.01 * np.random.randn(args.nsim, len(sampleXmeta))

# do analysis
true_cell_scores = pd.DataFrame(
    np.random.randn(len(data), args.nsim),  # this is a dummy value
    columns=[
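# The listing above is cut off mid-call: the column labels of true_cell_scores and the
# downstream association step are not shown. The sketch below is an illustrative
# completion only; the column labels and the per-replicate loop are assumptions, not
# the original code. cna.tl.association is the cna package's association routine, and
# res.p is its global association p-value.
true_cell_scores = pd.DataFrame(
    np.random.randn(len(data), args.nsim),                  # dummy values, as above
    columns=['batch' + str(i) for i in range(args.nsim)],   # hypothetical column labels
    index=data.obs.index)
for i in range(args.nsim):
    res = cna.tl.association(data, Ys[i])                   # test each replicate phenotype
    print(i, res.p)                                         # global p-value for this replicate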
import argparse
import numpy as np
import cna
import paths

# parse arguments
parser = argparse.ArgumentParser()
parser.add_argument('--N', type=int)  # number of samples to retain (used as args.N below)
parser.add_argument('--minNcells', type=int, default=None)
parser.add_argument('--propcells', type=float, default=None)
parser.add_argument('--inname', type=str)
parser.add_argument('--outname', type=str, default=None)
parser.add_argument('--seed', type=int, default=0)
args = parser.parse_args()
print('\n\n****')
print(args)
print('****\n\n')
np.random.seed(args.seed)

# read in dataset
print('reading')
data = cna.read(paths.simdata + args.inname + '.h5ad')
del data.obsp  # needed because data was saved with a newer version of scanpy
sampleXmeta = data.samplem

# downsample samples
print('downsampling samples')
ids = sampleXmeta.index.unique()
ids_ = np.random.choice(ids, replace=False, size=args.N)
sampleXmeta_ = sampleXmeta[sampleXmeta.index.isin(ids_)].copy()
print('N =', len(sampleXmeta_), 'before minNcells')
if args.minNcells is not None:
    sampleXmeta_ = sampleXmeta_[sampleXmeta_.C >= args.minNcells]
N = len(sampleXmeta_)
print('N =', N, 'after minNcells')

# subset the cells (and the expression matrix) to the retained samples
data_ = data[data.obs.id.isin(sampleXmeta_.index.values)]
data_._X = data._X[data.obs.id.isin(sampleXmeta_.index.values)]
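# --propcells and --outname are parsed above but their use is not shown in this excerpt.
# The sketch below is a hypothetical illustration of how a per-cell downsampling fraction
# and an output path might be applied; the original script may implement these steps
# differently.
if args.propcells is not None:
    keep = np.random.rand(len(data_)) < args.propcells   # retain each cell with probability propcells
    data_ = data_[keep].copy()
outname = args.outname if args.outname is not None else args.inname + '_downsampled'
data_.write(paths.simdata + outname + '.h5ad')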
import argparse
import numpy as np
import cna
import paths, simulation

# Parse Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--dset')
parser.add_argument('--simname')
parser.add_argument('--method')
parser.add_argument('--index', type=int)
parser.add_argument('--noise-level', type=float)  # in units of std dev of noiseless phenotype
args = parser.parse_args()
print('\n\n****')
print(args)
print('****\n\n')

# Read Data
data = cna.read(paths.simdata + args.dset + '.h5ad')
sampleXmeta = data.samplem
sampleXmeta['age2'] = sampleXmeta.age**2

# simulate phenotype: use real sample-level attributes as phenotypes, each with its
# own set of sample-level covariates
np.random.seed(args.index)
phenos = ['TB_STATUS_CASE', 'EURad4KR', 'Sex_M', 'season_Winter', 'age']
covs = {
    'TB_STATUS_CASE': ['age', 'age2', 'Sex_M', 'season_Winter', 'EURad4KR'],
    'EURad4KR': ['Height', 'Weight', 'edu_cat_belowHighschool', 'season_Winter', 'TB_STATUS_CASE'],
    'Sex_M': ['Height', 'num_scar', 'TB_STATUS_CASE'],
    'season_Winter': ['age', 'Height', 'EURad4KR', 'BCG_scar', 'season_Spring'],
    'age': ['Weight', 'num_scar', 'season_Winter', 'TB_STATUS_CASE'],
}
Ys = sampleXmeta[phenos].T
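# --noise-level is parsed above but its use is not shown in this excerpt. Below is a
# minimal sketch of one way to apply it, assuming noise is added in units of the
# standard deviation of each noiseless phenotype (per the comment on the argument);
# the original script may instead handle this inside the simulation module.
if args.noise_level:
    noise = np.random.randn(*Ys.shape) * Ys.std(axis=1).values[:, None]
    Ys = Ys + args.noise_level * noise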
import argparse
import numpy as np
import pandas as pd
import cna
import paths, simulation

# Parse Arguments
parser = argparse.ArgumentParser()
parser.add_argument('--dset')
parser.add_argument('--simname')
parser.add_argument('--method')
parser.add_argument('--index', type=int)
parser.add_argument('--causal-clustering', type=str)
parser.add_argument('--noise-level', type=float)  # in units of std dev of noiseless phenotype
parser.add_argument('--QCclusters', type=bool, default=0)  # note: argparse's bool() treats any non-empty string as True
args = parser.parse_args()
print('\n\n****')
print(args)
print('****\n\n')

## Load Data
data = cna.read(paths.simdata + args.dset + '.h5ad', force_recompute=True)
sampleXmeta = data.samplem

### If harmonized, use the stored matrix directly as the PCA embedding
if args.dset[0:4] == "harm":
    data.obsm['X_pca'] = data.X

# Simulate Phenotype
np.random.seed(args.index)

## Compute true cell scores: one indicator column per cluster of the causal clustering
true_cell_scores = pd.get_dummies(data.obs[args.causal_clustering])
if args.QCclusters:
    retain_clusters = simulation.discard_bad_clusters(
        data, args.causal_clustering,
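# The call to simulation.discard_bad_clusters is cut off above, and the rest of the
# phenotype construction is not shown. The sketch below is an illustrative guess at the
# general recipe (per-sample mean of each cluster's cell scores, plus Gaussian noise
# scaled by --noise-level); the simulation module's actual helpers may differ, and the
# use of data.obs.id as the sample identifier is an assumption.
Ys = true_cell_scores.groupby(data.obs.id.values).mean().T   # clusters x samples
noise = np.random.randn(*Ys.shape) * Ys.std(axis=1).values[:, None]
Ys = Ys + args.noise_level * noise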