Ejemplo n.º 1
0
# Command-line interface.  Options are registered in a fixed order from a
# small table of (flag, keyword-arguments) pairs.
parser = argparse.ArgumentParser()
for flag, options in (
        ('--dset', {}),
        ('--simname', {}),
        ('--nsim', {'type': int,
                    'help': 'number of replicates to simulate'}),
        ('--index', {'type': int}),
        ('--method', {}),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()

# Echo the parsed arguments between two banner lines.
print('\n\n****', args, '****\n\n', sep='\n')

# Load the dataset named by --dset and take its `samplem` metadata table.
data = cna.read(paths.tbru_h5ad + args.dset + '.h5ad')
sampleXmeta = data.samplem

# Simulate phenotypes.  The RNG is seeded with --index, then one causal
# batch is drawn (with replacement) for each of the --nsim replicates.
np.random.seed(args.index)
causal_batch = np.random.choice(
    sampleXmeta.batch.unique(), size=args.nsim, replace=True)

# One row per replicate: 1.0 where a row of the metadata table belongs to
# that replicate's causal batch, 0.0 elsewhere.
batch_labels = sampleXmeta.batch.values
Ys = np.array([
    (batch_labels == chosen).astype(np.float64)
    for chosen in causal_batch
])

# Perturb every entry with Gaussian noise scaled by 0.01.
noise = 0.01 * np.random.randn(args.nsim, len(sampleXmeta))
Ys += noise

# do analysis
true_cell_scores = pd.DataFrame(
    np.random.randn(len(data), args.nsim),  # this is a dummy value
    columns=[
Ejemplo n.º 2
0
# Remaining command-line options; `parser` itself is created earlier in the
# script.  Options are registered in a fixed order from a small table.
for flag, options in (
        ('--minNcells', {'type': int, 'default': None}),
        ('--propcells', {'type': float, 'default': None}),
        ('--inname', {'type': str}),
        ('--outname', {'type': str, 'default': None}),
        ('--seed', {'type': int, 'default': 0}),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()

# Echo the parsed arguments between two banner lines.
print('\n\n****', args, '****\n\n', sep='\n')

# Seed NumPy's global RNG with --seed before the random draw below.
np.random.seed(args.seed)

# Load the input dataset named by --inname.
print('reading')
data = cna.read(paths.simdata + args.inname + '.h5ad')
del data.obsp  # needed because data was saved with new version of scanpy
sampleXmeta = data.samplem

# Draw args.N distinct ids (without replacement) from the metadata index and
# keep only the matching rows.  `--N` is presumably declared earlier in the
# script, outside this excerpt.
print('downsampling samples')
ids = sampleXmeta.index.unique()
ids_ = np.random.choice(ids, size=args.N, replace=False)
sampleXmeta_ = sampleXmeta.loc[sampleXmeta.index.isin(ids_)].copy()
print('N =', len(sampleXmeta_), 'before minNcells')

# Optionally drop rows whose `C` column is below the --minNcells cutoff.
if args.minNcells is not None:
    sampleXmeta_ = sampleXmeta_.loc[sampleXmeta_.C >= args.minNcells]
N = len(sampleXmeta_)
print('N =', N, 'after minNcells')

# Restrict the data to entries whose `obs.id` is among the retained ids.
# The same mask is applied to the subset and to the private `_X` attribute.
# NOTE(review): why `_X` is reassigned on the subset is not shown here.
kept_cells = data.obs.id.isin(sampleXmeta_.index.values)
data_ = data[kept_cells]
data_._X = data._X[kept_cells]
Ejemplo n.º 3
0
import paths, simulation

# Parse Arguments
parser = argparse.ArgumentParser()
# Plain string options first, in their original registration order.
for flag in ('--dset', '--simname', '--method'):
    parser.add_argument(flag)
parser.add_argument('--index', type=int)
# Noise level is in units of std dev of the noiseless phenotype.
parser.add_argument('--noise-level', type=float)
args = parser.parse_args()

# Echo the parsed arguments between two banner lines.
print('\n\n****', args, '****\n\n', sep='\n')

# Read Data: load the dataset named by --dset and add a squared-age column
# to its `samplem` metadata table.
data = cna.read(paths.simdata + args.dset + '.h5ad')
sampleXmeta = data.samplem
sampleXmeta['age2'] = sampleXmeta['age'] ** 2

# Simulate phenotype: seed NumPy's global RNG with --index.
np.random.seed(args.index)

# Metadata columns used as phenotypes.
phenos = [
    'TB_STATUS_CASE',
    'EURad4KR',
    'Sex_M',
    'season_Winter',
    'age',
]

# For each phenotype, a list of other metadata columns -- presumably the
# covariates paired with it downstream (their use is not shown here).
covs = {
    'TB_STATUS_CASE': [
        'age', 'age2', 'Sex_M', 'season_Winter', 'EURad4KR',
    ],
    'EURad4KR': [
        'Height', 'Weight', 'edu_cat_belowHighschool', 'season_Winter',
        'TB_STATUS_CASE',
    ],
    'Sex_M': [
        'Height', 'num_scar', 'TB_STATUS_CASE',
    ],
    'season_Winter': [
        'age', 'Height', 'EURad4KR', 'BCG_scar', 'season_Spring',
    ],
    'age': [
        'Weight', 'num_scar', 'season_Winter', 'TB_STATUS_CASE',
    ],
}

# One row per phenotype, one column per row of the metadata table.
Ys = sampleXmeta.loc[:, phenos].transpose()
Ejemplo n.º 4
0
def _str_to_bool(text):
    """Convert a command-line string into a boolean.

    With ``type=bool`` argparse calls ``bool()`` on the raw string, so every
    non-empty value -- including ``'False'`` and ``'0'`` -- parses as
    ``True``.  This converter maps the usual "off" spellings (and the empty
    string) to ``False``; any other value still yields ``True``, as before.
    """
    falsy = {'', '0', 'false', 'f', 'no', 'n', 'off'}
    return text.strip().lower() not in falsy


parser = argparse.ArgumentParser()
parser.add_argument('--dset')
parser.add_argument('--simname')
parser.add_argument('--method')
parser.add_argument('--index', type=int)
parser.add_argument('--causal-clustering', type=str)
parser.add_argument('--noise-level',
                    type=float)  #in units of std dev of noiseless phenotype
# Boolean switch given as a value, e.g. `--QCclusters True` or
# `--QCclusters 0`; see _str_to_bool for why `type=bool` is not used.
parser.add_argument('--QCclusters', type=_str_to_bool, default=False)
args = parser.parse_args()

# Echo the parsed arguments between two banner lines.
print('\n\n****')
print(args)
print('****\n\n')

## Load Data: read the dataset named by --dset, forcing recomputation.
h5ad_path = paths.simdata + args.dset + '.h5ad'
data = cna.read(h5ad_path, force_recompute=True)
sampleXmeta = data.samplem

### If harmonized (dataset name begins with "harm"), expose the data matrix
### under the 'X_pca' key of `obsm`.
if args.dset.startswith("harm"):
    data.obsm['X_pca'] = data.X

# Simulate Phenotype: seed NumPy's global RNG with --index.
np.random.seed(args.index)

## Compute true cell scores: one indicator column per distinct value of the
## clustering column selected by --causal-clustering.
causal_labels = data.obs[args.causal_clustering]
true_cell_scores = pd.get_dummies(causal_labels)
if args.QCclusters:
    retain_clusters = simulation.discard_bad_clusters(
        data,
        args.causal_clustering,