# Load phenotypes (values + positions) and covariates. The covariates table
# is transposed immediately after being read.
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)
covariates_df = pd.read_csv(covariates_file, sep='\t', index_col=0).T

# Load genotypes from the PLINK prefix; excluded_chr_list is forwarded to the
# reader as exclude_chrs.
pr = genotypeio.PlinkReader(plink_prefix_path, exclude_chrs=excluded_chr_list)
genotype_df = pr.load_genotypes()
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

# NOTE(review): the nesting below was reconstructed from a flattened source;
# confirm that the write-out belongs to the 'cis' branch as laid out here.
if mode == 'cis':
    # cis-QTL: empirical p-values for phenotypes
    if excluded_chr_list:
        # Chromosomes were excluded: only map phenotypes located on chr_id.
        on_target_chr = phenotype_pos_df['chr'] == chr_id
        cis_phenotype_df = phenotype_df.loc[on_target_chr]
        cis_phenotype_pos_df = phenotype_pos_df.loc[on_target_chr]
    else:
        # Nothing excluded: map every phenotype.
        cis_phenotype_df = phenotype_df
        cis_phenotype_pos_df = phenotype_pos_df

    cis_df = cis.map_cis(
        genotype_df,
        variant_df,
        cis_phenotype_df,
        cis_phenotype_pos_df,
        covariates_df=covariates_df,
        seed=args.seed,
    )

    # Write the result as a tab-separated file named after the run prefix.
    out_file = os.path.join(args.output_dir, prefix + '.cis_qtl.txt.gz')
    cis_df.to_csv(out_file, sep='\t')
elif mode == 'cis_independent':
    # Re-load a previously written cis result table from in_cis_addr.
    cis_df = pd.read_csv(in_cis_addr, sep='\t', index_col=0)
# Disabled experiments, kept for reference:
#
#   per-chromosome nominal output (pseudo-code, never valid Python):
#     for i in 1:22:
#         pairs_df_+i.to_csv('/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/chromatin/cis_caqtl_chr'+i+'_nominal.txt',sep='\t')
#
#   all genes:
#     cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)
#
#   genes on one chromosome (the original note said chr18, but the filter
#   selected chr == '1'):
#     cis_df = cis.map_cis(genotype_df, variant_df,
#                          phenotype_df.loc[phenotype_pos_df['chr']=='1'],
#                          phenotype_pos_df.loc[phenotype_pos_df['chr']=='1'],
#                          covariates_df=covariates_df, seed=123456)

# cis mapping restricted to phenotypes whose 'chr' value is one of '1'..'22'.
autosome_labels = [str(number) for number in range(1, 23)]
on_autosome = phenotype_pos_df['chr'].isin(autosome_labels)
cis_df = cis.map_cis(
    genotype_df,
    variant_df,
    phenotype_df.loc[on_autosome],
    phenotype_pos_df.loc[on_autosome],
    covariates_df=covariates_df,
    seed=123456,
)
cis_df.to_csv(
    '/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/expression/Tcis_eqtl_Bcells_permutations.txt',
    sep='\t',
)

# trans mapping over all phenotypes, with the thresholds passed below.
trans_df = trans.map_trans(
    genotype_df,
    phenotype_df,
    covariates_df,
    batch_size=10000,
    return_sparse=True,
    pval_threshold=1e-5,
    maf_threshold=0.05,
)
trans_df.to_csv(
    '/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/expression/Ttrans_eqtl_Bcells_nominal.txt',
    sep='\t',
)
# cis_df.head()
# Load genotypes and variants into data frames.
genotype_df = pd.DataFrame(pr.load_genotypes(), index=pr.bim['snp'], columns=pr.fam['iid'])
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

# Subset to Dirk's GMPR SNPs of interest.
gmpr = pd.read_csv(outpath + "GMPR_variants_for_lookup_20200717_LS_sorted.txt", sep='\t')

# Only look up rsIDs that are actually present. Since pandas 1.0, `.loc` with a
# list of labels raises KeyError if any label is missing; older versions
# returned all-NaN rows instead, which is what the notnull() filters below
# were cleaning up. Filtering the labels first works on both.
gmpr_genotype_ids = gmpr.loc[gmpr['rsID'].isin(genotype_df.index), 'rsID']
gmpr_genotype_df = genotype_df.loc[gmpr_genotype_ids]
# Kept from the original: drop rows where sample '110000315494' is null.
gmpr_genotype_df = gmpr_genotype_df[gmpr_genotype_df['110000315494'].notnull()]

gmpr_variant_ids = gmpr.loc[gmpr['rsID'].isin(variant_df.index), 'rsID']
gmpr_variant_df = variant_df.loc[gmpr_variant_ids]
# Kept from the original: drop rows with no chromosome assignment.
gmpr_variant_df = gmpr_variant_df[gmpr_variant_df['chrom'].notnull()]

# Call cis-eQTLs.
cis_df = cis.map_cis(gmpr_genotype_df, gmpr_variant_df, phenotype_df,
                     phenotype_pos_df, covariates_df)
tensorqtl.calculate_qvalues(cis_df, qvalue_lambda=0.85)
# NOTE(review): `chr` is defined outside this excerpt and shadows the builtin;
# it is concatenated as a string here, so it must already be a str.
cis_df.to_csv(outpath + "tensorqtl_cis_cisPerGene_chr" + chr + ".csv",
              index=True, index_label="Phenotype")

# Cis nominal mapping.
cisnom_df = cis.map_nominal(gmpr_genotype_df, gmpr_variant_df, phenotype_df,
                            phenotype_pos_df, covariates_df,
                            prefix=outpath + "tensorqtl_cis_cisNominal_chr" + chr)
# NOTE(review): the file name below is hard-coded to chromosome 6 while the
# prefix above uses `chr` — confirm they are meant to agree.
cisnom_df2 = pd.read_parquet(
    outpath + "tensorqtl_cis_cisNominal_chr6.cis_qtl_pairs.6.parquet")
index=pr.bim['snp'], columns=pr.fam['iid']) variant_df = pr.bim.set_index('snp')[['chrom', 'pos']] covariates_df = pd.read_csv(covariates_file, sep='\t', index_col=0).T # samples x covariates covariates_df = covariates_df[["age_RNA"]] # Read in genotypes pr = genotypeio.PlinkReader(plink_prefix_path) # Limit to 3 phenotypes to test conditional analysis phenotype_df = all_phenotype_df[0:15] phenotype_pos_df = all_phenotype_pos_df[0:15] # Cis gene-level mapping cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df) tensorqtl.calculate_qvalues( cis_df, qvalue_lambda=0) # lambda of 0 is equivalent to BH correction cis_df.to_csv(outpath + "tensorqtl_cis_MAF0.005_cisPerGene_3phenotest_chr" + chr + ".csv", index=True, index_label="Phenotype") # Cis nominal mapping #cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix=outpath + "tensorqtl_cis_MAF0.005_cisNominal_3phenotest_chr" + chr) #cisnom_df = pd.read_parquet(outpath + "tensorqtl_cis_MAF0.005_cisNominal_3phenotest_chr21.cis_qtl_pairs.21.parquet") # Conditional analysis indep_df = cis.map_independent(genotype_df, variant_df, cis_df,
# Load genotypes and variant positions from the PLINK file set.
plink_prefix_path = "/rds/user/jm2294/rds-jmmh2-projects/interval_rna_seq/analysis/03_tensorqtl/genotypes/INTERVAL_b38_autosomes_RNAseqPhase1_biallelic_all_MAF0.005"
pr = genotypeio.PlinkReader(plink_prefix_path)
genotype_df = pd.DataFrame(
    pr.get_all_genotypes(),
    index=pr.bim['snp'],
    columns=pr.fam['iid'],
)
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

# cis
# Cis gene-level mapping
# Two phenotypes are removed from both tables before mapping (presumably ACE2
# and a second gene, going by the variable names — confirm the IDs).
excluded_phenotypes = ["ENSG00000130234", "ENSG00000184012"]
pheno_df_noACE2 = phenotype_df.drop(excluded_phenotypes)
phenopos_df_noACE2 = phenotype_pos_df.drop(excluded_phenotypes)

cis_df = cis.map_cis(
    genotype_df,
    variant_df,
    pheno_df_noACE2,
    phenopos_df_noACE2,
    covariates_peer_df,
)
tensorqtl.calculate_qvalues(cis_df, qvalue_lambda=0)
cis_df.to_csv(
    outdir + "tensorqtl_cis_MAF0.005_cisPerGene_chr1.csv",
    index=True,
    index_label="Phenotype",
)

# Cis nominal mapping
cisnom_df = cis.map_nominal(
    genotype_df,
    variant_df,
    pheno_df_noACE2,
    phenopos_df_noACE2,
    covariates_peer_df,
    prefix=covdir + "tensorqtl_cis_MAF0.005_cisNominal_covid19",
)
# Conditional analysis