Ejemplo n.º 1
0
# Phenotype values and the matching per-phenotype position table, both parsed
# from the expression BED file.
phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(expression_bed)

# Covariates: tab-separated file, first column used as the index, then
# transposed before being handed to the mappers.
covariates_df = pd.read_csv(
    covariates_file,
    sep='\t',
    index_col=0,
).transpose()

# Genotypes come from the PLINK fileset; excluded_chr_list is forwarded to the
# reader as the chromosomes to exclude.
pr = genotypeio.PlinkReader(
    plink_prefix_path,
    exclude_chrs=excluded_chr_list,
)
genotype_df = pr.load_genotypes()

# Variant table indexed by SNP ID, reduced to the chromosome/position columns.
variant_df = pr.bim.set_index('snp').loc[:, ['chrom', 'pos']]

if mode == 'cis':
    # cis-QTL: empirical p-values for phenotypes.
    if excluded_chr_list:
        # Restrict to phenotypes located on chr_id. The mask is derived from
        # the position table once and applied to both tables so their rows
        # stay aligned.
        on_target_chr = phenotype_pos_df['chr'] == chr_id
        cis_phenotype_df = phenotype_df.loc[on_target_chr]
        cis_phenotype_pos_df = phenotype_pos_df.loc[on_target_chr]
    else:
        # No exclusions: map every phenotype.
        cis_phenotype_df = phenotype_df
        cis_phenotype_pos_df = phenotype_pos_df

    cis_df = cis.map_cis(
        genotype_df,
        variant_df,
        cis_phenotype_df,
        cis_phenotype_pos_df,
        covariates_df=covariates_df,
        seed=args.seed,
    )

    # Tab-separated results; the .gz suffix lets pandas infer gzip compression.
    out_file = os.path.join(args.output_dir, prefix + '.cis_qtl.txt.gz')
    cis_df.to_csv(out_file, sep='\t')

elif mode == 'cis_independent':
    # Reload a previously written cis result table (first column as index).
    cis_df = pd.read_csv(in_cis_addr, sep='\t', index_col=0)
    
#for i in 1:22:
    #pairs_df_+i.to_csv('/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/chromatin/cis_caqtl_chr'+i+'_nominal.txt',sep='\t')


# all genes
# cis_df = cis.map_cis(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df)

# genes on a single chromosome (the filter below selects chr == '1')
#cis_df = cis.map_cis(genotype_df, variant_df,
#                     phenotype_df.loc[phenotype_pos_df['chr']=='1'],
#                     phenotype_pos_df.loc[phenotype_pos_df['chr']=='1'],
#                     covariates_df=covariates_df, seed=123456)
                     
# Autosome labels '1' .. '22', generated once instead of spelling the same
# 22-element literal out twice.
autosomes = [str(n) for n in range(1, 23)]

# Row mask taken from the phenotype position table. The same mask is applied
# to the phenotype values and to their positions so the two tables stay
# row-aligned.
is_autosomal = phenotype_pos_df['chr'].isin(autosomes)

# cis mapping restricted to autosomal phenotypes; the seed is fixed so the
# run is repeatable.
cis_df = cis.map_cis(
    genotype_df,
    variant_df,
    phenotype_df.loc[is_autosomal],
    phenotype_pos_df.loc[is_autosomal],
    covariates_df=covariates_df,
    seed=123456,
)

# Tab-separated output at a fixed, cluster-specific location.
cis_df.to_csv('/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/expression/Tcis_eqtl_Bcells_permutations.txt', sep='\t')

                
# trans mapping over all loaded genotypes and phenotypes. The batch size,
# sparse-output flag and the p-value / MAF thresholds are forwarded to
# map_trans unchanged.
trans_df = trans.map_trans(
    genotype_df,
    phenotype_df,
    covariates_df,
    batch_size=10000,
    return_sparse=True,
    pval_threshold=1e-5,
    maf_threshold=0.05,
)

# Tab-separated output at a fixed, cluster-specific location.
trans_out_path = '/lustre03/project/6032391/GROUP/sc_rnaseq/tensorqtl/results/expression/Ttrans_eqtl_Bcells_nominal.txt'
trans_df.to_csv(trans_out_path, sep='\t')


#cis_df.head()
                

Ejemplo n.º 3
0
# Genotype matrix wrapped in a DataFrame: rows labelled by SNP ID (from the
# .bim table), columns labelled by sample IID (from the .fam table).
genotype_df = pd.DataFrame(
    data=pr.load_genotypes(),
    index=pr.bim['snp'],
    columns=pr.fam['iid'],
)

# Variant table indexed by SNP ID, reduced to the chromosome/position columns.
variant_df = pr.bim.set_index('snp').loc[:, ['chrom', 'pos']]

# Subset to Dirk's GMPR SNPs of interest
gmpr = pd.read_csv(outpath + "GMPR_variants_for_lookup_20200717_LS_sorted.txt",
                   sep='\t')
gmpr_ids = gmpr['rsID']

# Only look up rsIDs that are actually present in each table. Passing labels
# that are missing from the index to .loc raises KeyError on pandas >= 1.0
# (older releases returned all-NaN rows instead), so the lookup list is
# filtered first. Order and duplicates of the lookup list are preserved.
gmpr_genotype_df = genotype_df.loc[gmpr_ids[gmpr_ids.isin(genotype_df.index)]]
# Keep variants with a non-null genotype for sample '110000315494'. This is
# the original filter; under old pandas it also removed the all-NaN rows that
# missing rsIDs produced.
gmpr_genotype_df = gmpr_genotype_df[gmpr_genotype_df['110000315494'].notnull()]

gmpr_variant_df = variant_df.loc[gmpr_ids[gmpr_ids.isin(variant_df.index)]]
# Same idea for the variant table: drop rows without a chromosome.
gmpr_variant_df = gmpr_variant_df[gmpr_variant_df['chrom'].notnull()]

# Gene-level cis-eQTL mapping restricted to the GMPR variant subset.
cis_df = cis.map_cis(
    gmpr_genotype_df,
    gmpr_variant_df,
    phenotype_df,
    phenotype_pos_df,
    covariates_df,
)

# The return value is not used, so this presumably annotates cis_df in place
# -- confirm against the installed tensorqtl version.
tensorqtl.calculate_qvalues(cis_df, qvalue_lambda=0.85)

# `chr` must be a string defined earlier in the script (it shadows the
# builtin of the same name); it selects the per-chromosome output file.
cis_csv_path = outpath + "tensorqtl_cis_cisPerGene_chr" + chr + ".csv"
cis_df.to_csv(cis_csv_path, index=True, index_label="Phenotype")

# Cis nominal mapping on the GMPR variant subset.
# covariates_df is passed by keyword rather than positionally: the parameter
# that follows phenotype_pos_df in map_nominal has differed between tensorqtl
# releases (covariates_df vs. prefix), and a positional value there combined
# with prefix=... fails with "multiple values for argument 'prefix'" when the
# slot is prefix. Keywords work with either ordering -- verify against the
# installed version.
cisnom_df = cis.map_nominal(
    gmpr_genotype_df,
    gmpr_variant_df,
    phenotype_df,
    phenotype_pos_df,
    covariates_df=covariates_df,
    prefix=outpath + "tensorqtl_cis_cisNominal_chr" + chr,
)

# Read the nominal pairs back from parquet.
# NOTE(review): the file name is hard-coded for chromosome 6, whereas the
# prefix above is built from `chr`; this only lines up when chr == "6".
cisnom_df2 = pd.read_parquet(
    outpath + "tensorqtl_cis_cisNominal_chr6.cis_qtl_pairs.6.parquet")
Ejemplo n.º 4
0
                           index=pr.bim['snp'],
                           columns=pr.fam['iid'])
# Variant table indexed by SNP ID, reduced to the chromosome/position columns.
# NOTE(review): `pr` is used here before the assignment further down, so it
# must already exist from earlier code.
variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]

# Covariates: tab-separated file, first column as index, then transposed.
covariates_df = pd.read_csv(covariates_file, sep='\t',
                            index_col=0).T  # samples x covariates
# Keep only the "age_RNA" covariate (double brackets keep a DataFrame).
covariates_df = covariates_df[["age_RNA"]]
# Read in genotypes
# (re)creates the PLINK reader; genotype_df / variant_df above are not rebuilt
# from it within this fragment.
pr = genotypeio.PlinkReader(plink_prefix_path)

# Limit to the first 15 rows of the phenotype tables to test conditional
# analysis (the same slice is applied to both tables so they stay aligned).
phenotype_df = all_phenotype_df[0:15]
phenotype_pos_df = all_phenotype_pos_df[0:15]

# Gene-level cis mapping on the reduced phenotype set.
cis_df = cis.map_cis(
    genotype_df,
    variant_df,
    phenotype_df,
    phenotype_pos_df,
    covariates_df,
)

# lambda of 0 is equivalent to BH correction
tensorqtl.calculate_qvalues(cis_df, qvalue_lambda=0)

# `chr` must be a string defined earlier in the script (it shadows the
# builtin of the same name); it selects the per-chromosome output file.
cis_csv_path = (outpath + "tensorqtl_cis_MAF0.005_cisPerGene_3phenotest_chr" +
                chr + ".csv")
cis_df.to_csv(cis_csv_path, index=True, index_label="Phenotype")

# Cis nominal mapping
#cis.map_nominal(genotype_df, variant_df, phenotype_df, phenotype_pos_df, covariates_df, prefix=outpath + "tensorqtl_cis_MAF0.005_cisNominal_3phenotest_chr" + chr)
#cisnom_df = pd.read_parquet(outpath + "tensorqtl_cis_MAF0.005_cisNominal_3phenotest_chr21.cis_qtl_pairs.21.parquet")

# Conditional analysis
indep_df = cis.map_independent(genotype_df,
                               variant_df,
                               cis_df,
Ejemplo n.º 5
0
# PLINK fileset prefix (cluster-specific absolute path).
plink_prefix_path = "/rds/user/jm2294/rds-jmmh2-projects/interval_rna_seq/analysis/03_tensorqtl/genotypes/INTERVAL_b38_autosomes_RNAseqPhase1_biallelic_all_MAF0.005"
pr = genotypeio.PlinkReader(plink_prefix_path)

# Genotype matrix wrapped in a DataFrame: rows labelled by SNP ID (from the
# .bim table), columns labelled by sample IID (from the .fam table).
genotype_df = pd.DataFrame(
    data=pr.get_all_genotypes(),
    index=pr.bim['snp'],
    columns=pr.fam['iid'],
)

# Variant table indexed by SNP ID, reduced to the chromosome/position columns.
variant_df = pr.bim.set_index('snp').loc[:, ['chrom', 'pos']]

# cis: gene-level mapping with two genes removed from the phenotype tables.
# The first ID is presumably ACE2 (going by the variable names); the second
# gene is not named anywhere in this script -- confirm both.
excluded_gene_ids = ["ENSG00000130234", "ENSG00000184012"]
pheno_df_noACE2 = phenotype_df.drop(excluded_gene_ids)
phenopos_df_noACE2 = phenotype_pos_df.drop(excluded_gene_ids)

cis_df = cis.map_cis(
    genotype_df,
    variant_df,
    pheno_df_noACE2,
    phenopos_df_noACE2,
    covariates_peer_df,
)

# The return value is not used, so this presumably annotates cis_df in place
# -- confirm against the installed tensorqtl version.
tensorqtl.calculate_qvalues(cis_df, qvalue_lambda=0)

cis_csv_path = outdir + "tensorqtl_cis_MAF0.005_cisPerGene_chr1.csv"
cis_df.to_csv(cis_csv_path, index=True, index_label="Phenotype")

# Cis nominal mapping on the same phenotype tables as the gene-level run.
# The covariates are passed by keyword rather than positionally: the parameter
# that follows phenotype_pos_df in map_nominal has differed between tensorqtl
# releases (covariates_df vs. prefix), and a positional value there combined
# with prefix=... fails with "multiple values for argument 'prefix'" when the
# slot is prefix. Keywords work with either ordering -- verify against the
# installed version.
# NOTE(review): the output prefix is built from `covdir`, while the gene-level
# CSV goes to `outdir` -- confirm that is intended.
cisnom_df = cis.map_nominal(
    genotype_df,
    variant_df,
    pheno_df_noACE2,
    phenopos_df_noACE2,
    covariates_df=covariates_peer_df,
    prefix=covdir + "tensorqtl_cis_MAF0.005_cisNominal_covid19",
)

# Conditional analysis