def test_gwas_from_source(self):
    """Load the scz2 test GWAS through a filtered source and check the result.

    The file is read twice with the full OR+SE format:

    1. Unfiltered, verified with ``assert_gwas_zscore_fbse``.
    2. Restricted to three SNP ids via ``GWASUtilities.gwas_filtered_source``,
       where every loaded column is compared against literal expected values.
    """
    # Full format, OR+SE (which is like beta+se).
    gwas_format = {
        "column_snp": "SNPID",
        "column_non_effect_allele": "A2",
        "column_effect_allele": "A1",
        "column_or": "OR",
        "column_se": "SE",
        "column_chromosome": "HG19CHRC",
        "column_position": "BP",
    }
    gwas_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

    source = GWASUtilities.gwas_filtered_source(gwas_path)
    gwas = GWAS.load_gwas(source, gwas_format)
    assert_gwas_zscore_fbse(self, gwas)

    source = GWASUtilities.gwas_filtered_source(
        gwas_path,
        snps={"rs940550", "rs6650104", "rs61770173"},
        snp_column_name="SNPID")
    gwas = GWAS.load_gwas(source, gwas_format)

    # NOTE: ``numpy.str`` was only an alias of the builtin ``str`` and was
    # removed in NumPy 1.24, so the builtin is used directly. pandas maps
    # both to the same object dtype.
    numpy.testing.assert_array_equal(
        gwas[SNP],
        pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[EFFECT_ALLELE],
        pandas.Series(["C", "T", "A"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[NON_EFFECT_ALLELE],
        pandas.Series(["G", "C", "C"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[CHROMOSOME],
        pandas.Series(["chr1", "chr1", "chr22"], dtype=str))

    numpy.testing.assert_allclose(
        gwas[ZSCORE],
        pandas.Series([-1.254557, 0.974874, -0.232505], dtype=numpy.float32),
        rtol=0.001)
    numpy.testing.assert_allclose(
        gwas[BETA],
        pandas.Series(
            [-0.0217038334437866, 0.0193025022544974, -0.00369682484428976],
            dtype=numpy.float32),
        rtol=0.001)
    numpy.testing.assert_allclose(
        gwas[SE],
        pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32),
        rtol=0.001)
def test_gwas_from_source(self):
    """Load the scz2 test GWAS with ``GWAS.load_gwas`` and check the result.

    The file is read twice with the full OR+SE format:

    1. With ``force_special_handling=True``, verified with
       ``assert_gwas_zscore_fbse``.
    2. Restricted to three SNP ids through the ``snps`` argument, where every
       loaded column is compared against literal expected values.
    """
    # Full format, OR+SE (which is like beta+se).
    gwas_format = {
        "column_snp": "SNPID",
        "column_non_effect_allele": "A2",
        "column_effect_allele": "A1",
        "column_or": "OR",
        "column_se": "SE",
        "column_chromosome": "HG19CHRC",
        "column_position": "BP",
    }
    gwas_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

    gwas = GWAS.load_gwas(gwas_path, gwas_format, force_special_handling=True)
    assert_gwas_zscore_fbse(self, gwas)

    gwas = GWAS.load_gwas(
        gwas_path, gwas_format,
        snps={"rs940550", "rs6650104", "rs61770173"})

    # NOTE: ``numpy.str`` was only an alias of the builtin ``str`` and was
    # removed in NumPy 1.24, so the builtin is used directly. pandas maps
    # both to the same object dtype.
    numpy.testing.assert_array_equal(
        gwas[SNP],
        pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[EFFECT_ALLELE],
        pandas.Series(["C", "T", "A"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[NON_EFFECT_ALLELE],
        pandas.Series(["G", "C", "C"], dtype=str))
    numpy.testing.assert_array_equal(
        gwas[CHROMOSOME],
        pandas.Series(["chr1", "chr1", "chr22"], dtype=str))

    numpy.testing.assert_allclose(
        gwas[ZSCORE],
        pandas.Series([-1.254557, 0.974874, -0.232505], dtype=numpy.float32),
        rtol=0.001)
    numpy.testing.assert_allclose(
        gwas[BETA],
        pandas.Series(
            [-0.0217038334437866, 0.0193025022544974, -0.00369682484428976],
            dtype=numpy.float32),
        rtol=0.001)
    numpy.testing.assert_allclose(
        gwas[SE],
        pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32),
        rtol=0.001)
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP / z-score (/ beta) columns.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns`` and ``input_pvalue_fix``.
        model: optional prediction model. When truthy, ``model.snps()`` is
            passed to the loader; when not ``None``, the loaded data is
            aligned to the alleles found in ``model.weights``.
        gwas_format: format description forwarded to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded data with missing values replaced by ``"NA"``, restricted
        to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, if present, ``GWAS.BETA``.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path or "no database")

    if args.gwas_folder:
        gwas_path = os.path.join(args.gwas_folder, name)
    else:
        gwas_path = name

    model_snps = None
    if model:
        model_snps = model.snps()

    betas = GWAS.load_gwas(
        gwas_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
    )

    if model is not None:
        fields = PredictionModel.WDBQF
        allele_columns = [
            fields.K_RSID,
            fields.K_EFFECT_ALLELE,
            fields.K_NON_EFFECT_ALLELE,
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        betas = GWASAndModels.align_data_to_alleles(
            betas, reference, Constants.SNP, fields.K_RSID)

    betas = betas.fillna("NA")

    # Beta is optional in the loaded data; keep it only when available.
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in betas:
        selected.append(GWAS.BETA)
    return betas[selected]
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP / z-score (/ beta) columns.

    When a model is given or ``args.skip_until_header`` is set, the file is
    first wrapped with ``GWASUtilities.gwas_filtered_source`` so the loader
    reads from that filtered source instead of the raw path.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``snp_column``, ``skip_until_header`` and
            ``separator``.
        model: optional prediction model. When truthy, ``model.snps()`` and
            ``args.snp_column`` are handed to the filtered source; when not
            ``None``, the loaded data is aligned to the alleles found in
            ``model.weights``.
        gwas_format: format description forwarded to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded data with missing values replaced by ``"NA"``, restricted
        to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, if present, ``GWAS.BETA``.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path if args.model_db_path else "no database")

    # Guard against a missing folder: os.path.join(None, name) raises
    # TypeError, so fall back to using `name` as the path itself.
    load_from = os.path.join(args.gwas_folder, name) if args.gwas_folder else name

    if model or args.skip_until_header:
        snps = model.snps() if model else None
        snp_column_name = args.snp_column if model else None
        load_from = GWASUtilities.gwas_filtered_source(
            load_from,
            snps=snps,
            snp_column_name=snp_column_name,
            skip_until_header=args.skip_until_header,
            separator=args.separator)

    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    sep = r'\s+' if args.separator is None else args.separator
    b = GWAS.load_gwas(load_from, gwas_format, sep=sep)

    if model is not None:
        PF = PredictionModel.WDBQF
        base = model.weights[[
            PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE
        ]].drop_duplicates()
        b = align_data_to_alleles(b, base, Constants.SNP, PF.K_RSID)

    b = b.fillna("NA")

    # Beta is optional in the loaded data; keep it only when available.
    keep = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in b:
        keep.append(GWAS.BETA)
    return b[keep]
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP / z-score (/ beta) columns.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns``, ``input_pvalue_fix`` and
            ``keep_non_rsid``.
        model: optional prediction model. When truthy, ``model.snps()`` is
            passed to the loader; when not ``None``, the loaded data is
            aligned to the alleles found in ``model.weights``.
        gwas_format: format description forwarded to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded data with missing values replaced by ``"NA"``, restricted
        to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, if present, ``GWAS.BETA``.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path or "no database")

    if args.gwas_folder:
        gwas_path = os.path.join(args.gwas_folder, name)
    else:
        gwas_path = name

    model_snps = None
    if model:
        model_snps = model.snps()

    betas = GWAS.load_gwas(
        gwas_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model is not None:
        fields = PredictionModel.WDBQF
        allele_columns = [
            fields.K_RSID,
            fields.K_EFFECT_ALLELE,
            fields.K_NON_EFFECT_ALLELE,
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        betas = GWASAndModels.align_data_to_alleles(
            betas, reference, Constants.SNP, fields.K_RSID)

    betas = betas.fillna("NA")

    # Beta is optional in the loaded data; keep it only when available.
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in betas:
        selected.append(GWAS.BETA)
    return betas[selected]
def test_load_gwas(self):
    """Load the scz2 test GWAS under several column-format descriptions.

    A minimal format (SNP and alleles only) is loaded with ``strict=False``
    and checked with ``assert_basic_gwas``. Five full formats, which differ
    only in the statistic columns they declare, are then loaded and each is
    checked with the assertion helper paired with it below.
    """
    gwas_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

    # Columns shared by every format in this test.
    allele_columns = [
        ("column_snp", "SNPID"),
        ("column_non_effect_allele", "A2"),
        ("column_effect_allele", "A1"),
    ]
    # Columns that close every full format.
    location_columns = [
        ("column_chromosome", "HG19CHRC"),
        ("column_position", "BP"),
    ]

    gwas = GWAS.load_gwas(gwas_path, dict(allele_columns), strict=False)
    assert_basic_gwas(self, gwas)

    full_format_cases = [
        # full format, OR+SE (which is like beta+se)
        ([("column_or", "OR"), ("column_se", "SE")],
         assert_gwas_zscore_fbse),
        # full format, beta+SE
        ([("column_beta", "BETA"), ("column_se", "SE")],
         assert_gwas_zscore_fbse),
        # full format, pvalue+beta
        ([("column_beta", "BETA"), ("column_pvalue", "P")],
         assert_gwas_zscore_pb),
        # full format, pvalue+beta_sign
        ([("column_beta_sign", "BETA_SIGN"), ("column_pvalue", "P")],
         assert_gwas_zscore_pb),
        # full format, pvalue+or
        ([("column_or", "OR"), ("column_pvalue", "P")],
         assert_gwas_zscore_pb),
    ]

    for statistic_columns, check in full_format_cases:
        gwas_format = dict(allele_columns + statistic_columns + location_columns)
        gwas = GWAS.load_gwas(gwas_path, gwas_format)
        check(self, gwas)
def build_betas(args, model, gwas_format, name, model_snp_map):
    """Load one GWAS file, optionally remap its variants, and align it to a model.

    Steps, in order:

    1. Load the GWAS with ``GWAS.load_gwas``.
    2. If ``model_snp_map`` is given, read it with ``pandas.read_table``,
       align the GWAS to it, multiply ``zscore``/``beta`` by the map's
       ``swap`` column, and relabel the rows with the map's panel variant id
       and alleles. The original GWAS id and alleles are kept in ``gwas_*``
       columns.
    3. If ``model`` is not ``None``, align the data to the alleles found in
       ``model.weights``.
    4. Replace missing values with ``"NA"``.
    5. If ``model`` is not ``None``, keep only ``GWAS.SNP``, ``GWAS.ZSCORE``
       and, if present, ``GWAS.BETA``.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns``, ``input_pvalue_fix`` and
            ``keep_non_rsid``.
        model: optional prediction model providing ``snps()`` and ``weights``.
        gwas_format: format description forwarded to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.
        model_snp_map: optional source for ``pandas.read_table``. It must
            provide the columns ``a0``, ``a1``, ``panel_variant_id``,
            ``panel_variant_a0``, ``panel_variant_a1`` and ``swap``, plus the
            ``K_RSID`` column.

    Returns:
        The processed data, as described by the steps above.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path or "no database")

    if args.gwas_folder:
        gwas_path = os.path.join(args.gwas_folder, name)
    else:
        gwas_path = name

    model_snps = None
    if model:
        model_snps = model.snps()

    betas = GWAS.load_gwas(
        gwas_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model_snp_map:
        logging.info("Loading mapping")
        fields = PredictionModel.WDBQF
        raw_map = pandas.read_table(model_snp_map)
        renamed_map = raw_map.rename(columns={
            "a0": fields.K_NON_EFFECT_ALLELE,
            "a1": fields.K_EFFECT_ALLELE,
        })
        map_columns = [
            fields.K_RSID,
            fields.K_EFFECT_ALLELE,
            fields.K_NON_EFFECT_ALLELE,
            "panel_variant_id",
            "panel_variant_a0",
            "panel_variant_a1",
            "swap",
        ]
        variant_map = renamed_map[map_columns].drop_duplicates()

        logging.info("Mapping variants")
        # Remember the pre-mapping columns so the final selection can restore them.
        original_columns = list(betas.columns.values)
        betas = GWASAndModels.align_data_to_alleles(
            betas, variant_map, Constants.SNP, fields.K_RSID)

        # Apply the map's `swap` factor to the effect statistics.
        # NOTE(review): presumably swap is +1/-1 for allele flips; confirm against the map file.
        if GWAS.ZSCORE in betas:
            betas = betas.assign(zscore=betas.zscore * betas.swap)
        if GWAS.BETA in betas:
            betas = betas.assign(beta=betas.beta * betas.swap)

        # Move the GWAS identifiers aside, then let the panel identifiers take their place.
        betas = betas.rename(columns={
            GWAS.SNP: "gwas_snp",
            GWAS.EFFECT_ALLELE: "gwas_effect_allele",
            GWAS.NON_EFFECT_ALLELE: "gwas_non_effect_allele",
        })
        betas = betas.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])
        betas = betas.rename(columns={
            "panel_variant_id": GWAS.SNP,
            "panel_variant_a0": GWASAndModels.NEA,
            "panel_variant_a1": GWASAndModels.EA,
        })
        gwas_id_columns = ["gwas_snp", "gwas_effect_allele", "gwas_non_effect_allele"]
        betas = betas[gwas_id_columns + original_columns]

    if model is not None:
        logging.info("Aligning GWAS to models")
        fields = PredictionModel.WDBQF
        allele_columns = [
            fields.K_RSID,
            fields.K_EFFECT_ALLELE,
            fields.K_NON_EFFECT_ALLELE,
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        betas = GWASAndModels.align_data_to_alleles(
            betas, reference, Constants.SNP, fields.K_RSID)
        betas = betas.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])

    betas = betas.fillna("NA")

    if model is not None:
        logging.info("Trimming output")
        # Beta is optional in the loaded data; keep it only when available.
        selected = [GWAS.SNP, GWAS.ZSCORE]
        if GWAS.BETA in betas:
            selected.append(GWAS.BETA)
        betas = betas[selected]

    return betas