def test_gwas_from_source(self):
    """Load the scz2 GWAS fixture through a filtered source and check the result.

    The fixture is first loaded without any SNP filter and validated with
    ``assert_gwas_zscore_fbse``. It is then loaded again restricted to three
    SNP ids, and the identifier, allele, chromosome, z-score, beta and
    standard-error columns are compared against hard-coded expectations.
    """
    # full format, OR+SE (which is like beta+se)
    gwas_format = {
        "column_snp": "SNPID",
        "column_non_effect_allele": "A2",
        "column_effect_allele": "A1",
        "column_or": "OR",
        "column_se": "SE",
        "column_chromosome": "HG19CHRC",
        "column_position": "BP",
    }

    # Pass 1: unfiltered source.
    source = GWASUtilities.gwas_filtered_source("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
    gwas = GWAS.load_gwas(source, gwas_format)
    assert_gwas_zscore_fbse(self, gwas)

    # Pass 2: source restricted to three SNP ids, matched on the "SNPID" column.
    source = GWASUtilities.gwas_filtered_source(
        "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
        snps={"rs940550", "rs6650104", "rs61770173"},
        snp_column_name="SNPID",
    )
    gwas = GWAS.load_gwas(source, gwas_format)

    # The builtin ``str`` is used as dtype: ``numpy.str`` was only an alias for
    # it and no longer exists in NumPy >= 1.24.
    numpy.testing.assert_array_equal(
        gwas[SNP],
        pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str),
    )
    numpy.testing.assert_array_equal(
        gwas[EFFECT_ALLELE],
        pandas.Series(["C", "T", "A"], dtype=str),
    )
    numpy.testing.assert_array_equal(
        gwas[NON_EFFECT_ALLELE],
        pandas.Series(["G", "C", "C"], dtype=str),
    )
    numpy.testing.assert_array_equal(
        gwas[CHROMOSOME],
        pandas.Series(["chr1", "chr1", "chr22"], dtype=str),
    )

    # Numeric columns are compared with a relative tolerance of 0.1%.
    numpy.testing.assert_allclose(
        gwas[ZSCORE],
        pandas.Series([-1.254557, 0.974874, -0.232505], dtype=numpy.float32),
        rtol=0.001,
    )
    numpy.testing.assert_allclose(
        gwas[BETA],
        pandas.Series(
            [-0.0217038334437866, 0.0193025022544974, -0.00369682484428976],
            dtype=numpy.float32,
        ),
        rtol=0.001,
    )
    numpy.testing.assert_allclose(
        gwas[SE],
        pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32),
        rtol=0.001,
    )
def test_gwas_from_source(self):
    """Check GWAS loading from a (optionally SNP-filtered) source on the scz2 fixture.

    Steps:
      1. Load the whole fixture and run ``assert_gwas_zscore_fbse`` on it.
      2. Reload it keeping only three SNP ids and compare every checked
         column (SNP, alleles, chromosome, z-score, beta, SE) with literal
         expected values.
    """
    # full format, OR+SE (which is like beta+se)
    gwas_format = {
        "column_snp": "SNPID",
        "column_non_effect_allele": "A2",
        "column_effect_allele": "A1",
        "column_or": "OR",
        "column_se": "SE",
        "column_chromosome": "HG19CHRC",
        "column_position": "BP",
    }

    source = GWASUtilities.gwas_filtered_source(
        "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
    gwas = GWAS.load_gwas(source, gwas_format)
    assert_gwas_zscore_fbse(self, gwas)

    source = GWASUtilities.gwas_filtered_source(
        "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
        snps={"rs940550", "rs6650104", "rs61770173"},
        snp_column_name="SNPID")
    gwas = GWAS.load_gwas(source, gwas_format)

    # String-valued columns must match exactly. ``dtype=str`` replaces the
    # ``numpy.str`` alias, which was deprecated in NumPy 1.20 and later removed.
    expected_text = (
        (SNP, ["rs940550", "rs6650104", "rs61770173"]),
        (EFFECT_ALLELE, ["C", "T", "A"]),
        (NON_EFFECT_ALLELE, ["G", "C", "C"]),
        (CHROMOSOME, ["chr1", "chr1", "chr22"]),
    )
    for column, values in expected_text:
        numpy.testing.assert_array_equal(
            gwas[column], pandas.Series(values, dtype=str))

    # Floating-point columns are compared with rtol=0.001.
    expected_numeric = (
        (ZSCORE, [-1.254557, 0.974874, -0.232505]),
        (BETA, [-0.0217038334437866, 0.0193025022544974, -0.00369682484428976]),
        (SE, [0.0173, 0.0198, 0.0159]),
    )
    for column, values in expected_numeric:
        numpy.testing.assert_allclose(
            gwas[column],
            pandas.Series(values, dtype=numpy.float32),
            rtol=0.001)
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP, z-score and (if present) beta columns.

    Args:
        args: Options object. This function reads ``model_db_path``,
            ``gwas_folder``, ``skip_until_header``, ``snp_column``,
            ``separator`` and ``input_pvalue_fix`` from it.
        model: Optional prediction model. When given, its ``snps()`` are used
            to filter the input and its ``weights`` table is used to align
            the loaded data to the model's alleles.
        gwas_format: Column-format description forwarded to ``GWAS.load_gwas``.
        name: File name of the GWAS, relative to ``args.gwas_folder``.

    Returns:
        The loaded GWAS restricted to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, when
        available, ``GWAS.BETA``, with missing values replaced by the string
        ``"NA"``.
    """
    logging.info(
        "Building beta for %s and %s",
        name,
        args.model_db_path if args.model_db_path else "no database",
    )

    load_from = os.path.join(args.gwas_folder, name)

    # A filtered source is only needed when there is a model to restrict SNPs
    # to, or when leading lines must be skipped until the header.
    if model or args.skip_until_header:
        snps = model.snps() if model else None
        snp_column_name = args.snp_column if model else None
        load_from = GWASUtilities.gwas_filtered_source(
            load_from,
            snps=snps,
            snp_column_name=snp_column_name,
            skip_until_header=args.skip_until_header,
            separator=args.separator,
        )

    # Default to "any run of whitespace". Raw string: '\s' in a plain literal
    # is an invalid escape sequence and triggers a warning on modern Python.
    sep = r'\s+' if args.separator is None else args.separator
    b = GWAS.load_gwas(load_from, gwas_format, sep=sep, input_pvalue_fix=args.input_pvalue_fix)

    if model is not None:
        PF = PredictionModel.WDBQF
        # One row per (rsid, effect allele, non-effect allele) combination in the model.
        base = model.weights[[PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE]].drop_duplicates()
        b = align_data_to_alleles(b, base, Constants.SNP, PF.K_RSID)

    b = b.fillna("NA")

    keep = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in b:
        keep.append(GWAS.BETA)
    b = b[keep]
    return b