예제 #1
0
    def test_gwas_from_source(self):
        """Load the scz2 GWAS via a filtered source and check the parsed columns.

        The file is loaded twice: once unfiltered (checked by
        ``assert_gwas_zscore_fbse``) and once restricted to three SNP ids,
        whose columns are compared against hard-coded expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp":"SNPID",
            "column_non_effect_allele":"A2",
            "column_effect_allele":"A1",
            "column_or":"OR",
            "column_se":"SE",
            "column_chromosome":"HG19CHRC",
            "column_position":"BP"
        }
        path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

        # Unfiltered load.
        source = GWASUtilities.gwas_filtered_source(path)
        gwas = GWAS.load_gwas(source, gwas_format)
        assert_gwas_zscore_fbse(self, gwas)

        # Load restricted to a whitelist of SNP ids.
        source = GWASUtilities.gwas_filtered_source(path, snps={"rs940550", "rs6650104", "rs61770173"}, snp_column_name="SNPID")
        gwas = GWAS.load_gwas(source, gwas_format)

        # `numpy.str` was only an alias of the builtin `str` and was removed in
        # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
        numpy.testing.assert_array_equal(gwas[SNP], pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str))
        numpy.testing.assert_array_equal(gwas[EFFECT_ALLELE], pandas.Series(["C", "T", "A"], dtype=str))
        numpy.testing.assert_array_equal(gwas[NON_EFFECT_ALLELE], pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(gwas[CHROMOSOME], pandas.Series(["chr1", "chr1", "chr22"], dtype=str))
        numpy.testing.assert_allclose(gwas[ZSCORE], pandas.Series([-1.254557, 0.974874, -0.232505], dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[BETA], pandas.Series([-0.0217038334437866, 0.0193025022544974, -0.00369682484428976], dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[SE], pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32), rtol=0.001)
예제 #2
0
    def test_gwas_from_source(self):
        """Load the scz2 GWAS via a filtered source and check the parsed columns.

        First loads the whole file and delegates the checks to
        ``assert_gwas_zscore_fbse``; then reloads it restricted to three SNP
        ids and compares each resulting column with literal expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp": "SNPID",
            "column_non_effect_allele": "A2",
            "column_effect_allele": "A1",
            "column_or": "OR",
            "column_se": "SE",
            "column_chromosome": "HG19CHRC",
            "column_position": "BP"
        }

        source = GWASUtilities.gwas_filtered_source(
            "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
        gwas = GWAS.load_gwas(source, gwas_format)
        assert_gwas_zscore_fbse(self, gwas)

        source = GWASUtilities.gwas_filtered_source(
            "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
            snps={"rs940550", "rs6650104", "rs61770173"},
            snp_column_name="SNPID")
        gwas = GWAS.load_gwas(source, gwas_format)

        # The builtin `str` replaces `numpy.str`: the latter was a plain alias
        # of `str` that NumPy deprecated in 1.20 and removed in 1.24.
        numpy.testing.assert_array_equal(
            gwas[SNP],
            pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[EFFECT_ALLELE],
            pandas.Series(["C", "T", "A"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[NON_EFFECT_ALLELE],
            pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[CHROMOSOME],
            pandas.Series(["chr1", "chr1", "chr22"], dtype=str))
        numpy.testing.assert_allclose(
            gwas[ZSCORE],
            pandas.Series([-1.254557, 0.974874, -0.232505],
                          dtype=numpy.float32),
            rtol=0.001)
        numpy.testing.assert_allclose(
            gwas[BETA],
            pandas.Series(
                [-0.0217038334437866, 0.0193025022544974, -0.00369682484428976],
                dtype=numpy.float32),
            rtol=0.001)
        numpy.testing.assert_allclose(
            gwas[SE],
            pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32),
            rtol=0.001)
예제 #3
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP / z-score (/ beta) columns.

    Args:
        args: Options object. This function reads ``model_db_path``,
            ``gwas_folder``, ``skip_until_header``, ``separator``,
            ``snp_column`` and ``input_pvalue_fix`` from it.
        model: Prediction model, or ``None``. When given, its ``snps()`` are
            used to filter the input and its ``weights`` table is used to
            align the loaded data to the model's alleles.
        gwas_format: Column-mapping passed through to ``GWAS.load_gwas``.
        name: File name of the GWAS, relative to ``args.gwas_folder``.

    Returns:
        The loaded data with missing values replaced by the string ``"NA"``,
        restricted to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, if present,
        ``GWAS.BETA`` (presumably a pandas DataFrame -- confirm against
        ``GWAS.load_gwas``).
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path if args.model_db_path else "no database")
    load_from = os.path.join(args.gwas_folder, name)

    # Only wrap the path in a filtered source when there is something to
    # filter on (a model's SNP list) or a header to skip to.
    # NOTE(review): this uses the truthiness of `model`, while the alignment
    # step below checks `model is not None`; kept as-is to preserve behavior.
    if model or args.skip_until_header:
        snps = model.snps() if model else None
        snp_column_name = args.snp_column if model else None
        load_from = GWASUtilities.gwas_filtered_source(
            load_from,
            snps=snps,
            snp_column_name=snp_column_name,
            skip_until_header=args.skip_until_header,
            separator=args.separator)

    # Raw string: "\s" in a plain literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions. The value is the
    # same two-character regex, meaning "any run of whitespace".
    sep = r'\s+' if args.separator is None else args.separator
    b = GWAS.load_gwas(load_from,
                       gwas_format,
                       sep=sep,
                       input_pvalue_fix=args.input_pvalue_fix)

    if model is not None:
        PF = PredictionModel.WDBQF
        # One row per (rsid, effect allele, non-effect allele) in the model.
        base = model.weights[[
            PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE
        ]].drop_duplicates()
        b = align_data_to_alleles(b, base, Constants.SNP, PF.K_RSID)

    b = b.fillna("NA")
    keep = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in b:
        keep.append(GWAS.BETA)
    return b[keep]