Beispiel #1
0
    def test_gwas_from_source(self):
        """Load the scz2 GWAS fixture through a filtered source and check the result.

        The fixture is first loaded unfiltered and checked with
        ``assert_gwas_zscore_fbse``; it is then loaded again restricted to three
        SNP ids, and the SNP, allele, chromosome, zscore, beta and standard
        error columns are compared against literal expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp":"SNPID",
            "column_non_effect_allele":"A2",
            "column_effect_allele":"A1",
            "column_or":"OR",
            "column_se":"SE",
            "column_chromosome":"HG19CHRC",
            "column_position":"BP"
        }

        source = GWASUtilities.gwas_filtered_source("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
        gwas = GWAS.load_gwas(source, gwas_format)
        assert_gwas_zscore_fbse(self, gwas)

        source = GWASUtilities.gwas_filtered_source("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz", snps={"rs940550", "rs6650104", "rs61770173"}, snp_column_name="SNPID")
        gwas = GWAS.load_gwas(source, gwas_format)

        # The builtin ``str`` is used as dtype: the ``numpy.str`` alias (which was
        # the very same object) was deprecated in NumPy 1.20 and removed in 1.24.
        numpy.testing.assert_array_equal(gwas[SNP], pandas.Series(["rs940550", "rs6650104", "rs61770173", ], dtype=str))
        numpy.testing.assert_array_equal(gwas[EFFECT_ALLELE], pandas.Series(["C", "T",  "A"], dtype=str))
        numpy.testing.assert_array_equal(gwas[NON_EFFECT_ALLELE], pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(gwas[CHROMOSOME], pandas.Series(["chr1", "chr1",  "chr22"], dtype=str))
        # Floating point columns are compared with a relative tolerance of 0.1%.
        numpy.testing.assert_allclose(gwas[ZSCORE], pandas.Series([-1.254557, 0.974874, -0.232505],dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[BETA], pandas.Series([-0.0217038334437866, 0.0193025022544974, -0.00369682484428976], dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[SE], pandas.Series([0.0173, 0.0198,  0.0159], dtype=numpy.float32), rtol=0.001)
Beispiel #2
0
    def test_gwas_from_source(self):
        """Load the scz2 GWAS fixture directly from its path and check the result.

        The fixture is first loaded with ``force_special_handling=True`` and
        checked with ``assert_gwas_zscore_fbse``; it is then loaded again with a
        ``snps`` restriction of three ids, and the SNP, allele, chromosome,
        zscore, beta and standard error columns are compared against literal
        expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp": "SNPID",
            "column_non_effect_allele": "A2",
            "column_effect_allele": "A1",
            "column_or": "OR",
            "column_se": "SE",
            "column_chromosome": "HG19CHRC",
            "column_position": "BP"
        }

        gwas = GWAS.load_gwas("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
                              gwas_format,
                              force_special_handling=True)
        assert_gwas_zscore_fbse(self, gwas)

        gwas = GWAS.load_gwas("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
                              gwas_format,
                              snps={"rs940550", "rs6650104", "rs61770173"})

        # The builtin ``str`` is used as dtype: the ``numpy.str`` alias (which was
        # the very same object) was deprecated in NumPy 1.20 and removed in 1.24.
        numpy.testing.assert_array_equal(
            gwas[SNP],
            pandas.Series(["rs940550", "rs6650104", "rs61770173"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[EFFECT_ALLELE],
            pandas.Series(["C", "T", "A"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[NON_EFFECT_ALLELE],
            pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[CHROMOSOME],
            pandas.Series(["chr1", "chr1", "chr22"], dtype=str))
        # Floating point columns are compared with a relative tolerance of 0.1%.
        numpy.testing.assert_allclose(
            gwas[ZSCORE],
            pandas.Series([-1.254557, 0.974874, -0.232505],
                          dtype=numpy.float32),
            rtol=0.001)
        numpy.testing.assert_allclose(
            gwas[BETA],
            pandas.Series([
                -0.0217038334437866, 0.0193025022544974, -0.00369682484428976
            ],
                          dtype=numpy.float32),
            rtol=0.001)
        numpy.testing.assert_allclose(
            gwas[SE],
            pandas.Series([0.0173, 0.0198, 0.0159], dtype=numpy.float32),
            rtol=0.001)
Beispiel #3
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP, zscore and (if present) beta columns.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns`` and ``input_pvalue_fix`` from it.
        model: prediction model or a falsy value. When truthy, ``model.snps()``
            is forwarded to the loader; when not ``None``, the loaded data is
            passed through ``GWASAndModels.align_data_to_alleles`` against the
            model's de-duplicated weight alleles.
        gwas_format: column-name mapping handed to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded frame with missing values replaced by the string ``"NA"``,
        restricted to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, when available,
        ``GWAS.BETA``.
    """
    db_label = args.model_db_path if args.model_db_path else "no database"
    logging.info("Building beta for %s and %s", name, db_label)

    if args.gwas_folder:
        gwas_path = os.path.join(args.gwas_folder, name)
    else:
        gwas_path = name

    load_options = {
        "snps": model.snps() if model else None,
        "separator": args.separator,
        "skip_until_header": args.skip_until_header,
        "handle_empty_columns": args.handle_empty_columns,
        "input_pvalue_fix": args.input_pvalue_fix,
    }
    b = GWAS.load_gwas(gwas_path, gwas_format, **load_options)

    if model is not None:
        weight_fields = PredictionModel.WDBQF
        allele_columns = [
            weight_fields.K_RSID,
            weight_fields.K_EFFECT_ALLELE,
            weight_fields.K_NON_EFFECT_ALLELE,
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        b = GWASAndModels.align_data_to_alleles(
            b, reference, Constants.SNP, weight_fields.K_RSID)

    b = b.fillna("NA")
    # BETA is optional in the loaded data; keep it only when the column exists.
    optional = [GWAS.BETA] if GWAS.BETA in b else []
    return b[[GWAS.SNP, GWAS.ZSCORE] + optional]
Beispiel #4
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP, zscore and (if present) beta columns.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``skip_until_header``, ``snp_column`` and
            ``separator`` from it.
        model: prediction model or a falsy value. When truthy, its ``snps()``
            are used to build a filtered source; when not ``None``, the loaded
            data is passed through ``align_data_to_alleles`` against the
            model's de-duplicated weight alleles.
        gwas_format: column-name mapping handed to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded frame with missing values replaced by the string ``"NA"``,
        restricted to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, when available,
        ``GWAS.BETA``.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path if args.model_db_path else "no database")
    # os.path.join raises TypeError when the folder is None, so fall back to
    # the bare file name when no folder was configured.
    load_from = os.path.join(args.gwas_folder,
                             name) if args.gwas_folder else name
    if model or args.skip_until_header:
        # A filtered source is only needed when rows must be restricted to the
        # model's SNPs or leading lines must be skipped.
        snps = model.snps() if model else None
        snp_column_name = args.snp_column if model else None
        load_from = GWASUtilities.gwas_filtered_source(
            load_from,
            snps=snps,
            snp_column_name=snp_column_name,
            skip_until_header=args.skip_until_header,
            separator=args.separator)
    # Raw string: '\s' is not a valid escape in a normal string literal and
    # triggers a warning on recent Python versions; the value is unchanged.
    sep = r'\s+' if args.separator is None else args.separator
    b = GWAS.load_gwas(load_from, gwas_format, sep=sep)

    if model is not None:
        PF = PredictionModel.WDBQF
        base = model.weights[[
            PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE
        ]].drop_duplicates()
        b = align_data_to_alleles(b, base, Constants.SNP, PF.K_RSID)

    b = b.fillna("NA")
    keep = [GWAS.SNP, GWAS.ZSCORE]
    # BETA is optional in the loaded data; keep it only when the column exists.
    if GWAS.BETA in b:
        keep.append(GWAS.BETA)
    b = b[keep]
    return b
Beispiel #5
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to the SNP, zscore and (if present) beta columns.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns``, ``input_pvalue_fix`` and
            ``keep_non_rsid`` from it.
        model: prediction model or a falsy value. When truthy, ``model.snps()``
            is forwarded to the loader; when not ``None``, the loaded data is
            passed through ``GWASAndModels.align_data_to_alleles`` against the
            model's de-duplicated weight alleles.
        gwas_format: column-name mapping handed to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        The loaded frame with missing values replaced by the string ``"NA"``,
        restricted to ``GWAS.SNP``, ``GWAS.ZSCORE`` and, when available,
        ``GWAS.BETA``.
    """
    database = args.model_db_path if args.model_db_path else "no database"
    logging.info("Building beta for %s and %s", name, database)

    source_path = name
    if args.gwas_folder:
        source_path = os.path.join(args.gwas_folder, name)

    wanted_snps = model.snps() if model else None
    b = GWAS.load_gwas(
        source_path,
        gwas_format,
        snps=wanted_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model is not None:
        fields = PredictionModel.WDBQF
        allele_key = [fields.K_RSID, fields.K_EFFECT_ALLELE, fields.K_NON_EFFECT_ALLELE]
        model_alleles = model.weights[allele_key].drop_duplicates()
        b = GWASAndModels.align_data_to_alleles(b, model_alleles, Constants.SNP, fields.K_RSID)

    b = b.fillna("NA")
    # BETA is optional in the loaded data; keep it only when the column exists.
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in b:
        selected = selected + [GWAS.BETA]
    return b[selected]
Beispiel #6
0
    def test_load_gwas(self):
        """Load the scz2 GWAS fixture under several column layouts and check each result.

        The first load declares only the SNP and allele columns and passes
        ``strict=False``. Every later load adds a pair of statistic columns plus
        chromosome and position, and is checked with the zscore helper listed
        next to it.
        """
        gwas_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

        def allele_columns():
            # A fresh dict per call, so no two loads share a format mapping.
            return {
                "column_snp": "SNPID",
                "column_non_effect_allele": "A2",
                "column_effect_allele": "A1",
            }

        basic = GWAS.load_gwas(gwas_path, allele_columns(), strict=False)
        assert_basic_gwas(self, basic)

        full_format_cases = (
            # full format, OR+SE (which is like beta+se)
            ((("column_or", "OR"), ("column_se", "SE")),
             assert_gwas_zscore_fbse),
            # full format, beta+SE
            ((("column_beta", "BETA"), ("column_se", "SE")),
             assert_gwas_zscore_fbse),
            # full format, pvalue+beta
            ((("column_beta", "BETA"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
            # full format, pvalue+beta_sign
            ((("column_beta_sign", "BETA_SIGN"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
            # full format, pvalue+or
            ((("column_or", "OR"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
        )

        for statistic_columns, check in full_format_cases:
            gwas_format = allele_columns()
            gwas_format.update(statistic_columns)
            gwas_format["column_chromosome"] = "HG19CHRC"
            gwas_format["column_position"] = "BP"
            loaded = GWAS.load_gwas(gwas_path, gwas_format)
            check(self, loaded)
Beispiel #7
0
def build_betas(args, model, gwas_format, name, model_snp_map):
    """Load one GWAS file, optionally remap its variants, and align it to a model.

    Args:
        args: options object; this function reads ``model_db_path``,
            ``gwas_folder``, ``separator``, ``skip_until_header``,
            ``handle_empty_columns``, ``input_pvalue_fix`` and
            ``keep_non_rsid`` from it.
        model: prediction model or a falsy value. When truthy, ``model.snps()``
            is forwarded to the loader; when not ``None``, the data is aligned
            against the model's de-duplicated weight alleles and the output is
            trimmed to the SNP, zscore and (if present) beta columns.
        gwas_format: column-name mapping handed to ``GWAS.load_gwas``.
        name: GWAS file name, joined to ``args.gwas_folder`` when that is set.
        model_snp_map: path (or other ``pandas.read_table`` input) of a variant
            mapping table; skipped when falsy.

    Returns:
        The resulting frame with missing values replaced by the string ``"NA"``.
    """
    db_label = args.model_db_path if args.model_db_path else "no database"
    logging.info("Building beta for %s and %s", name, db_label)

    if args.gwas_folder:
        gwas_path = os.path.join(args.gwas_folder, name)
    else:
        gwas_path = name

    model_snps = model.snps() if model else None
    b = GWAS.load_gwas(
        gwas_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model_snp_map:
        logging.info("Loading mapping")
        PF = PredictionModel.WDBQF
        raw_map = pandas.read_table(model_snp_map)
        renamed_map = raw_map.rename(columns={
            "a0": PF.K_NON_EFFECT_ALLELE,
            "a1": PF.K_EFFECT_ALLELE,
        })
        map_columns = [
            PF.K_RSID,
            PF.K_EFFECT_ALLELE,
            PF.K_NON_EFFECT_ALLELE,
            "panel_variant_id",
            "panel_variant_a0",
            "panel_variant_a1",
            "swap",
        ]
        variant_map = renamed_map[map_columns].drop_duplicates()

        logging.info("Mapping variants")
        # Remember the pre-mapping column layout; it is restored at the end.
        original_columns = list(b.columns.values)
        b = GWASAndModels.align_data_to_alleles(b, variant_map, Constants.SNP,
                                                PF.K_RSID)
        # Effect statistics are multiplied by the `swap` column.
        # NOTE(review): presumably `swap` is a +1/-1 sign from the mapping table - confirm.
        if GWAS.ZSCORE in b:
            b = b.assign(zscore=b.zscore * b.swap)
        if GWAS.BETA in b:
            b = b.assign(beta=b.beta * b.swap)

        # Step 1: move the GWAS identifiers out of the way under gwas_* names.
        gwas_names = {
            GWAS.SNP: "gwas_snp",
            GWAS.EFFECT_ALLELE: "gwas_effect_allele",
            GWAS.NON_EFFECT_ALLELE: "gwas_non_effect_allele",
        }
        b = b.rename(columns=gwas_names)
        # Step 2: discard the base allele columns.
        b = b.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])
        # Step 3: let the panel variant columns take over the standard names.
        panel_names = {
            "panel_variant_id": GWAS.SNP,
            "panel_variant_a0": GWASAndModels.NEA,
            "panel_variant_a1": GWASAndModels.EA,
        }
        b = b.rename(columns=panel_names)
        # Step 4: gwas_* columns first, then the original layout.
        leading = ["gwas_snp", "gwas_effect_allele", "gwas_non_effect_allele"]
        b = b[leading + original_columns]

    has_model = model is not None

    if has_model:
        logging.info("Aligning GWAS to models")
        PF = PredictionModel.WDBQF
        allele_columns = [PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE]
        reference = model.weights[allele_columns].drop_duplicates()
        b = GWASAndModels.align_data_to_alleles(b, reference, Constants.SNP,
                                                PF.K_RSID)
        b = b.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])

    b = b.fillna("NA")

    if has_model:
        logging.info("Trimming output")
        # BETA is optional; keep it only when the column exists.
        optional = [GWAS.BETA] if GWAS.BETA in b else []
        b = b[[GWAS.SNP, GWAS.ZSCORE] + optional]

    return b
Beispiel #8
0
    def test_load_gwas(self):
        """Load the scz2 GWAS fixture under several column layouts and check each result.

        The first load declares only the SNP and allele columns and passes
        ``strict=False``. Every later load adds a pair of statistic columns plus
        chromosome and position, and is checked with the zscore helper listed
        next to it.
        """
        gwas_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"

        def allele_columns():
            # A fresh dict per call, so no two loads share a format mapping.
            return {
                "column_snp": "SNPID",
                "column_non_effect_allele": "A2",
                "column_effect_allele": "A1",
            }

        basic = GWAS.load_gwas(gwas_path, allele_columns(), strict=False)
        assert_basic_gwas(self, basic)

        full_format_cases = (
            # full format, OR+SE (which is like beta+se)
            ((("column_or", "OR"), ("column_se", "SE")),
             assert_gwas_zscore_fbse),
            # full format, beta+SE
            ((("column_beta", "BETA"), ("column_se", "SE")),
             assert_gwas_zscore_fbse),
            # full format, pvalue+beta
            ((("column_beta", "BETA"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
            # full format, pvalue+beta_sign
            ((("column_beta_sign", "BETA_SIGN"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
            # full format, pvalue+or
            ((("column_or", "OR"), ("column_pvalue", "P")),
             assert_gwas_zscore_pb),
        )

        for statistic_columns, check in full_format_cases:
            gwas_format = allele_columns()
            gwas_format.update(statistic_columns)
            gwas_format["column_chromosome"] = "HG19CHRC"
            gwas_format["column_position"] = "BP"
            loaded = GWAS.load_gwas(gwas_path, gwas_format)
            check(self, loaded)