def readGWAS(args):
    """Read every matching GWAS file in ``args.gwas_folder`` into one frame.

    The arguments are validated, the folder contents are listed (optionally
    filtered by ``args.gwas_file_pattern``), each file is processed with
    ``build_betas`` and the per-file results are concatenated.

    Args:
        args: Parsed program arguments. This function reads ``gwas_folder``
            and ``gwas_file_pattern`` directly and passes the whole object
            on to ``validate``, ``gwas_format_from_args`` and
            ``build_betas``.

    Returns:
        A pandas DataFrame holding the rows produced for every file,
        processed in sorted file-name order.

    Raises:
        Exceptions.ReportableException: if no file in the folder matches.
    """
    start = timer()
    validate(args)
    regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
    names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
    # Cosmetic: different filesystems/OS yield folder contents in different order.
    names.sort()

    if not names:
        msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
        raise Exceptions.ReportableException(msg)

    # Parenthesised single-argument form: valid as a Python 3 call and prints
    # the same text under the Python 2 print statement.
    print("INFO: Reading GWAS data")
    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    # No prediction model is loaded here; build_betas is called with model=None.
    model = None

    r = pandas.DataFrame()
    for name in names:
        b = build_betas(args, model, gwas_format, name)
        r = pandas.concat([r, b])

    elapsed = str(timer() - start)
    logging.info("Successfully parsed input gwas in %s seconds", elapsed)
    print("Successfully parsed input gwas in %s seconds" % (elapsed))
    return r
Exemple #2
0
    def test_gwas_from_source(self):
        """Load the scz2 sample GWAS through ``gwas_filtered_source``.

        The file is first loaded without a SNP restriction and checked with
        ``assert_gwas_zscore_fbse``; it is then loaded again with a set of
        three SNPs and each column of the result is compared against the
        expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp":"SNPID",
            "column_non_effect_allele":"A2",
            "column_effect_allele":"A1",
            "column_or":"OR",
            "column_se":"SE",
            "column_chromosome":"HG19CHRC",
            "column_position":"BP"
        }

        source = GWASUtilities.gwas_filtered_source("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
        gwas = GWAS.load_gwas(source, gwas_format)
        assert_gwas_zscore_fbse(self, gwas)

        source = GWASUtilities.gwas_filtered_source("tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz", snps={"rs940550", "rs6650104", "rs61770173"}, snp_column_name="SNPID")
        gwas = GWAS.load_gwas(source, gwas_format)

        # ``numpy.str`` was only an alias of the builtin ``str`` and has been
        # removed from recent NumPy releases, so the builtin is used directly.
        numpy.testing.assert_array_equal(gwas[SNP], pandas.Series(["rs940550", "rs6650104", "rs61770173", ], dtype=str))
        numpy.testing.assert_array_equal(gwas[EFFECT_ALLELE], pandas.Series(["C", "T",  "A"], dtype=str))
        numpy.testing.assert_array_equal(gwas[NON_EFFECT_ALLELE], pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(gwas[CHROMOSOME], pandas.Series(["chr1", "chr1",  "chr22"], dtype=str))
        numpy.testing.assert_allclose(gwas[ZSCORE], pandas.Series([-1.254557, 0.974874, -0.232505],dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[BETA], pandas.Series([-0.0217038334437866, 0.0193025022544974, -0.00369682484428976], dtype=numpy.float32), rtol=0.001)
        numpy.testing.assert_allclose(gwas[SE], pandas.Series([0.0173, 0.0198,  0.0159], dtype=numpy.float32), rtol=0.001)
Exemple #3
0
def run(args):
    """Process the GWAS input selected by ``args``.

    The input is either every file in ``args.gwas_folder`` (optionally
    filtered by ``args.gwas_file_pattern``) or the single
    ``args.gwas_file``. Each file is processed with ``build_betas``.

    When ``args.output_folder`` is set, one tab-separated file per input is
    written there and inputs whose output already exists are skipped.
    Otherwise the per-file results are concatenated and returned.

    Args:
        args: Parsed program arguments. Read here: ``gwas_folder``,
            ``gwas_file_pattern``, ``gwas_file``, ``model_db_path``,
            ``model_db_snp_key`` and ``output_folder``.

    Returns:
        The concatenated pandas DataFrame when no output folder is given,
        ``None`` when results are written to disk.

    Raises:
        Exceptions.ReportableException: if a folder is given and no file in
            it matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(
            args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder,
                                                       regexp)
        # Cosmetic: different filesystems/OS yield folder contents in
        # different order.
        names.sort()

        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (
                args.gwas_folder,
                args.gwas_file_pattern,
            )
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(
        args.model_db_path,
        args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            output_path = os.path.join(args.output_folder, name)
            if ".gz" not in output_path:
                output_path += ".gz"
            if os.path.exists(output_path):
                logging.info(
                    "%s already exists, delete it if you want it to be done again",
                    output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # Pick the compression from the path actually written, so a file
            # that received the ".gz" suffix above is really gzip-compressed
            # (deciding from the input name left such files as plain text).
            c = "gzip" if output_path.endswith(".gz") else None
            b.to_csv(output_path, sep="\t", index=False, compression=c)
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds",
                     str(end - start))
        return None

    r = pandas.DataFrame()
    for name in names:
        b = build_betas(args, model, gwas_format, name)
        r = pandas.concat([r, b])
    end = timer()
    logging.info("Successfully parsed input gwas in %s seconds",
                 str(end - start))

    return r
Exemple #4
0
    def test_gwas_from_source(self):
        """Load the scz2 sample GWAS through ``gwas_filtered_source``.

        The file is first loaded without a SNP restriction and checked with
        ``assert_gwas_zscore_fbse``; it is then loaded again with a set of
        three SNPs and each column of the result is compared against the
        expected values.
        """
        #full format, OR+SE (which is like beta+se)
        gwas_format = {
            "column_snp": "SNPID",
            "column_non_effect_allele": "A2",
            "column_effect_allele": "A1",
            "column_or": "OR",
            "column_se": "SE",
            "column_chromosome": "HG19CHRC",
            "column_position": "BP"
        }

        source = GWASUtilities.gwas_filtered_source(
            "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz")
        gwas = GWAS.load_gwas(source, gwas_format)
        assert_gwas_zscore_fbse(self, gwas)

        source = GWASUtilities.gwas_filtered_source(
            "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz",
            snps={"rs940550", "rs6650104", "rs61770173"},
            snp_column_name="SNPID")
        gwas = GWAS.load_gwas(source, gwas_format)

        # ``numpy.str`` was only an alias of the builtin ``str`` and has been
        # removed from recent NumPy releases, so the builtin is used directly.
        numpy.testing.assert_array_equal(
            gwas[SNP],
            pandas.Series([
                "rs940550",
                "rs6650104",
                "rs61770173",
            ],
                          dtype=str))
        numpy.testing.assert_array_equal(
            gwas[EFFECT_ALLELE], pandas.Series(["C", "T", "A"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[NON_EFFECT_ALLELE],
            pandas.Series(["G", "C", "C"], dtype=str))
        numpy.testing.assert_array_equal(
            gwas[CHROMOSOME],
            pandas.Series(["chr1", "chr1", "chr22"], dtype=str))
        numpy.testing.assert_allclose(gwas[ZSCORE],
                                      pandas.Series(
                                          [-1.254557, 0.974874, -0.232505],
                                          dtype=numpy.float32),
                                      rtol=0.001)
        numpy.testing.assert_allclose(
            gwas[BETA],
            pandas.Series([
                -0.0217038334437866, 0.0193025022544974, -0.00369682484428976
            ],
                          dtype=numpy.float32),
            rtol=0.001)
        numpy.testing.assert_allclose(gwas[SE],
                                      pandas.Series([0.0173, 0.0198, 0.0159],
                                                    dtype=numpy.float32),
                                      rtol=0.001)
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to SNP, z-score and optional beta.

    Args:
        args: Parsed program arguments; ``model_db_path``, ``gwas_folder``,
            ``separator``, ``skip_until_header``, ``handle_empty_columns``
            and ``input_pvalue_fix`` are read here.
        model: Prediction model, or ``None``. When given, its SNPs are
            passed to the loader and the loaded data is aligned to the
            alleles found in ``model.weights``.
        gwas_format: Column-format description forwarded to
            ``GWAS.load_gwas``.
        name: File name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        A DataFrame with missing values replaced by the string ``"NA"``,
        restricted to the SNP and z-score columns plus beta when present.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path or "no database")

    if args.gwas_folder:
        source_path = os.path.join(args.gwas_folder, name)
    else:
        source_path = name

    model_snps = model.snps() if model else None
    gwas = GWAS.load_gwas(
        source_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
    )

    if model is not None:
        fields = PredictionModel.WDBQF
        allele_columns = [
            fields.K_RSID, fields.K_EFFECT_ALLELE, fields.K_NON_EFFECT_ALLELE
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        gwas = GWASAndModels.align_data_to_alleles(gwas, reference,
                                                   Constants.SNP,
                                                   fields.K_RSID)

    gwas = gwas.fillna("NA")
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in gwas:
        selected.append(GWAS.BETA)
    return gwas[selected]
Exemple #6
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to SNP, z-score and optional beta.

    When a model is given or ``args.skip_until_header`` is set, the file is
    first wrapped with ``GWASUtilities.gwas_filtered_source`` and that
    source is handed to the loader instead of the plain path.

    Args:
        args: Parsed program arguments; ``model_db_path``, ``gwas_folder``,
            ``skip_until_header``, ``snp_column`` and ``separator`` are
            read here.
        model: Prediction model, or ``None``. When given, the loaded data
            is aligned to the alleles found in ``model.weights``.
        gwas_format: Column-format description forwarded to
            ``GWAS.load_gwas``.
        name: File name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        A DataFrame with missing values replaced by the string ``"NA"``,
        restricted to the SNP and z-score columns plus beta when present.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path if args.model_db_path else "no database")
    # Fall back to the bare name when no folder is configured rather than
    # failing inside os.path.join.
    load_from = os.path.join(args.gwas_folder,
                             name) if args.gwas_folder else name
    if model or args.skip_until_header:
        snps = model.snps() if model else None
        snp_column_name = args.snp_column if model else None
        load_from = GWASUtilities.gwas_filtered_source(
            load_from,
            snps=snps,
            snp_column_name=snp_column_name,
            skip_until_header=args.skip_until_header,
            separator=args.separator)
    # Raw string: "\s" is not a valid escape in an ordinary string literal.
    sep = r'\s+' if args.separator is None else args.separator
    b = GWAS.load_gwas(load_from, gwas_format, sep=sep)

    if model is not None:
        PF = PredictionModel.WDBQF
        base = model.weights[[
            PF.K_RSID, PF.K_EFFECT_ALLELE, PF.K_NON_EFFECT_ALLELE
        ]].drop_duplicates()
        b = align_data_to_alleles(b, base, Constants.SNP, PF.K_RSID)

    b = b.fillna("NA")
    keep = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in b:
        keep.append(GWAS.BETA)
    return b[keep]
Exemple #7
0
def run(args):
    """Process the GWAS input selected by ``args``.

    The input is either every file in ``args.gwas_folder`` (optionally
    filtered by ``args.gwas_file_pattern``) or the single
    ``args.gwas_file``. Each file is processed with ``build_betas``.

    When ``args.output_folder`` is set, one tab-separated file per input is
    written there and inputs whose output already exists are skipped.
    Otherwise the per-file results are concatenated and returned.

    Args:
        args: Parsed program arguments. Read here: ``gwas_folder``,
            ``gwas_file_pattern``, ``gwas_file``, ``model_db_path``,
            ``model_db_snp_key`` and ``output_folder``.

    Returns:
        The concatenated pandas DataFrame when no output folder is given,
        ``None`` when results are written to disk.

    Raises:
        Exceptions.ReportableException: if a folder is given and no file in
            it matches.
    """
    start = timer()
    validate(args)

    if args.gwas_folder:
        regexp = re.compile(args.gwas_file_pattern) if args.gwas_file_pattern else None
        names = Utilities.contentsWithRegexpFromFolder(args.gwas_folder, regexp)
        # Cosmetic: different filesystems/OS yield folder contents in different order.
        names.sort()

        if not names:
            msg = "No GWAS files found on %s with pattern %s" % (args.gwas_folder, args.gwas_file_pattern,)
            raise Exceptions.ReportableException(msg)
    else:
        names = [args.gwas_file]

    gwas_format = GWASUtilities.gwas_format_from_args(args)
    GWAS.validate_format_basic(gwas_format)
    GWAS.validate_format_for_strict(gwas_format)
    model = PredictionModel.load_model(args.model_db_path, args.model_db_snp_key) if args.model_db_path else None

    if args.output_folder:
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)

        for name in names:
            output_path = os.path.join(args.output_folder, name)
            if ".gz" not in output_path:
                output_path += ".gz"
            if os.path.exists(output_path):
                logging.info("%s already exists, delete it if you want it to be done again", output_path)
                continue

            b = build_betas(args, model, gwas_format, name)
            # Pick the compression from the path actually written, so a file
            # that received the ".gz" suffix above is really gzip-compressed
            # (deciding from the input name left such files as plain text).
            c = "gzip" if output_path.endswith(".gz") else None
            b.to_csv(output_path, sep="\t", index=False, compression=c)
        end = timer()
        logging.info("Successfully ran GWAS input processing in %s seconds", str(end - start))
        return None

    r = pandas.DataFrame()
    for name in names:
        b = build_betas(args, model, gwas_format, name)
        r = pandas.concat([r, b])
    end = timer()
    logging.info("Successfully parsed input gwas in %s seconds", str(end - start))

    return r
Exemple #8
0
def build_betas(args, model, gwas_format, name):
    """Load one GWAS file and reduce it to SNP, z-score and optional beta.

    Args:
        args: Parsed program arguments; ``model_db_path``, ``gwas_folder``,
            ``separator``, ``skip_until_header``, ``handle_empty_columns``,
            ``input_pvalue_fix`` and ``keep_non_rsid`` are read here.
        model: Prediction model, or ``None``. When given, its SNPs are
            passed to the loader and the loaded data is aligned to the
            alleles found in ``model.weights``.
        gwas_format: Column-format description forwarded to
            ``GWAS.load_gwas``.
        name: File name, joined to ``args.gwas_folder`` when that is set.

    Returns:
        A DataFrame with missing values replaced by the string ``"NA"``,
        restricted to the SNP and z-score columns plus beta when present.
    """
    logging.info("Building beta for %s and %s", name, args.model_db_path or "no database")

    if args.gwas_folder:
        source_path = os.path.join(args.gwas_folder, name)
    else:
        source_path = name

    model_snps = model.snps() if model else None
    gwas = GWAS.load_gwas(
        source_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model is not None:
        fields = PredictionModel.WDBQF
        allele_columns = [fields.K_RSID, fields.K_EFFECT_ALLELE, fields.K_NON_EFFECT_ALLELE]
        reference = model.weights[allele_columns].drop_duplicates()
        gwas = GWASAndModels.align_data_to_alleles(gwas, reference, Constants.SNP, fields.K_RSID)

    gwas = gwas.fillna("NA")
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in gwas:
        selected.append(GWAS.BETA)
    return gwas[selected]
Exemple #9
0
    def test_format(self):
        """Check each format accessor on an empty and on a populated format.

        Every accessor must return ``None`` for an empty dict and the paired
        value once ``_add_basic_to_format`` and ``_add_extra_to_format`` have
        filled the dict in.
        """
        # (accessor, value expected after the format has been populated)
        accessors = (
            (GWAS._f_snp, "_snp"),
            (GWAS._f_effect_allele_column, "_effect_allele"),
            (GWAS._f_non_effect_allele_column, "_non_effect_allele"),
            (GWAS._f_pvalue, "_pvalue"),
            (GWAS._f_zscore, "_zscore"),
            (GWAS._f_beta, "_beta"),
            (GWAS._f_beta_sign, "_beta_sign"),
            (GWAS._f_or, "_or"),
            (GWAS._f_se, "_se"),
        )

        column_format = {}
        for accessor, _ in accessors:
            self.assertIsNone(accessor(column_format))

        _add_basic_to_format(column_format)
        _add_extra_to_format(column_format)

        for accessor, expected in accessors:
            self.assertEqual(accessor(column_format), expected)
Exemple #10
0
    def test_load_gwas(self):
        """Load the scz2 sample GWAS under several column-format combinations.

        A minimal format (SNP id and alleles only) is loaded with
        ``strict=False``; the remaining formats add two statistic columns
        plus chromosome and position and are loaded with default arguments.
        """
        sample_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"
        allele_columns = [
            ("column_snp", "SNPID"),
            ("column_non_effect_allele", "A2"),
            ("column_effect_allele", "A1"),
        ]
        location_columns = [
            ("column_chromosome", "HG19CHRC"),
            ("column_position", "BP"),
        ]

        gwas = GWAS.load_gwas(sample_path, dict(allele_columns), strict=False)
        assert_basic_gwas(self, gwas)

        # (statistic columns, assertion helper applied to the loaded data)
        full_format_cases = [
            # full format, OR+SE (which is like beta+se)
            ([("column_or", "OR"), ("column_se", "SE")], assert_gwas_zscore_fbse),
            # full format, beta+SE
            ([("column_beta", "BETA"), ("column_se", "SE")], assert_gwas_zscore_fbse),
            # full format, pvalue+beta
            ([("column_beta", "BETA"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
            # full format, pvalue+beta_sign
            ([("column_beta_sign", "BETA_SIGN"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
            # full format, pvalue+or
            ([("column_or", "OR"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
        ]
        for statistic_columns, check in full_format_cases:
            gwas_format = dict(allele_columns + statistic_columns + location_columns)
            gwas = GWAS.load_gwas(sample_path, gwas_format)
            check(self, gwas)
Exemple #11
0
    def test_format_validation(self):
        """Run basic and strict format validation over column combinations.

        Combinations expected to be invalid must raise
        ``Exceptions.InvalidArguments``; the others must validate without
        raising.
        """
        def with_basic(*entries):
            # Fresh format: the basic columns plus the given (key, value) pairs.
            built = {}
            _add_basic_to_format(built)
            for key, value in entries:
                built[key] = value
            return built

        def assert_rejected(validator, candidate):
            with self.assertRaises(Exceptions.InvalidArguments):
                validator(candidate)

        # An empty format fails both validations.
        columns = {}
        assert_rejected(GWAS.validate_format_basic, columns)
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # Basic columns pass the basic check but not the strict one.
        _add_basic_to_format(columns)
        GWAS.validate_format_basic(columns)
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # Adding p-value and standard error is still rejected by the strict check.
        columns[GWAS.COLUMN_PVALUE] = "_p"
        columns[GWAS.COLUMN_SE] = "_se"
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # z-score
        GWAS.validate_format_for_strict(with_basic((GWAS.COLUMN_ZSCORE, "_z")))

        # p-value + beta
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_BETA, "_beta")))

        # NOTE(review): same keys as the previous case, only the value differs;
        # this looks like it was meant to cover a beta-sign column -- confirm.
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_BETA, "_beta_sign")))

        # p-value + odds ratio
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_OR, "_or")))

        # standard error alone is rejected
        assert_rejected(GWAS.validate_format_for_strict,
                        with_basic((GWAS.COLUMN_SE, "se")))

        # standard error + odds ratio
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_SE, "se"), (GWAS.COLUMN_OR, "or")))

        # NOTE(review): uses the OR key with the value "beta"; presumably the
        # beta column key was intended here -- confirm.
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_SE, "se"), (GWAS.COLUMN_OR, "beta")))
Exemple #12
0
    def test_format(self):
        """Check each format accessor on an empty and on a populated format.

        Every accessor must return ``None`` for an empty dict and the paired
        value once ``_add_basic_to_format`` and ``_add_extra_to_format`` have
        filled the dict in.
        """
        # (accessor, value expected after the format has been populated)
        accessors = (
            (GWAS._f_snp, "_snp"),
            (GWAS._f_effect_allele_column, "_effect_allele"),
            (GWAS._f_non_effect_allele_column, "_non_effect_allele"),
            (GWAS._f_pvalue, "_pvalue"),
            (GWAS._f_zscore, "_zscore"),
            (GWAS._f_beta, "_beta"),
            (GWAS._f_beta_sign, "_beta_sign"),
            (GWAS._f_or, "_or"),
            (GWAS._f_se, "_se"),
        )

        column_format = {}
        for accessor, _ in accessors:
            self.assertIsNone(accessor(column_format))

        _add_basic_to_format(column_format)
        _add_extra_to_format(column_format)

        for accessor, expected in accessors:
            self.assertEqual(accessor(column_format), expected)
Exemple #13
0
 def test_extract(self):
     """Extract three SNPs from sample GWAS data set 3 and check the result."""
     wanted_snps = ["rs3", "rs6", "rs7"]
     sample_rows = SampleData.sample_gwas_data_3()
     full_gwas = GWASUtilities.gwas_from_data(sample_rows)
     extracted = GWAS.extract(full_gwas, wanted_snps)
     assert_gwas_extracted_from_data_3(self, extracted)
Exemple #14
0
def build_betas(args, model, gwas_format, name, model_snp_map):
    """Load one GWAS file, optionally remap its variants and align to a model.

    Args:
        args: Parsed program arguments; ``model_db_path``, ``gwas_folder``,
            ``separator``, ``skip_until_header``, ``handle_empty_columns``,
            ``input_pvalue_fix`` and ``keep_non_rsid`` are read here.
        model: Prediction model, or ``None``. When given, the data is
            aligned to the alleles in ``model.weights`` and the result is
            trimmed to SNP, z-score and (if present) beta.
        gwas_format: Column-format description forwarded to
            ``GWAS.load_gwas``.
        name: File name, joined to ``args.gwas_folder`` when that is set.
        model_snp_map: Path (or other input accepted by
            ``pandas.read_table``) of a variant mapping table; falsy to skip
            the mapping step.

    Returns:
        A DataFrame with missing values replaced by the string ``"NA"``.
    """
    logging.info("Building beta for %s and %s", name,
                 args.model_db_path or "no database")

    if args.gwas_folder:
        source_path = os.path.join(args.gwas_folder, name)
    else:
        source_path = name

    model_snps = model.snps() if model else None
    gwas = GWAS.load_gwas(
        source_path,
        gwas_format,
        snps=model_snps,
        separator=args.separator,
        skip_until_header=args.skip_until_header,
        handle_empty_columns=args.handle_empty_columns,
        input_pvalue_fix=args.input_pvalue_fix,
        keep_non_rsid=args.keep_non_rsid,
    )

    if model_snp_map:
        logging.info("Loading mapping")
        fields = PredictionModel.WDBQF
        raw_mapping = pandas.read_table(model_snp_map)
        renamed_mapping = raw_mapping.rename(columns={
            "a0": fields.K_NON_EFFECT_ALLELE,
            "a1": fields.K_EFFECT_ALLELE,
        })
        mapping_columns = [
            fields.K_RSID, fields.K_EFFECT_ALLELE, fields.K_NON_EFFECT_ALLELE,
            "panel_variant_id", "panel_variant_a0", "panel_variant_a1", "swap",
        ]
        mapping = renamed_mapping[mapping_columns].drop_duplicates()

        logging.info("Mapping variants")
        # Remember the loaded column layout; it is restored after the remap.
        original_columns = list(gwas.columns.values)
        gwas = GWASAndModels.align_data_to_alleles(gwas, mapping,
                                                   Constants.SNP,
                                                   fields.K_RSID)
        # "swap" is presumably carried over from the mapping table by the
        # alignment step -- verify against align_data_to_alleles.
        if GWAS.ZSCORE in gwas:
            gwas = gwas.assign(zscore=gwas.zscore * gwas.swap)
        if GWAS.BETA in gwas:
            gwas = gwas.assign(beta=gwas.beta * gwas.swap)

        # Keep the GWAS' own identifiers under "gwas_*" names, then let the
        # panel variant columns take over the standard column names.
        gwas = gwas.rename(columns={
            GWAS.SNP: "gwas_snp",
            GWAS.EFFECT_ALLELE: "gwas_effect_allele",
            GWAS.NON_EFFECT_ALLELE: "gwas_non_effect_allele",
        })
        gwas = gwas.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])
        gwas = gwas.rename(columns={
            "panel_variant_id": GWAS.SNP,
            "panel_variant_a0": GWASAndModels.NEA,
            "panel_variant_a1": GWASAndModels.EA,
        })
        gwas = gwas[["gwas_snp", "gwas_effect_allele", "gwas_non_effect_allele"]
                    + original_columns]

    if model is not None:
        logging.info("Aligning GWAS to models")
        fields = PredictionModel.WDBQF
        allele_columns = [
            fields.K_RSID, fields.K_EFFECT_ALLELE, fields.K_NON_EFFECT_ALLELE
        ]
        reference = model.weights[allele_columns].drop_duplicates()
        gwas = GWASAndModels.align_data_to_alleles(gwas, reference,
                                                   Constants.SNP,
                                                   fields.K_RSID)
        gwas = gwas.drop(columns=[GWASAndModels.EA_BASE, GWASAndModels.NEA_BASE])

    gwas = gwas.fillna("NA")

    # Without a model the full table is returned untrimmed.
    if model is None:
        return gwas

    logging.info("Trimming output")
    selected = [GWAS.SNP, GWAS.ZSCORE]
    if GWAS.BETA in gwas:
        selected.append(GWAS.BETA)
    return gwas[selected]
Exemple #15
0
    def test_load_gwas(self):
        """Load the scz2 sample GWAS under several column-format combinations.

        A minimal format (SNP id and alleles only) is loaded with
        ``strict=False``; the remaining formats add two statistic columns
        plus chromosome and position and are loaded with default arguments.
        """
        sample_path = "tests/_td/GWAS/scz2/scz2.gwas.results.txt.gz"
        allele_columns = [
            ("column_snp", "SNPID"),
            ("column_non_effect_allele", "A2"),
            ("column_effect_allele", "A1"),
        ]
        location_columns = [
            ("column_chromosome", "HG19CHRC"),
            ("column_position", "BP"),
        ]

        gwas = GWAS.load_gwas(sample_path, dict(allele_columns), strict=False)
        assert_basic_gwas(self, gwas)

        # (statistic columns, assertion helper applied to the loaded data)
        full_format_cases = [
            # full format, OR+SE (which is like beta+se)
            ([("column_or", "OR"), ("column_se", "SE")], assert_gwas_zscore_fbse),
            # full format, beta+SE
            ([("column_beta", "BETA"), ("column_se", "SE")], assert_gwas_zscore_fbse),
            # full format, pvalue+beta
            ([("column_beta", "BETA"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
            # full format, pvalue+beta_sign
            ([("column_beta_sign", "BETA_SIGN"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
            # full format, pvalue+or
            ([("column_or", "OR"), ("column_pvalue", "P")], assert_gwas_zscore_pb),
        ]
        for statistic_columns, check in full_format_cases:
            gwas_format = dict(allele_columns + statistic_columns + location_columns)
            gwas = GWAS.load_gwas(sample_path, gwas_format)
            check(self, gwas)
Exemple #16
0
    def test_format_validation(self):
        """Run basic and strict format validation over column combinations.

        Combinations expected to be invalid must raise
        ``Exceptions.InvalidArguments``; the others must validate without
        raising.
        """
        def with_basic(*entries):
            # Fresh format: the basic columns plus the given (key, value) pairs.
            built = {}
            _add_basic_to_format(built)
            for key, value in entries:
                built[key] = value
            return built

        def assert_rejected(validator, candidate):
            with self.assertRaises(Exceptions.InvalidArguments):
                validator(candidate)

        # An empty format fails both validations.
        columns = {}
        assert_rejected(GWAS.validate_format_basic, columns)
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # Basic columns pass the basic check but not the strict one.
        _add_basic_to_format(columns)
        GWAS.validate_format_basic(columns)
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # Adding p-value and standard error is still rejected by the strict check.
        columns[GWAS.COLUMN_PVALUE] = "_p"
        columns[GWAS.COLUMN_SE] = "_se"
        assert_rejected(GWAS.validate_format_for_strict, columns)

        # z-score
        GWAS.validate_format_for_strict(with_basic((GWAS.COLUMN_ZSCORE, "_z")))

        # p-value + beta
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_BETA, "_beta")))

        # NOTE(review): same keys as the previous case, only the value differs;
        # this looks like it was meant to cover a beta-sign column -- confirm.
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_BETA, "_beta_sign")))

        # p-value + odds ratio
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_PVALUE, "_p"), (GWAS.COLUMN_OR, "_or")))

        # standard error alone is rejected
        assert_rejected(GWAS.validate_format_for_strict,
                        with_basic((GWAS.COLUMN_SE, "se")))

        # standard error + odds ratio
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_SE, "se"), (GWAS.COLUMN_OR, "or")))

        # NOTE(review): uses the OR key with the value "beta"; presumably the
        # beta column key was intended here -- confirm.
        GWAS.validate_format_for_strict(
            with_basic((GWAS.COLUMN_SE, "se"), (GWAS.COLUMN_OR, "beta")))
Exemple #17
0
 def test_extract(self):
     """Extract three SNPs from sample GWAS data set 3 and check the result."""
     wanted_snps = ["rs3", "rs6", "rs7"]
     sample_rows = SampleData.sample_gwas_data_3()
     full_gwas = GWASUtilities.gwas_from_data(sample_rows)
     extracted = GWAS.extract(full_gwas, wanted_snps)
     assert_gwas_extracted_from_data_3(self, extracted)