Esempio n. 1
0
def run(args, _gwas=None):
    """Run the MetaXcan association end to end and write the result to CSV.

    Nothing is computed when ``args.output_file`` already exists and
    ``args.overwrite`` is falsy; the function logs a notice and returns.

    Args:
        args: Options object. This function reads ``overwrite``,
            ``output_file`` and ``keep_ens_version``; the whole object is
            also forwarded to ``MetaxcanUtilities.build_context``.
        _gwas: Optional value forwarded untouched to
            ``MetaxcanUtilities.build_context``.

    Returns:
        None. The formatted results are written to ``args.output_file``.
    """
    t0 = timer()

    already_done = (not args.overwrite) and os.path.exists(args.output_file)
    if already_done:
        logging.info("%s already exists, move it or delete it if you want it done again", args.output_file)
        return

    logging.info("Started metaxcan association")
    context = MetaxcanUtilities.build_context(args, _gwas)

    # Progress is reported as the share of the model's snps seen so far.
    progress = Utilities.PercentReporter(logging.INFO, len(context.get_model_snps()))
    seen_snps = set()

    genes, _ = context.get_data_intersection()

    rows = []
    for gene in genes:
        row, gene_snps = AssociationCalculation.association(gene, context, return_snps=True)
        rows.append(row)
        seen_snps.update(gene_snps)
        progress.update(len(seen_snps), "%d %% of model's snps found so far in the gwas study")

    Utilities.ensure_requisite_folders(args.output_file)

    # force=True is passed so the final figure is reported by the reporter.
    progress.update(len(seen_snps), "%d %% of model's snps used", force=True)

    # This version hands dataframe_from_results the transposed rows.
    table = AssociationCalculation.dataframe_from_results(zip(*rows))
    table = MetaxcanUtilities.format_output(table, context, args.keep_ens_version)
    table.to_csv(args.output_file, index=False)

    elapsed = str(timer() - t0)
    logging.info("Sucessfully processed metaxcan association in %s seconds" % elapsed)
Esempio n. 2
0
def run_metaxcan(args, context):
    """Compute the association for each gene in the context's data intersection.

    Args:
        args: Options object; ``remove_ens_version`` is forwarded to
            ``MetaxcanUtilities.format_output`` and ``output_file``, when
            truthy, is the CSV path the results are written to.
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also handed to
            ``AssociationCalculation.association``.

    Returns:
        The formatted results object produced by
        ``MetaxcanUtilities.format_output``.
    """
    logging.info("Started metaxcan association")

    # Progress is reported against the number of snps in the model.
    progress = Utilities.PercentReporter(logging.INFO, len(context.get_model_snps()))
    seen_snps = set()

    genes, _ = context.get_data_intersection()

    rows = []
    for gene in genes:
        logging.log(7, "Processing gene %s", gene)
        row, gene_snps = AssociationCalculation.association(gene, context, return_snps=True)
        rows.append(row)
        seen_snps.update(gene_snps)
        progress.update(len(seen_snps), "%d %% of model's snps found so far in the gwas study")

    progress.update(len(seen_snps), "%d %% of model's snps used", force=True)

    table = MetaxcanUtilities.format_output(
        AssociationCalculation.dataframe_from_results(rows),
        context,
        args.remove_ens_version)

    # Writing to disk is optional: skipped when no output file was given.
    out_path = args.output_file
    if out_path:
        Utilities.ensure_requisite_folders(out_path)
        table.to_csv(out_path, index=False)

    return table
Esempio n. 3
0
def run_additional(args, context):
    """Compute the additional statistics for each gene in the data intersection.

    Args:
        args: Options object; ``remove_ens_version`` is forwarded to
            ``MetaxcanUtilities.format_additional_output`` and
            ``additional_output``, when truthy, is the CSV path the
            statistics are written to.
        context: Object providing ``get_data_intersection()``; it is also
            handed to ``AssociationCalculation.additional_stats``.

    Returns:
        The formatted statistics object produced by
        ``MetaxcanUtilities.format_additional_output``.
    """
    logging.info("Started metaxcan additional stats")

    genes, _ = context.get_data_intersection()
    per_gene = [AssociationCalculation.additional_stats(gene, context) for gene in genes]

    table = AssociationCalculation.dataframe_from_aditional_stats(per_gene)
    table = MetaxcanUtilities.format_additional_output(table, context, args.remove_ens_version)

    # Writing to disk is optional: skipped when no path was given.
    out_path = args.additional_output
    if out_path:
        Utilities.ensure_requisite_folders(out_path)
        table.to_csv(out_path, index=False)

    return table
    def test_build_context(self):
        """Check the association tuple computed for genes A through G.

        Every gene is evaluated against one shared context from
        ``_context()`` and compared with ``assert_equal_tuple``.
        """
        nan = numpy.nan
        expected_rows = (
            ('A', 0.42313735862217716, 0.42845528455235105,
             0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686,
             0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989,
             0.013333333333320003, 3, 2, 1),
            ('D', nan, nan, nan, 2, nan, 0),
            ('E', nan, nan, nan, 1, nan, 0),
            # The following two are cases of "wrong" data
            ('F', nan, nan, 0, 2, 2, 2),
            ('G', nan, nan, 0, 1, 1, 1),
        )

        c = _context()
        for expected in expected_rows:
            # The first element of each expected tuple is the gene name.
            gene = expected[0]
            r, _snps = AssociationCalculation.association(gene, c, return_snps=True)
            assert_equal_tuple(self, r, expected)
Esempio n. 5
0
def run_metaxcan(args, context):
    """Compute the association (and optionally extra stats) for each gene.

    Genes come from ``context.get_data_intersection()``. Processing stops
    early once ``args.MAX_R`` genes have been handled, when that option is
    truthy.

    Args:
        args: Options object. Reads ``MAX_R`` (optional cap on the number
            of genes processed), ``additional_output`` (when truthy,
            additional stats are computed per gene and merged into the
            results), ``remove_ens_version`` (forwarded to the formatting
            helpers) and ``output_file`` (when truthy, CSV path to write).
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also handed to the
            ``AssociationCalculation`` helpers.

    Returns:
        The formatted results object: the output of
        ``MetaxcanUtilities.format_output``, or of
        ``MetaxcanUtilities.merge_additional_output`` when
        ``args.additional_output`` is truthy.
    """
    logging.info("Started metaxcan association")
    model_snps = context.get_model_snps()
    total_snps = len(model_snps)
    snps_found = set()
    reporter = Utilities.PercentReporter(logging.INFO, total_snps)

    i_genes, _ = context.get_data_intersection()

    results = []
    additional = []
    for i, gene in enumerate(i_genes):
        if args.MAX_R and i + 1 > args.MAX_R:
            # logging.log() requires an explicit level as its first
            # argument; the original one-argument call raised TypeError
            # exactly when the early exit was triggered.
            logging.info("Early exit condition met")
            break
        logging.log(9, "Processing gene %i:%s", i, gene)
        r, snps = AssociationCalculation.association(gene,
                                                     context,
                                                     return_snps=True)
        results.append(r)
        snps_found.update(snps)
        reporter.update(
            len(snps_found),
            "%d %% of model's snps found so far in the gwas study")
        if args.additional_output:
            stats_ = AssociationCalculation.additional_stats(gene, context)
            additional.append(stats_)

    reporter.update(len(snps_found), "%d %% of model's snps used", force=True)

    results = AssociationCalculation.dataframe_from_results(results)
    results = MetaxcanUtilities.format_output(results, context,
                                              args.remove_ens_version)

    if args.additional_output:
        additional = AssociationCalculation.dataframe_from_aditional_stats(
            additional)
        results = MetaxcanUtilities.merge_additional_output(
            results, additional, context, args.remove_ens_version)

    # Writing to disk is optional: skipped when no output file was given.
    if args.output_file:
        Utilities.ensure_requisite_folders(args.output_file)
        results.to_csv(args.output_file, index=False)

    return results
Esempio n. 6
0
    def test_dataframe_from_results(self):
        """Check that dataframe_from_results exposes each result field as a column.

        The input rows are transposed and every checked column of the
        returned object is compared with the matching transposed field.
        """
        A = AssociationCalculation.ARF
        rows = [
            ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1)]
        frame = AssociationCalculation.dataframe_from_results(rows)

        # One tuple per field, indexable by the ARF positional constants.
        fields = list(zip(*rows))

        # (column key, positional index) pairs, checked in this order.
        column_to_field = (
            (A.K_GENE, A.GENE),
            (A.K_ZSCORE, A.ZSCORE),
            (A.K_EFFECT_SIZE, A.EFFECT_SIZE),
            (A.K_N_SNPS_IN_MODEL, A.N_SNPS_IN_MODEL),
            (A.K_N_SNPS_IN_COV, A.N_SNPS_IN_COV),
            (A.K_N_SNPS_USED, A.N_SNPS_USED),
        )
        for column, position in column_to_field:
            numpy.testing.assert_array_equal(frame[column], fields[position])
    def test_dataframe_from_results(self):
        """Check that dataframe_from_results exposes each result field as a column.

        The input rows are transposed and every checked column of the
        returned object is compared with the matching transposed field.
        """
        results = [
            ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1)]
        d = AssociationCalculation.dataframe_from_results(results)
        A = AssociationCalculation.ARF

        # zip() returns a one-shot iterator on Python 3 and cannot be
        # indexed; materialize it so r_[...] works on Python 2 and 3 alike.
        r_ = list(zip(*results))
        numpy.testing.assert_array_equal(d[A.K_GENE], r_[A.GENE])
        numpy.testing.assert_array_equal(d[A.K_ZSCORE], r_[A.ZSCORE])
        numpy.testing.assert_array_equal(d[A.K_EFFECT_SIZE], r_[A.EFFECT_SIZE])
        numpy.testing.assert_array_equal(d[A.K_N_SNPS_IN_MODEL], r_[A.N_SNPS_IN_MODEL])
        numpy.testing.assert_array_equal(d[A.K_N_SNPS_IN_COV], r_[A.N_SNPS_IN_COV])
        numpy.testing.assert_array_equal(d[A.K_N_SNPS_USED], r_[A.N_SNPS_USED])
    def test_build_context(self):
        """Check the association tuple computed for genes A through G.

        Every gene is evaluated against one shared context from
        ``_context()`` and compared with ``assertEqual``.
        """
        nan = numpy.nan
        expected_rows = (
            ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1),
            ('D', nan, nan, nan, 2, nan, 0),
            ('E', nan, nan, nan, 1, nan, 0),
            # The following two are cases of "wrong" data
            ('F', nan, nan, 0, 2, 2, 2),
            ('G', nan, nan, 0, 1, 1, 1),
        )

        c = _context()
        for expected in expected_rows:
            # The first element of each expected tuple is the gene name.
            gene = expected[0]
            r, _snps = AssociationCalculation.association(gene, c, return_snps=True)
            # NOTE(review): NaN != NaN, so the NaN rows only compare equal
            # when association() returns the very same numpy.nan object
            # (tuple equality checks identity first) -- confirm.
            self.assertEqual(r, expected)