Example #1
0
def run(args, _gwas=None):
    """Compute the metaxcan association and save it as CSV at ``args.output_file``.

    Nothing is computed when the output file is already present and
    ``args.overwrite`` is falsy; a message is logged and the function returns.

    Args:
        args: Options object. The attributes read here are ``overwrite``,
            ``output_file`` and ``keep_ens_version``; the whole object is also
            handed to ``MetaxcanUtilities.build_context``.
        _gwas: Forwarded unchanged to ``MetaxcanUtilities.build_context``.

    Returns:
        None. The formatted results are written to ``args.output_file``.
    """
    started_at = timer()
    if not args.overwrite:
        if os.path.exists(args.output_file):
            logging.info("%s already exists, move it or delete it if you want it done again", args.output_file)
            return
    logging.info("Started metaxcan association")

    context = MetaxcanUtilities.build_context(args, _gwas)

    # Progress is reported as the share of the model's snps seen so far.
    reporter = Utilities.PercentReporter(logging.INFO, len(context.get_model_snps()))
    seen_snps = set()

    genes, _snps_in_common = context.get_data_intersection()

    per_gene = []
    for gene in genes:
        gene_result, gene_snps = AssociationCalculation.association(gene, context, return_snps=True)
        per_gene.append(gene_result)
        seen_snps.update(gene_snps)
        reporter.update(len(seen_snps), "%d %% of model's snps found so far in the gwas study")

    Utilities.ensure_requisite_folders(args.output_file)

    # force=True requests the final report explicitly.
    reporter.update(len(seen_snps), "%d %% of model's snps used", force=True)

    # The per-gene results are transposed before the data frame is built.
    frame = AssociationCalculation.dataframe_from_results(zip(*per_gene))
    frame = MetaxcanUtilities.format_output(frame, context, args.keep_ens_version)
    frame.to_csv(args.output_file, index=False)

    elapsed = str(timer() - started_at)
    logging.info("Sucessfully processed metaxcan association in %s seconds" % elapsed)
Example #2
0
def run_metaxcan(args, context):
    """Run the association for every gene in the context's data intersection.

    Args:
        args: Options object; ``remove_ens_version`` is passed to
            ``MetaxcanUtilities.format_output`` and ``output_file``, when
            truthy, is the CSV path the results are saved to.
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also passed to the association
            and formatting helpers.

    Returns:
        The value produced by ``MetaxcanUtilities.format_output``.
    """
    logging.info("Started metaxcan association")

    # Progress is reported as the share of the model's snps seen so far.
    reporter = Utilities.PercentReporter(logging.INFO, len(context.get_model_snps()))
    seen_snps = set()

    genes, _snps_in_common = context.get_data_intersection()

    per_gene = []
    for gene in genes:
        logging.log(7, "Processing gene %s", gene)
        gene_result, gene_snps = AssociationCalculation.association(gene, context, return_snps=True)
        per_gene.append(gene_result)
        seen_snps.update(gene_snps)
        reporter.update(len(seen_snps), "%d %% of model's snps found so far in the gwas study")

    # force=True requests the final report explicitly.
    reporter.update(len(seen_snps), "%d %% of model's snps used", force=True)

    frame = MetaxcanUtilities.format_output(
        AssociationCalculation.dataframe_from_results(per_gene),
        context,
        args.remove_ens_version)

    # Saving is optional: without an output path the caller only gets the frame.
    if args.output_file:
        Utilities.ensure_requisite_folders(args.output_file)
        frame.to_csv(args.output_file, index=False)

    return frame
    def test_build_context(self):
        """Check the association result tuple for genes A through G.

        Each expected tuple starts with the gene name, which is also the
        argument passed to ``AssociationCalculation.association``.
        """
        c = _context()
        nan = numpy.nan

        expectations = (
            ('A', 0.42313735862217716, 0.42845528455235105,
             0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686,
             0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989,
             0.013333333333320003, 3, 2, 1),
            ('D', nan, nan, nan, 2, nan, 0),
            ('E', nan, nan, nan, 1, nan, 0),
            # The following two are cases of "wrong" data
            ('F', nan, nan, 0, 2, 2, 2),
            ('G', nan, nan, 0, 1, 1, 1),
        )

        for expected in expectations:
            gene = expected[0]
            r, _snps = AssociationCalculation.association(gene, c, return_snps=True)
            assert_equal_tuple(self, r, expected)
Example #4
0
def run_metaxcan(args, context):
    """Run the association for the genes in the context's data intersection.

    Args:
        args: Options object. Attributes read here:
            ``MAX_R`` - when truthy, processing stops once that many genes
            have been handled;
            ``additional_output`` - when truthy, additional per-gene stats are
            collected and merged into the results;
            ``remove_ens_version`` - forwarded to the formatting/merging
            helpers;
            ``output_file`` - when truthy, CSV path the results are saved to.
        context: Object providing ``get_model_snps()`` and
            ``get_data_intersection()``; it is also passed to the association,
            formatting and merging helpers.

    Returns:
        The formatted results (merged with the additional stats when
        ``args.additional_output`` is truthy).
    """
    logging.info("Started metaxcan association")
    model_snps = context.get_model_snps()
    total_snps = len(model_snps)
    snps_found = set()
    # Progress is reported as the share of the model's snps seen so far.
    reporter = Utilities.PercentReporter(logging.INFO, total_snps)

    i_genes, i_snps = context.get_data_intersection()

    results = []
    additional = []
    for i, gene in enumerate(i_genes):
        if args.MAX_R and i + 1 > args.MAX_R:
            # logging.log() needs an explicit level as its first argument;
            # calling it with only the message raised TypeError and turned the
            # early exit into a crash.
            logging.info("Early exit condition met")
            break
        logging.log(9, "Processing gene %i:%s", i, gene)
        r, snps = AssociationCalculation.association(gene,
                                                     context,
                                                     return_snps=True)
        results.append(r)
        snps_found.update(snps)
        reporter.update(
            len(snps_found),
            "%d %% of model's snps found so far in the gwas study")
        if args.additional_output:
            stats_ = AssociationCalculation.additional_stats(gene, context)
            additional.append(stats_)

    # force=True requests the final report explicitly.
    reporter.update(len(snps_found), "%d %% of model's snps used", force=True)

    results = AssociationCalculation.dataframe_from_results(results)
    results = MetaxcanUtilities.format_output(results, context,
                                              args.remove_ens_version)

    if args.additional_output:
        additional = AssociationCalculation.dataframe_from_aditional_stats(
            additional)
        results = MetaxcanUtilities.merge_additional_output(
            results, additional, context, args.remove_ens_version)

    # Saving is optional: without an output path the caller only gets the frame.
    if args.output_file:
        Utilities.ensure_requisite_folders(args.output_file)
        results.to_csv(args.output_file, index=False)

    return results
    def test_build_context(self):
        """Check the association result tuple for genes A through G.

        Each expected tuple starts with the gene name, which is also the
        argument passed to ``AssociationCalculation.association``.

        NOTE(review): NaN never equals itself, so the NaN fields below only
        match when the result carries the very same ``numpy.nan`` object
        (tuple comparison short-circuits on identity).
        """
        c = _context()
        nan = numpy.nan

        expectations = (
            ('A', 0.42313735862217716, 0.42845528455235105, 0.10250000000002803, 4, 4, 3),
            ('B', 1.904102672555114, 1.4285714285708686, 0.16333333333323405, 6, 6, 6),
            ('C', 0.089999999999999983, 0.049999999999999989, 0.013333333333320003, 3, 2, 1),
            ('D', nan, nan, nan, 2, nan, 0),
            ('E', nan, nan, nan, 1, nan, 0),
            # The following two are cases of "wrong" data
            ('F', nan, nan, 0, 2, 2, 2),
            ('G', nan, nan, 0, 1, 1, 1),
        )

        for expected in expectations:
            gene = expected[0]
            r, _snps = AssociationCalculation.association(gene, c, return_snps=True)
            self.assertEqual(r, expected)