Esempio n. 1
0
def run_query(args):
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        subjects = []
    kwargs = {}
    if args.bcolz:
        import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    gq = GeminiQuery.GeminiQuery(args.db, out_format=formatter, **kwargs)
    gq.run(args.query,
           args.gt_filter,
           args.show_variant_samples,
           args.sample_delim,
           predicates,
           genotypes_needed,
           gene_needed,
           args.show_families,
           subjects=subjects)

    if args.use_header and gq.header:
        print gq.header

    if not args.dgidb:
        for row in gq:
            print row
    else:
        # collect a list of all the genes that need to be queried
        # from DGIdb
        genes = defaultdict()
        for row in gq:
            genes[row['gene']] = True

        # collect info from DGIdb
        dgidb_info = query_dgidb(genes)

        # rerun the query (the cursor is now consumed)
        gq = GeminiQuery.GeminiQuery(args.db, out_format=formatter)
        gq.run(args.query,
               args.gt_filter,
               args.show_variant_samples,
               args.sample_delim,
               predicates,
               genotypes_needed,
               gene_needed,
               args.show_families,
               subjects=subjects,
               **kwargs)

        # report the query results with DGIdb info added at the end.
        for row in gq:
            print str(row) + "\t" + str(dgidb_info[row['gene']])
def get_actionable_mutations(parser, args):

    t_n_pairs = gemini_subjects.get_families(args.db)

    query = "SELECT variants.chrom, start, end, ref, alt, \
                    variants.gene, impact, is_somatic, \
                    gene_summary.in_cosmic_census \
             FROM variants, gene_summary \
             WHERE variants.is_somatic = 1 \
             AND (variants.type = 'snp' \
                 OR variants.type = 'indel') \
             AND (variants.impact_severity = 'HIGH' \
                 OR variants.impact_severity = 'MED') \
             AND variants.chrom = gene_summary.chrom \
             AND variants.gene = gene_summary.gene \
             AND gene_summary.in_cosmic_census = 1"


    # collect the relevant genes and query DGIDB
    gq = GeminiQuery.GeminiQuery(args.db)
    gq.run(query)

    genes = defaultdict()
    for row in gq:
      genes[row['gene']] = True
    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)


    # now rerun the query and report actionable mutations per DGIDB and COSMIC census.
    gq = GeminiQuery.GeminiQuery(args.db)
    gq.run(query)
    print'\t'.join(['tum_name', 'chrom', 'start', 'end', 'ref', 'alt', \
                    'gene', 'impact', 'is_somatic', 'in_cosmic_census', 'dgidb_info'])
    for row in gq:

        for pair in t_n_pairs:
            samples = pair.subjects
            if len(samples) != 2:
                continue

            tumor = pair.subjects[0]
            normal = pair.subjects[1]
            # swap if we guessed the tumor incorrectly
            if tumor.affected is False:
                tumor, normal = normal, tumor

            print'\t'.join(str(s) for s in [tumor.name, row['chrom'], \
                                            row['start'], row['end'], \
                                            row['ref'], row['alt'], \
                                            row['gene'], row['impact'], \
                                            row['is_somatic'], \
                                            row['in_cosmic_census'], \
                                            str(dgidb_info[row['gene']])])
def get_actionable_mutations(parser, args):

    t_n_pairs = gemini_subjects.get_families(args.db)

    query = "SELECT variants.chrom, start, end, ref, alt, \
                    variants.gene, impact, is_somatic, \
                    gene_summary.in_cosmic_census \
             FROM variants, gene_summary \
             WHERE variants.is_somatic = 1 \
             AND (variants.type = 'snp' \
                 OR variants.type = 'indel') \
             AND (variants.impact_severity = 'HIGH' \
                 OR variants.impact_severity = 'MED') \
             AND variants.chrom = gene_summary.chrom \
             AND variants.gene = gene_summary.gene \
             AND gene_summary.in_cosmic_census = 1"


    # collect the relevant genes and query DGIDB
    gq = GeminiQuery.GeminiQuery(args.db)
    gq.run(query)

    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True
    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)


    # now rerun the query and report actionable mutations per DGIDB and COSMIC census.
    gq = GeminiQuery.GeminiQuery(args.db)
    gq.run(query)
    print'\t'.join(['tum_name', 'chrom', 'start', 'end', 'ref', 'alt', \
                    'gene', 'impact', 'is_somatic', 'in_cosmic_census', 'dgidb_info'])
    for row in gq:
        for pair in t_n_pairs:
            samples = pair.subjects
            if len(samples) != 2:
                continue

            tumor = pair.subjects[0]
            normal = pair.subjects[1]
            # swap if we guessed the tumor incorrectly
            if tumor.affected is False:
                tumor, normal = normal, tumor

            print'\t'.join(str(s) for s in [tumor.name, row['chrom'], \
                                            row['start'], row['end'], \
                                            row['ref'], row['alt'], \
                                            row['gene'], row['impact'], \
                                            row['is_somatic'], \
                                            row['in_cosmic_census'], \
                                            str(dgidb_info[row['gene']])])
Esempio n. 4
0
def run_query(args):
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        subjects = []
    kwargs = {}
    if args.bcolz:
        import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    gq = GeminiQuery.GeminiQuery(args.db, out_format=formatter, **kwargs)
    gq.run(args.query, args.gt_filter, args.show_variant_samples,
           args.sample_delim, predicates, genotypes_needed,
           gene_needed, args.show_families, subjects=subjects)

    if args.use_header and gq.header:
        print gq.header

    if not args.dgidb:
        for row in gq:
            print row
    else:
        # collect a list of all the genes that need to be queried
        # from DGIdb
        genes = defaultdict()
        for row in gq:
            genes[row['gene']] = True

        # collect info from DGIdb
        dgidb_info = query_dgidb(genes)

        # rerun the query (the cursor is now consumed)
        gq = GeminiQuery.GeminiQuery(args.db, out_format=formatter)
        gq.run(args.query, args.gt_filter, args.show_variant_samples,
               args.sample_delim, predicates, genotypes_needed,
               gene_needed, args.show_families, subjects=subjects, **kwargs)

        # report the query results with DGIdb info added at the end.
        for row in gq:
            print str(row) + "\t" + str(dgidb_info[row['gene']])