def run_query(args):
    """Execute the user's query and print the results to stdout.

    Reads the following attributes from ``args``: ``db``, ``query``,
    ``gt_filter``, ``show_variant_samples``, ``sample_delim``,
    ``show_families``, ``use_header``, ``dgidb`` and ``bcolz``.

    When ``args.dgidb`` is set the query is executed twice: a first pass
    collects the gene of every row so DGIdb can be queried once, and a
    second pass prints each row followed by a tab and the DGIdb info for
    that row's gene.  Otherwise each row is printed as-is.
    """
    # Keep this order: both helpers receive the same ``args`` object.
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)

    # A KeyError from get_subjects is treated as "no subjects".
    try:
        subjects = get_subjects(args)
    except KeyError:
        subjects = []

    # Optional constructor arguments for GeminiQuery.
    kwargs = {}
    if args.bcolz:
        # Imported lazily so gemini_bcolz is only needed when requested.
        import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    def _execute():
        # Build and start a fresh query.  Both passes of the DGIdb branch go
        # through here so they are configured identically; in particular the
        # bcolz ``variant_id_getter`` always goes to the constructor (the
        # rerun used to hand it to ``run()`` instead).
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **kwargs)
        query.run(args.query, args.gt_filter, args.show_variant_samples,
                  args.sample_delim, predicates, genotypes_needed,
                  gene_needed, args.show_families, subjects=subjects)
        return query

    gq = _execute()

    # NOTE: every print below takes a single parenthesised argument, which
    # prints identically under the Python 2 print statement.
    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # Collect all the genes that need to be queried from DGIdb.  The mapping
    # is used as a set: only its keys matter, every value is True.
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # Rerun the query (the first cursor is now consumed) and report the
    # query results with DGIdb info added at the end.
    gq = _execute()
    for row in gq:
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
def get_actionable_mutations(parser, args):
    """Print somatic variants in COSMIC census genes with DGIdb annotations.

    The query selects somatic SNP/indel variants of HIGH or MED impact
    severity whose gene is flagged ``in_cosmic_census`` in ``gene_summary``.
    It is run twice against ``args.db``: once to gather the genes to send to
    DGIdb, and once more to emit the report.

    Output is tab-separated on stdout: a header line, then each result row
    repeated once per family that has exactly two subjects, prefixed with
    the name of the subject taken to be the tumor.

    ``parser`` is accepted but not used.
    """
    # Columns copied verbatim from each result row, in output order.
    row_columns = ('chrom', 'start', 'end', 'ref', 'alt', 'gene', 'impact',
                   'is_somatic', 'in_cosmic_census')

    t_n_pairs = gemini_subjects.get_families(args.db)

    query = "SELECT variants.chrom, start, end, ref, alt, \
                    variants.gene, impact, is_somatic, \
                    gene_summary.in_cosmic_census \
             FROM variants, gene_summary \
             WHERE variants.is_somatic = 1 \
             AND (variants.type = 'snp' \
                 OR variants.type = 'indel') \
             AND (variants.impact_severity = 'HIGH' \
                 OR variants.impact_severity = 'MED') \
             AND variants.chrom = gene_summary.chrom \
             AND variants.gene = gene_summary.gene \
             AND gene_summary.in_cosmic_census = 1"

    # First pass: gather the relevant genes (keys only matter) for DGIdb.
    gene_pass = GeminiQuery.GeminiQuery(args.db)
    gene_pass.run(query)
    genes = defaultdict()
    genes.update((hit['gene'], True) for hit in gene_pass)

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # Second pass: rerun the query and report actionable mutations per
    # DGIdb and COSMIC census.
    report_pass = GeminiQuery.GeminiQuery(args.db)
    report_pass.run(query)

    print('\t'.join(['tum_name'] + list(row_columns) + ['dgidb_info']))

    for row in report_pass:
        for pair in t_n_pairs:
            members = pair.subjects
            # Only families made of exactly two subjects are reported.
            if len(members) != 2:
                continue

            first, second = members
            # The first subject is assumed to be the tumor unless it is
            # explicitly flagged as unaffected, in which case take the other.
            tumor = second if first.affected is False else first

            fields = [tumor.name]
            fields.extend(row[column] for column in row_columns)
            fields.append(str(dgidb_info[row['gene']]))
            print('\t'.join(str(field) for field in fields))