def run_query(args):
    """Run the query described by ``args`` and print its results.

    The header is printed first when ``args.use_header`` is set and the
    query exposes a non-empty ``header``.  Without ``args.dgidb`` every
    result row is printed as is.  With ``args.dgidb`` the query is run
    twice: the first pass gathers the ``gene`` value of every row for one
    ``query_dgidb`` call, the second pass prints each row followed by a tab
    and the DGIdb entry for that row's gene.

    When ``args.bcolz`` is set, ``gemini_bcolz.filter`` is passed to the
    ``GeminiQuery`` constructor as ``variant_id_getter`` for both passes.
    """
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)

    try:
        subjects = get_subjects(args)
    except KeyError:
        # A KeyError from get_subjects is treated as "no subjects".
        subjects = []

    kwargs = {}
    if args.bcolz:
        import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    def execute():
        # Single place that builds and runs the query, so the DGIdb rerun
        # is configured exactly like the first pass.  Previously the rerun
        # dropped ``variant_id_getter`` from the constructor and passed it
        # to ``run()`` instead.
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **kwargs)
        query.run(args.query, args.gt_filter, args.show_variant_samples,
                  args.sample_delim, predicates, genotypes_needed,
                  gene_needed, args.show_families, subjects=subjects)
        return query

    gq = execute()

    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # collect a list of all the genes that need to be queried
    # from DGIdb
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # rerun the query (the cursor is now consumed)
    gq = execute()

    # report the query results with DGIdb info added at the end.
    for row in gq:
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
def family_wise_predicate(args):
    """Return a row predicate that combines one sub-predicate per family.

    A sub-predicate is built with ``select_subjects_predicate`` for every
    family in ``get_family_dict(args)``.  The returned callable sums the
    values those sub-predicates give for a row and reports whether the sum
    reaches ``args.min_kindreds``.
    """
    out_format = select_formatter(args)
    family_map = get_family_dict(args)
    # The query object is not used below; it is still constructed so that
    # whatever the constructor does with args.db keeps happening.
    gq = GeminiQuery.GeminiQuery(args.db, out_format=out_format)

    def build_family_check(members):
        # One predicate per family, from its member names and subjects.
        member_names = [member.name for member in members]
        family_subjects = get_subjects_in_family(args, members).values()
        return select_subjects_predicate(family_subjects, args, member_names)

    family_checks = [build_family_check(members)
                     for members in family_map.values()]

    def predicate(row):
        outcomes = [check(row) for check in family_checks]
        return sum(outcomes) >= args.min_kindreds

    return predicate
def run_query(args):
    """Run the query in ``args`` against ``args.db`` and print every row.

    The header is printed before the rows when ``args.use_header`` is set
    and the query exposes a non-empty ``header``.
    """
    row_filters = get_row_predicates(args)
    add_required_columns_to_query(args)
    out_format = select_formatter(args)

    query = GeminiQuery.GeminiQuery(args.db, out_format=out_format)
    query.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        row_filters,
        needs_genotypes(args),
    )

    if args.use_header:
        if query.header:
            print(query.header)

    for record in query:
        print(record)
def region(parser, args):
    """Dispatch a region or gene lookup against ``args.db``.

    Nothing happens when the ``args.db`` path does not exist.  Supplying
    both ``args.region`` and ``args.gene`` terminates the program through
    ``sys.exit``; otherwise ``get_region`` or ``get_gene`` is called with
    the query object.  ``parser`` is accepted but not used here.
    """
    if not os.path.exists(args.db):
        return

    out_format = select_formatter(args)
    gq = GeminiQuery.GeminiQuery(args.db, out_format=out_format)

    region_given = args.region is not None
    gene_given = args.gene is not None

    # The two selectors are mutually exclusive.
    if region_given and gene_given:
        sys.exit('EXITING: Choose either --reg or --gene, not both.\n')

    if region_given:
        get_region(args, gq)
    elif gene_given:
        get_gene(args, gq)
def run_query(args):
    """Run the query in ``args`` and print the rows, optionally with DGIdb info.

    The header is printed first when ``args.use_header`` is set and the
    query exposes a non-empty ``header``.  Without ``args.dgidb`` the rows
    are printed directly.  With ``args.dgidb`` a first pass gathers each
    row's ``gene`` for ``query_dgidb``, then the query is run again and
    each row is printed with a tab and the DGIdb entry for its gene.
    """
    row_filters = get_row_predicates(args)
    add_required_columns_to_query(args)
    out_format = select_formatter(args)
    want_genotypes = needs_genotypes(args)
    want_gene = needs_gene(args)

    def fresh_query():
        # Build a new query object and execute it; used for both passes.
        query = GeminiQuery.GeminiQuery(args.db, out_format=out_format)
        query.run(
            args.query,
            args.gt_filter,
            args.show_variant_samples,
            args.sample_delim,
            row_filters,
            want_genotypes,
            want_gene,
            args.show_families,
        )
        return query

    gq = fresh_query()

    if args.use_header:
        if gq.header:
            print(gq.header)

    if not args.dgidb:
        for record in gq:
            print(record)
        return

    # First pass: remember every gene that appears in the results so
    # DGIdb can be queried once for all of them.
    genes = defaultdict()
    for record in gq:
        genes[record["gene"]] = True

    dgidb_info = query_dgidb(genes)

    # The first pass consumed the cursor, so run the query again.
    gq = fresh_query()

    # Second pass: emit each row with its DGIdb info appended.
    for record in gq:
        print(str(record) + "\t" + str(dgidb_info[record["gene"]]))
def run_query(args):
    """Build a ``GeminiQuery`` from ``args`` and run the query on it.

    The query object is created from ``args.contact_points`` and
    ``args.keyspace``.  This function prints nothing itself and returns
    nothing; the wall-clock start time taken on entry is passed to
    ``run`` together with the other settings.
    """
    start_time = time.time()

    row_filters = get_row_predicates(args)
    add_required_columns_to_query(args)
    out_format = select_formatter(args)
    want_genotypes = needs_genotypes(args)
    want_gene = needs_gene(args)

    # Either option being set makes sample names necessary.
    want_sample_names = args.sample_filter
    if not want_sample_names:
        want_sample_names = args.family_wise

    gq = GeminiQuery.GeminiQuery(
        args.contact_points,
        args.keyspace,
        out_format=out_format,
    )
    gq.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        row_filters,
        want_genotypes,
        want_gene,
        args.show_families,
        args.testing,
        want_sample_names,
        args.cores,
        start_time,
        args.use_header,
        args.exp_id,
        args.timeout,
        args.batch_size,
    )