def run_query(args):
    """Run the user's query and print the resulting rows to stdout.

    Reads the following attributes from ``args``: ``db``, ``query``,
    ``gt_filter``, ``show_variant_samples``, ``sample_delim``,
    ``show_families``, ``bcolz``, ``use_header`` and ``dgidb``.

    When ``args.dgidb`` is set the query is executed twice: once to collect
    the genes present in the result set (which are handed to
    ``query_dgidb``), and a second time to print each row with its DGIdb
    information appended after a tab.
    """
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        # Fall back to "no subjects" when the lookup fails with KeyError.
        subjects = []

    # Optional keyword arguments for the GeminiQuery constructor.
    kwargs = {}
    if args.bcolz:
        from . import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    def execute_query():
        # Build and run a fresh query object. Both passes of the DGIdb
        # branch go through here so they are configured identically
        # (previously the second pass handed **kwargs to run() instead of
        # the constructor).
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **kwargs)
        query.run(args.query, args.gt_filter, args.show_variant_samples,
                  args.sample_delim, predicates, genotypes_needed,
                  gene_needed, args.show_families, subjects=subjects)
        return query

    gq = execute_query()

    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # collect a list of all the genes that need to be queried
    # from DGIdb
    # NOTE: defaultdict() without a factory behaves like a plain dict; the
    # type is kept so query_dgidb receives the same object type as before.
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # rerun the query (the cursor is now consumed)
    gq = execute_query()

    # report the query results with DGIdb info added at the end.
    for row in gq:
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
def amend_sample(args):
    """Update rows of the ``samples`` table from the PED file ``args.sample``.

    For every loaded subject whose key also appears in the parsed PED file,
    an ``update samples set ... where sample_id=...`` statement is issued
    that assigns each PED header field its (quoted) value. The columns named
    by the PED header are first passed to ``add_columns`` together with
    ``args.clear``. All statements run inside one ``database_transaction``
    on ``args.db``.
    """
    loaded_subjects = get_subjects(args)
    ped_dict = load_ped_file(args.sample)
    header = get_ped_fields(args.sample)
    update_template = "update samples set {0} where sample_id={1}"

    with database_transaction(args.db) as cursor:
        add_columns(header, cursor, args.clear)
        for subject_key, subject in loaded_subjects.items():
            # Subjects absent from the PED file are left untouched.
            if subject_key not in ped_dict:
                continue
            assignments = []
            for column, raw_value in zip(header, ped_dict[subject_key]):
                quoted_value = quote_string(raw_value)
                assignments.append(str(column) + "=" + str(quoted_value))
            statement = update_template.format(",".join(assignments),
                                               subject.sample_id)
            cursor.execute(statement)
def all_samples_predicate(args):
    """
    Build a row predicate over every subject returned by ``get_subjects``.

    The predicate comes from ``select_subjects_predicate`` and is meant to
    return True when, for a variant, the only samples carrying the variant
    have the given phenotype.
    """
    subject_lookup = get_subjects(args)
    every_subject = subject_lookup.values()
    return select_subjects_predicate(every_subject, args)