def tfam(args):
    """
    Print the samples stored in the DB, one per line, in TFAM format.

    Format description:
    http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml

    args -- object whose ``db`` attribute is handed to GeminiQuery.
    """
    # Column order here is both the SELECT order and the output order.
    fields = ("family_id", "name", "paternal_id", "maternal_id",
              "sex", "phenotype")
    query = "select " + ", ".join(fields) + " from samples"

    sample_query = GeminiQuery(args.db)
    sample_query.run(query)

    for sample in sample_query:
        values = [str(sample[field]) for field in fields]
        # Space-separated, one sample per output line.
        print(" ".join(values))
def tfam(args):
    """
    Print the samples stored in the DB, one per line, in TFAM format.

    Format description:
    http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml

    args -- object whose ``db`` attribute is handed to GeminiQuery.
    """
    # Column order here is both the SELECT order and the output order.
    fields = ("family_id", "name", "paternal_id", "maternal_id",
              "sex", "phenotype")
    query = "select " + ", ".join(fields) + " from samples"

    sample_query = GeminiQuery(args.db)
    sample_query.run(query)

    for sample in sample_query:
        values = [str(sample[field]) for field in fields]
        # Space-separated, one sample per output line.
        print(" ".join(values))
def genewise(db, gt_filters, filter=None, columns=None, min_filters=None,
             min_variants=1, grouper="gene"):
    """
    Print variants passing the genotype filters, grouped (by gene by default).

    Selects rows from ``variants`` that are exonic and whose impact severity
    is not 'LOW', groups adjacent rows with ``grouper`` and passes each group
    to ``gen_results``. Every result it yields is printed, preceded once by a
    tab-separated header built from the result's ``print_fields`` keys.

    db           -- path to the database; must exist on disk.
    gt_filters   -- sequence of genotype filter expressions; each one is
                    checked and rewritten via GeminiQuery, then compiled.
    filter       -- optional SQL fragment ANDed onto the WHERE clause.
    columns      -- optional comma-separated string of columns to select.
    min_filters  -- forwarded to gen_results (a falsy value is sent as 0).
    min_variants -- forwarded to gen_results.
    grouper      -- column name, or a callable applied to each row, that
                    provides the grouping key.

    Raises AssertionError when ``db`` does not exist, when a column name
    contains ';', or when a genotype filter is rejected as unsafe.
    """
    # Explicit raises instead of `assert` so the checks survive `python -O`;
    # AssertionError is kept so callers see the same exception type as before.
    if not os.path.exists(db):
        raise AssertionError("database not found: %s" % db)

    # Drop empty names: columns=None (or a stray comma) would otherwise put
    # an empty entry in the SELECT list and produce invalid SQL.
    orig_columns = [name for name in
                    (x.strip() for x in (columns or "").split(","))
                    if name]
    added_cols = add_cols(orig_columns, "||".join(gt_filters))

    # Only a column-name grouper can be part of the SQL; a callable grouper
    # is applied to the fetched rows instead.
    group_by_column = isinstance(grouper, basestring)
    if group_by_column and grouper not in orig_columns:
        added_cols.append(grouper)
    columns = orig_columns + added_cols
    if any(';' in c for c in columns):
        raise AssertionError("';' is not allowed in column names")

    # NOTE: we could make the WHERE part customizable.
    query = ("SELECT {columns} FROM variants WHERE "
             "(is_exonic = 1 AND impact_severity != 'LOW')")
    if filter:
        query += " AND " + filter
    # itertools.groupby only merges *adjacent* rows, so the rows must come
    # back sorted on the same column that is used as the grouping key.
    sort_column = grouper if group_by_column else "gene"
    sql = (query.format(columns=", ".join(columns)) +
           " ORDER BY CHROM, " + sort_column)

    gq = GeminiQuery(db, include_gt_cols=True)

    # use the GeminiQuery machinery to transform to something that's eval'able
    # then compile once for speed.
    # NOTE(review): the compiled filters are presumably eval'd downstream, so
    # the safety check below must never be skipped.
    cleaned_filters = []
    for gt_filter in gt_filters:
        if not gq._is_gt_filter_safe(gt_filter):
            raise AssertionError("unsafe genotype filter: %r" % (gt_filter,))
        gt_filter = gq._correct_genotype_filter(gt_filter)
        cleaned_filters.append(compile(gt_filter, gt_filter, 'eval'))

    gq.run(sql)

    if group_by_column:
        grouper = operator.itemgetter(grouper)

    user_dict = dict(sample_info=gq.sample_info)
    header_printed = False
    for _, grp in it.groupby(gq, grouper):
        # Materialize once: the groupby sub-iterator is single-pass.
        rows = list(grp)
        for x in gen_results(rows, cleaned_filters, min_filters or 0,
                             min_variants, columns, user_dict=user_dict):
            # Hide the helper columns that were only added for filtering or
            # grouping; 'gene' is always kept in the output.
            for c in added_cols:
                if c != 'gene':
                    del x.print_fields[c]
            if not header_printed:
                print("\t".join(x.print_fields.keys()))
                header_printed = True
            print(x)
def genewise(db, gt_filters, filter=None, columns=None, min_filters=None,
             min_variants=1, grouper="gene"):
    """
    Print variants passing the genotype filters, grouped (by gene by default).

    Selects rows from ``variants`` that are exonic and whose impact severity
    is not 'LOW', groups adjacent rows with ``grouper`` and passes each group
    to ``gen_results``. Every result it yields is printed, preceded once by a
    tab-separated header built from the result's ``print_fields`` keys.

    db           -- path to the database; must exist on disk.
    gt_filters   -- sequence of genotype filter expressions; each one is
                    checked and rewritten via GeminiQuery, then compiled.
    filter       -- optional SQL fragment ANDed onto the WHERE clause.
    columns      -- optional comma-separated string of columns to select.
    min_filters  -- forwarded to gen_results (a falsy value is sent as 0).
    min_variants -- forwarded to gen_results.
    grouper      -- column name, or a callable applied to each row, that
                    provides the grouping key.

    Raises AssertionError when ``db`` does not exist, when a column name
    contains ';', or when a genotype filter is rejected as unsafe.
    """
    # Explicit raises instead of `assert` so the checks survive `python -O`;
    # AssertionError is kept so callers see the same exception type as before.
    if not os.path.exists(db):
        raise AssertionError("database not found: %s" % db)

    # Drop empty names: columns=None (or a stray comma) would otherwise put
    # an empty entry in the SELECT list and produce invalid SQL.
    orig_columns = [name for name in
                    (x.strip() for x in (columns or "").split(","))
                    if name]
    added_cols = add_cols(orig_columns, "||".join(gt_filters))

    # Only a column-name grouper can be part of the SQL; a callable grouper
    # is applied to the fetched rows instead.
    group_by_column = isinstance(grouper, basestring)
    if group_by_column and grouper not in orig_columns:
        added_cols.append(grouper)
    columns = orig_columns + added_cols
    if any(';' in c for c in columns):
        raise AssertionError("';' is not allowed in column names")

    # NOTE: we could make the WHERE part customizable.
    query = ("SELECT {columns} FROM variants WHERE "
             "(is_exonic = 1 AND impact_severity != 'LOW')")
    if filter:
        query += " AND " + filter
    # itertools.groupby only merges *adjacent* rows, so the rows must come
    # back sorted on the same column that is used as the grouping key.
    sort_column = grouper if group_by_column else "gene"
    sql = (query.format(columns=", ".join(columns)) +
           " ORDER BY CHROM, " + sort_column)

    gq = GeminiQuery(db, include_gt_cols=True)

    # use the GeminiQuery machinery to transform to something that's eval'able
    # then compile once for speed.
    # NOTE(review): the compiled filters are presumably eval'd downstream, so
    # the safety check below must never be skipped.
    cleaned_filters = []
    for gt_filter in gt_filters:
        if not gq._is_gt_filter_safe(gt_filter):
            raise AssertionError("unsafe genotype filter: %r" % (gt_filter,))
        gt_filter = gq._correct_genotype_filter(gt_filter)
        cleaned_filters.append(compile(gt_filter, gt_filter, 'eval'))

    gq.run(sql)

    if group_by_column:
        grouper = operator.itemgetter(grouper)

    user_dict = dict(sample_info=gq.sample_info)
    header_printed = False
    for _, grp in it.groupby(gq, grouper):
        # Materialize once: the groupby sub-iterator is single-pass.
        rows = list(grp)
        for x in gen_results(rows, cleaned_filters, min_filters or 0,
                             min_variants, columns, user_dict=user_dict):
            # Hide the helper columns that were only added for filtering or
            # grouping; 'gene' is always kept in the output.
            for c in added_cols:
                if c != 'gene':
                    del x.print_fields[c]
            if not header_printed:
                print("\t".join(x.print_fields.keys()))
                header_printed = True
            print(x)