Ejemplo n.º 1
0
def run_query(args):
    """Execute the query described by ``args`` and print the results.

    The row predicates, output formatter, genotype/gene requirements and
    subjects are derived from ``args`` once, then used for every execution
    of the query.

    Behavior driven by ``args``:

    * ``args.bcolz``: ``gemini_bcolz.filter`` is handed to the
      ``GeminiQuery`` constructor as ``variant_id_getter``.
    * ``args.use_header``: the query header is printed first, when the
      query produced one.
    * ``args.dgidb``: the query is executed twice -- once to collect the
      gene names to send to DGIdb, and once more to print each row with
      its DGIdb info appended after a tab.

    A ``KeyError`` raised by ``get_subjects`` is treated as "no subjects"
    (an empty list is used).
    """
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        subjects = []

    # Optional keyword arguments for the GeminiQuery constructor.
    kwargs = {}
    if args.bcolz:
        from . import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    def execute_query():
        # Build and run a fresh query.  Keeping this in one place guarantees
        # that every execution is configured identically: the constructor
        # always receives ``kwargs`` and ``run`` never does.
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **kwargs)
        query.run(args.query,
                  args.gt_filter,
                  args.show_variant_samples,
                  args.sample_delim,
                  predicates,
                  genotypes_needed,
                  gene_needed,
                  args.show_families,
                  subjects=subjects)
        return query

    gq = execute_query()

    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # Collect every gene that needs to be looked up in DGIdb.  A
    # defaultdict without a factory behaves like a plain dict; it is used
    # here as a set of gene names (kept as a mapping for query_dgidb).
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # The first pass consumed the cursor, so run the query again and report
    # the results with the DGIdb info added at the end of each row.
    for row in execute_query():
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
Ejemplo n.º 2
0
def amend_sample(args):
    """Update rows of the ``samples`` table from the PED file ``args.sample``.

    For every already-loaded subject whose key also appears in the PED
    file, one ``update samples set ... where sample_id=...`` statement is
    executed, assigning each PED header field its (quoted) value from that
    subject's PED entry.  Columns for the PED header fields are added
    first via ``add_columns``, which also receives ``args.clear``.
    """
    known_subjects = get_subjects(args)
    ped_entries = load_ped_file(args.sample)
    fields = get_ped_fields(args.sample)
    update_template = "update samples set {0} where sample_id={1}"

    with database_transaction(args.db) as cursor:
        add_columns(fields, cursor, args.clear)
        for name, subject in known_subjects.items():
            if name not in ped_entries:
                # Subject is not described in the PED file; leave it as is.
                continue
            quoted_values = map(quote_string, ped_entries[name])
            assignments = []
            for column, value in zip(fields, quoted_values):
                assignments.append("=".join((str(column), str(value))))
            # NOTE(review): the values are interpolated into the SQL text
            # rather than bound as parameters -- consider parameter binding
            # if the cursor supports it.
            cursor.execute(update_template.format(",".join(assignments),
                                                  subject.sample_id))
Ejemplo n.º 3
0
def run_query(args):
    """Execute the query described by ``args`` and print the results.

    The row predicates, output formatter, genotype/gene requirements and
    subjects are derived from ``args`` once, then used for every execution
    of the query.

    Behavior driven by ``args``:

    * ``args.bcolz``: ``gemini_bcolz.filter`` is handed to the
      ``GeminiQuery`` constructor as ``variant_id_getter``.
    * ``args.use_header``: the query header is printed first, when the
      query produced one.
    * ``args.dgidb``: the query is executed twice -- once to collect the
      gene names to send to DGIdb, and once more to print each row with
      its DGIdb info appended after a tab.

    A ``KeyError`` raised by ``get_subjects`` is treated as "no subjects"
    (an empty list is used).
    """
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        subjects = []

    # Optional keyword arguments for the GeminiQuery constructor.
    kwargs = {}
    if args.bcolz:
        from . import gemini_bcolz
        kwargs['variant_id_getter'] = gemini_bcolz.filter

    def execute_query():
        # Build and run a fresh query.  Keeping this in one place guarantees
        # that every execution is configured identically: the constructor
        # always receives ``kwargs`` and ``run`` never does.
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **kwargs)
        query.run(args.query, args.gt_filter, args.show_variant_samples,
                  args.sample_delim, predicates, genotypes_needed,
                  gene_needed, args.show_families, subjects=subjects)
        return query

    gq = execute_query()

    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # Collect every gene that needs to be looked up in DGIdb.  A
    # defaultdict without a factory behaves like a plain dict; it is used
    # here as a set of gene names (kept as a mapping for query_dgidb).
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # collect info from DGIdb
    dgidb_info = query_dgidb(genes)

    # The first pass consumed the cursor, so run the query again and report
    # the results with the DGIdb info added at the end of each row.
    for row in execute_query():
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
Ejemplo n.º 4
0
def all_samples_predicate(args):
    """Build a predicate over every subject returned by ``get_subjects``.

    The resulting predicate returns True when, for a variant, the only
    samples that have the variant have a given phenotype.
    """
    # Hand all known subjects (the values of the subject mapping) to the
    # generic subject-selection predicate builder.
    return select_subjects_predicate(get_subjects(args).values(), args)
Ejemplo n.º 5
0
def all_samples_predicate(args):
    """Build a predicate over every subject returned by ``get_subjects``.

    The resulting predicate returns True when, for a variant, the only
    samples that have the variant have a given phenotype.
    """
    # Hand all known subjects (the values of the subject mapping) to the
    # generic subject-selection predicate builder.
    return select_subjects_predicate(get_subjects(args).values(), args)