Esempio n. 1
0
def run_query(args):
    """Run the query described by ``args`` and print every result row.

    The header is printed first when ``args.use_header`` is set and the
    query exposes a non-empty ``header``.

    Without ``args.dgidb`` each row is printed as-is. With ``args.dgidb``
    the query is executed twice: the first pass collects the ``gene`` value
    of every row, those genes are handed to ``query_dgidb``, and the second
    pass prints each row followed by a tab and the DGIdb entry for its gene.

    When ``args.bcolz`` is set, ``gemini_bcolz.filter`` is supplied to the
    ``GeminiQuery`` constructor as ``variant_id_getter`` for *both* passes.
    """
    predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    formatter = select_formatter(args)
    genotypes_needed = needs_genotypes(args)
    gene_needed = needs_gene(args)
    try:
        subjects = get_subjects(args)
    except KeyError:
        # A KeyError from the subject lookup is treated as "no subjects".
        subjects = []

    query_kwargs = {}
    if args.bcolz:
        # Imported lazily so the module is only needed when bcolz is asked for.
        import gemini_bcolz
        query_kwargs['variant_id_getter'] = gemini_bcolz.filter

    def start_query():
        """Build a fresh GeminiQuery and start the query on it.

        Both passes go through this helper so they are configured
        identically; in particular ``variant_id_getter`` is a constructor
        option and must not be forwarded to ``run()``.
        """
        query = GeminiQuery.GeminiQuery(args.db, out_format=formatter,
                                        **query_kwargs)
        query.run(args.query,
                  args.gt_filter,
                  args.show_variant_samples,
                  args.sample_delim,
                  predicates,
                  genotypes_needed,
                  gene_needed,
                  args.show_families,
                  subjects=subjects)
        return query

    gq = start_query()

    # Single-argument print(...) behaves the same as the print statement.
    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for row in gq:
            print(row)
        return

    # Collect every gene that needs to be looked up in DGIdb. The container
    # is kept as a mapping (gene -> True) because that is what query_dgidb
    # has always been handed.
    genes = defaultdict()
    for row in gq:
        genes[row['gene']] = True

    # Collect info from DGIdb.
    dgidb_info = query_dgidb(genes)

    # Rerun the query (the first cursor is now consumed) and report the
    # results with the DGIdb info appended at the end of each row.
    gq = start_query()
    for row in gq:
        print(str(row) + "\t" + str(dgidb_info[row['gene']]))
Esempio n. 2
0
def family_wise_predicate(args):
    """Build a row predicate that counts matching families.

    One predicate is created through ``select_subjects_predicate`` for each
    family in ``get_family_dict(args)``. The returned callable evaluates all
    of them on a row and is true when the sum of their results reaches
    ``args.min_kindreds``.
    """
    output_format = select_formatter(args)
    family_map = get_family_dict(args)
    # NOTE(review): this query object is never used below; it is still
    # constructed so that whatever construction does is left unchanged.
    _unused_query = GeminiQuery.GeminiQuery(args.db, out_format=output_format)

    def build_family_predicate(members):
        # Names first, then subjects: same evaluation order as before.
        names = [member.name for member in members]
        family_subjects = get_subjects_in_family(args, members).values()
        return select_subjects_predicate(family_subjects, args, names)

    family_predicates = [build_family_predicate(members)
                         for members in family_map.values()]

    def predicate(row):
        matching = sum(check(row) for check in family_predicates)
        return matching >= args.min_kindreds
    return predicate
Esempio n. 3
0
def run_query(args):
    """Run the query described by ``args`` and print each result row.

    The header is printed first when ``args.use_header`` is set and the
    query exposes a non-empty ``header``.
    """
    row_predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    output_format = select_formatter(args)

    gq = GeminiQuery.GeminiQuery(args.db, out_format=output_format)
    gq.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        row_predicates,
        needs_genotypes(args),
    )

    # Single-argument print(...) behaves the same as the print statement.
    if args.use_header:
        if gq.header:
            print(gq.header)

    for result in gq:
        print(result)
Esempio n. 4
0
def region(parser, args):
    """Dispatch a region or gene lookup against the database at ``args.db``.

    Does nothing when ``args.db`` does not exist on disk. Exits with a
    message when both ``args.region`` and ``args.gene`` are given; otherwise
    calls ``get_region`` or ``get_gene`` for whichever one is set. ``parser``
    is accepted but not used.
    """
    if not os.path.exists(args.db):
        return

    output_format = select_formatter(args)
    gq = GeminiQuery.GeminiQuery(args.db, out_format=output_format)

    region_given = args.region is not None
    gene_given = args.gene is not None

    if region_given and gene_given:
        sys.exit('EXITING: Choose either --reg or --gene, not both.\n')

    if region_given:
        get_region(args, gq)
    elif gene_given:
        get_gene(args, gq)
Esempio n. 5
0
def run_query(args):
    """Execute the query in ``args`` and print its rows.

    When ``args.use_header`` is set and the query has a non-empty
    ``header``, that header is printed before the rows.
    """
    filters = get_row_predicates(args)
    add_required_columns_to_query(args)
    fmt = select_formatter(args)

    gq = GeminiQuery.GeminiQuery(args.db, out_format=fmt)
    gq.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        filters,
        needs_genotypes(args),
    )

    # Single-argument print(...) behaves the same as the print statement.
    if args.use_header:
        if gq.header:
            print(gq.header)

    for record in gq:
        print(record)
Esempio n. 6
0
def family_wise_predicate(args):
    """Return a predicate requiring a minimum number of matching families.

    A per-family predicate is obtained from ``select_subjects_predicate``
    for every family in ``get_family_dict(args)``. The returned function
    sums the results of all per-family predicates for a row and compares
    the total against ``args.min_kindreds``.
    """
    fmt = select_formatter(args)
    kindreds = get_family_dict(args)
    # NOTE(review): the query object is not used afterwards; it is still
    # created here so that construction itself is left unchanged.
    _query = GeminiQuery.GeminiQuery(args.db, out_format=fmt)

    def predicate_for(family):
        # Member names are gathered before the subjects are looked up.
        member_names = [person.name for person in family]
        members = get_subjects_in_family(args, family).values()
        return select_subjects_predicate(members, args, member_names)

    per_family = [predicate_for(family) for family in kindreds.values()]

    def predicate(row):
        hits = sum(test(row) for test in per_family)
        return hits >= args.min_kindreds
    return predicate
Esempio n. 7
0
def run_query(args):
    """Run the query described by ``args`` and print every result row.

    The header is printed first when ``args.use_header`` is set and the
    query exposes a non-empty ``header``. With ``args.dgidb`` set the query
    is run twice: once to collect the ``gene`` of every row for
    ``query_dgidb``, and once more to print each row followed by a tab and
    the DGIdb entry for its gene.
    """
    row_predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    output_format = select_formatter(args)
    want_genotypes = needs_genotypes(args)
    want_gene = needs_gene(args)

    def start_query():
        """Create a new GeminiQuery and start the query on it."""
        query = GeminiQuery.GeminiQuery(args.db, out_format=output_format)
        query.run(
            args.query,
            args.gt_filter,
            args.show_variant_samples,
            args.sample_delim,
            row_predicates,
            want_genotypes,
            want_gene,
            args.show_families,
        )
        return query

    gq = start_query()

    # Single-argument print(...) behaves the same as the print statement.
    if args.use_header and gq.header:
        print(gq.header)

    if not args.dgidb:
        for record in gq:
            print(record)
        return

    # First pass: gather every gene that has to be looked up in DGIdb.
    genes = defaultdict()
    for record in gq:
        genes[record["gene"]] = True

    dgidb_info = query_dgidb(genes)

    # Second pass: the first cursor is consumed, so the query is rerun and
    # each row is reported with its DGIdb info appended.
    gq = start_query()
    for record in gq:
        print(str(record) + "\t" + str(dgidb_info[record["gene"]]))
Esempio n. 8
0
def run_query(args):
    """Start the query described by ``args`` on a new GeminiQuery.

    The query object is built from ``args.contact_points`` and
    ``args.keyspace``. The wall-clock time taken at entry is passed on to
    ``run`` together with the remaining options; nothing is printed or
    returned here.
    """
    started_at = time.time()
    row_predicates = get_row_predicates(args)
    add_required_columns_to_query(args)
    output_format = select_formatter(args)
    want_genotypes = needs_genotypes(args)
    want_gene = needs_gene(args)
    # True-ish when either sample filtering or family-wise mode is requested.
    want_sample_names = args.sample_filter or args.family_wise

    gq = GeminiQuery.GeminiQuery(
        args.contact_points,
        args.keyspace,
        out_format=output_format,
    )
    gq.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        row_predicates,
        want_genotypes,
        want_gene,
        args.show_families,
        args.testing,
        want_sample_names,
        args.cores,
        started_at,
        args.use_header,
        args.exp_id,
        args.timeout,
        args.batch_size,
    )
Esempio n. 9
0
def run_query(args):
    """Build a GeminiQuery from ``args`` and run the requested query.

    The query object is created from ``args.contact_points`` and
    ``args.keyspace``; the entry timestamp and the remaining options are
    handed to ``run`` positionally. This function itself produces no output
    and returns nothing.
    """
    t0 = time.time()
    filters = get_row_predicates(args)
    add_required_columns_to_query(args)
    fmt = select_formatter(args)
    with_genotypes = needs_genotypes(args)
    with_gene = needs_gene(args)
    # Sample names are requested for sample filtering or family-wise mode.
    with_sample_names = args.sample_filter or args.family_wise

    gq = GeminiQuery.GeminiQuery(args.contact_points, args.keyspace,
                                 out_format=fmt)
    gq.run(
        args.query,
        args.gt_filter,
        args.show_variant_samples,
        args.sample_delim,
        filters,
        with_genotypes,
        with_gene,
        args.show_families,
        args.testing,
        with_sample_names,
        args.cores,
        t0,
        args.use_header,
        args.exp_id,
        args.timeout,
        args.batch_size,
    )