Beispiel #1
0
def main():
    """Run the query given on the command line and optionally print a result count.

    ``sys.argv[1]`` holds the query; any further argument switches debug
    output on.  A query with ``[`` among its first three characters is
    parsed with ``parse_json_query`` (and the global ``POST`` flag is set);
    anything else is parsed with ``process_params``.

    Id and sample sub-queries are resolved first into a set of snaptron ids
    that is used as a filter.  If such sub-queries were given but produced
    no ids, the function returns without running the main query and without
    printing a count.  Otherwise one of the interval, id or range searches
    is run, in that order of preference, and when ``RESULT_COUNT`` is set
    the number of snaptron ids found is written to stdout.
    """
    global POST
    input_ = sys.argv[1]
    DEBUG_MODE_ = DEBUG_MODE
    if len(sys.argv) > 2:
        DEBUG_MODE_ = True
    sys.stderr.write("%s\n" % input_)

    # A '[' among the first three characters marks a JSON query.  Slicing
    # never raises, so queries shorter than three characters fall through to
    # the CGI parser instead of failing with IndexError on input_[1]/input_[2].
    if '[' in input_[:3]:
        (intervalq, rangeq, sampleq, idq) = parse_json_query(input_)
        POST = True
    else:
        # simple '&' CGI format
        (intervalq, idq, rangeq, sampleq) = process_params(input_)

    sample_map = snample.load_sample_metadata(snapconf.SAMPLE_MD_FILE)
    if DEBUG_MODE_:
        sys.stderr.write("loaded %d samples metadata\n" % (len(sample_map)))

    # First build the filter-by-snaptron_id set from the ids passed in directly
    # and/or the ids derived from the sample query.
    # NOTE: this is the only place with OR logic, i.e. the snaptron_ids passed
    # in and the snaptron_ids derived from the samples are OR'd together.
    snaptron_ids = set()
    if len(idq) >= 1:
        query_ids(idq, snaptron_ids)

    # If there are any sample related queries, run them to extend the filter set.
    # NOTE: sample-id querying is NOT currently supported.
    if len(sampleq) >= 1:
        snaptron_ids = snample.query_samples(sampleq, sample_map, snaptron_ids)

    # Id/sample filters were requested but matched nothing: we are done, in
    # keeping with the (current) strict AND policy.
    # TODO: this will need to change once OR is supported in the POSTs.
    if len(snaptron_ids) == 0 and (len(idq) >= 1 or len(sampleq) >= 1):
        return

    # Normal query processing: 1) interval, 2) snaptron ids, 3) range.
    # Per the original notes, interval and id queries use tabix and are
    # preferred for speed; range-only queries go through lucene.
    found_snaptron_ids = set()
    found_sample_ids = set()
    if len(intervalq) >= 1:
        (found_snaptron_ids, found_sample_ids) = query_regions(
            intervalq, rangeq, snaptron_ids, filtering=RESULT_COUNT)
    elif len(snaptron_ids) >= 1:
        rquery = range_query_parser(rangeq, snaptron_ids)
        (found_snaptron_ids, found_sample_ids) = search_introns_by_ids(
            snaptron_ids, rquery, filtering=RESULT_COUNT)
    elif len(rangeq) >= 1:
        # No interval or id query is available, so fall back to the range
        # search, still filtered by any snaptron ids.
        (found_snaptron_ids, found_sample_ids) = search_ranges_lucene(
            rangeq, snaptron_ids, stream_back=True, filtering=RESULT_COUNT)

    if RESULT_COUNT:
        sys.stdout.write("%d\n" % (len(found_snaptron_ids)))
Beispiel #2
0
def main():
    """Parse the command-line query and run it.

    ``sys.argv[1]`` is the query string.  The total argument count selects
    one optional switch: 3 turns debug output on, 4 sets ``FORCE_SQLITE``
    and 5 sets ``FORCE_TABIX``.  A query containing ``[`` is handed to
    ``process_post_params`` and every returned group is run in turn; any
    other query goes through ``process_params`` and is run once.
    """
    global FORCE_SQLITE
    global FORCE_TABIX
    query_string = sys.argv[1]
    debug = DEBUG_MODE
    argc = len(sys.argv)
    if argc == 3:
        debug = True
    elif argc == 4:
        FORCE_SQLITE = True
    elif argc == 5:
        FORCE_TABIX = True

    sys.stderr.write("%s\n" % query_string)
    sample_map = snample.load_sample_metadata(snapconf.SAMPLE_MD_FILE)
    if debug:
        sys.stderr.write("loaded %d samples metadata\n" % (len(sample_map)))

    # start from the default region arguments; both parsers below replace them
    region_args = default_region_args

    if '[' not in query_string:
        # simple '&' CGI format: one AND query
        (intervalq, idq, rangeq, sampleq, region_args) = process_params(query_string)
        run_toplevel_AND_query(intervalq, rangeq, sampleq, idq,
                               sample_map=sample_map, ra=region_args)
        return

    (or_intervals, or_ranges, or_samples, or_ids, region_args) = process_post_params(query_string)
    for group_idx in xrange(len(or_intervals)):
        run_toplevel_AND_query(or_intervals[group_idx],
                               or_ranges[group_idx],
                               or_samples[group_idx],
                               or_ids[group_idx],
                               sample_map=sample_map,
                               ra=region_args)
        # every group after the first runs with print_header switched off
        region_args = region_args._replace(print_header=False)
def main():
    """Command-line entry point: read the query from ``sys.argv[1]`` and run it.

    An argument count of 3 enables debug output, 4 sets ``FORCE_SQLITE`` and
    5 sets ``FORCE_TABIX``.  The query is echoed to stderr and the sample
    metadata is loaded before the query is dispatched: queries containing
    ``[`` are split into groups by ``process_post_params`` and run one group
    at a time, all others are parsed by ``process_params`` and run once.
    """
    global FORCE_SQLITE
    global FORCE_TABIX
    raw = sys.argv[1]
    verbose = DEBUG_MODE
    n_args = len(sys.argv)
    if n_args == 3:
        verbose = True
    if n_args == 4:
        FORCE_SQLITE = True
    if n_args == 5:
        FORCE_TABIX = True

    sys.stderr.write("%s\n" % raw)
    sample_map = snample.load_sample_metadata(snapconf.SAMPLE_MD_FILE)
    if verbose:
        sys.stderr.write("loaded %d samples metadata\n" % (len(sample_map)))

    # default region arguments, replaced by whichever parser runs below
    args = default_region_args
    has_bracket = '[' in raw
    if has_bracket:
        parsed = process_post_params(raw)
        (or_intervals, or_ranges, or_samples, or_ids, args) = parsed
        group_count = len(or_intervals)
        for pos in xrange(0, group_count):
            run_toplevel_AND_query(or_intervals[pos], or_ranges[pos],
                                   or_samples[pos], or_ids[pos],
                                   sample_map=sample_map, ra=args)
            # later groups are run with print_header turned off
            args = args._replace(print_header=False)
    else:
        # simple '&' CGI format
        parsed = process_params(raw)
        (intervalq, idq, rangeq, sampleq, args) = parsed
        run_toplevel_AND_query(intervalq, rangeq, sampleq, idq,
                               sample_map=sample_map, ra=args)
Beispiel #4
0
def main():
    """Parse the command-line query and run it in bulk, POST or CGI mode.

    ``sys.argv[1]`` is the query string.  An argument count of 3 enables
    debug output, 4 sets ``FORCE_SQLITE`` and 5 sets ``FORCE_TABIX``.

    Dispatch, in order:
      * the query contains ``group=``: it is split on
        ``snapconfshared.BULK_QUERY_DELIMITER`` and each piece is parsed
        with ``process_params`` and run;
      * the query contains ``[``: it is parsed with ``process_post_params``
        and each returned group is run;
      * otherwise it is parsed with ``process_params`` and run once.
    """
    global FORCE_SQLITE
    global FORCE_TABIX
    raw_query = sys.argv[1]
    debug_on = DEBUG_MODE
    arg_total = len(sys.argv)
    if arg_total == 3:
        debug_on = True
    elif arg_total == 4:
        FORCE_SQLITE = True
    elif arg_total == 5:
        FORCE_TABIX = True

    sys.stderr.write("%s\n" % raw_query)
    sample_map = snample.load_sample_metadata(snapconf.SAMPLE_MD_FILE)
    if debug_on:
        sys.stderr.write("loaded %d samples metadata\n" % (len(sample_map)))

    # default region arguments, replaced by whichever parser runs below
    region_args = default_region_args

    # Bulk query mode.  The cheap prefix test comes first so that the scan of
    # the whole (possibly large) input string is skipped when it matches.
    if raw_query.startswith('group=') or 'group=' in raw_query:
        pieces = re.split(snapconfshared.BULK_QUERY_DELIMITER, raw_query)
        for piece in pieces:
            (intervalq, idq, rangeq, sampleq, region_args) = process_params(piece)
            run_toplevel_AND_query(intervalq, rangeq, sampleq, idq,
                                   sample_map=sample_map, ra=region_args)
        return

    if '[' in raw_query:
        (or_intervals, or_ranges, or_samples, or_ids, region_args) = process_post_params(raw_query)
        for i in xrange(len(or_intervals)):
            run_toplevel_AND_query(or_intervals[i], or_ranges[i],
                                   or_samples[i], or_ids[i],
                                   sample_map=sample_map, ra=region_args)
            # groups after the first are run with print_header turned off
            region_args = region_args._replace(print_header=False)
        return

    # simple '&' CGI format
    (intervalq, idq, rangeq, sampleq, region_args) = process_params(raw_query)
    run_toplevel_AND_query(intervalq, rangeq, sampleq, idq,
                           sample_map=sample_map, ra=region_args)