Beispiel #1
0
def chunk_iterator(itr, fn, n=100, show_progress=True, label=None):
    '''Break a sized iterable into chunks and apply a function to each chunk.

    Arguments:
    - itr: the iterable to be chunked; it must support len()
    - fn: the function called once with each chunk (its return value is
      discarded)
    - n: the size of each chunk
    - show_progress: show a progress bar while the chunks are processed. This
      is forced off when everything fits in a single chunk; in that case the
      label is passed to swga.message() instead.
    - label: the label to show on the progress bar (None is treated as "")

    Returns None. Nothing is done (and fn is never called) when itr is empty.
    '''
    if len(itr) == 0:
        return
    label = "" if label is None else label

    # Integer ceiling division: a trailing partial chunk still counts as one
    # chunk. Plain `/` gives a float on Python 3 and a floored value on
    # Python 2, both of which under-report the number of chunks.
    # NOTE(review): assumes chunks() yields the final partial chunk -- confirm.
    n_chunks = -(-len(itr) // n)

    if n_chunks <= 1:
        # A single chunk does not warrant a progress bar; emit the label only.
        show_progress = False
        swga.message(label)

    chunked = chunks(itr, n)
    if show_progress:
        # n_chunks is at least 2 here, so it can be used as-is.
        chunked = progress.bar(chunked, label=label, expected_size=n_chunks)

    for chunk in chunked:
        fn(chunk)
Beispiel #2
0
def main(argv, cfg_file):
    '''Run the `find_sets` command: clear old sets, find new ones, score them.

    Arguments:
    - argv the command-line arguments, parsed by both the `find_sets` and
      `score` commands
    - cfg_file the configuration file handed to both commands
    '''
    cmd = Command('find_sets', cfg_file=cfg_file)
    score_cmd = Command('score', cfg_file=cfg_file)
    # Both commands parse the same argv; only `score_expression` is taken
    # from the score command below.
    for command in (cmd, score_cmd):
        command.parse_args(argv)

    init_db(cmd.primer_db)

    # Uniqueness constraints force us to wipe every previously-found set
    # before a new search is stored.
    stale_sets = Set.select()
    if stale_sets.count() > 0:
        # Ask before deleting unless the user passed the force option;
        # abort=True makes click abort if the answer is no.
        if not cmd.force:
            click.confirm("Remove all previously-found sets?", abort=True)
        deletion_bar = progress.bar(
            stale_sets, expected_size=stale_sets.count())
        for stale in deletion_bar:
            stale.primers.clear()
            stale.delete_instance()

    # NOTE(review): graph_fname is not defined in this function; presumably a
    # module-level name -- verify.
    make_graph(cmd.max_dimer_bp, graph_fname)

    swga.message("Now finding sets. If nothing appears, try relaxing your parameters.")
    if cmd.workers > 1:
        # More than one worker requested: use the multi-process set finder.
        found_sets = setfinder.mp_find_sets(
            nprocesses=cmd.workers,
            graph_fp=graph_fname,
            min_bg_bind_dist=cmd.min_bg_bind_dist,
            min_size=cmd.min_size,
            max_size=cmd.max_size,
            bg_genome_len=cmd.bg_genome_len)
    else:
        found_sets = setfinder.find_sets(
            cmd.min_bg_bind_dist,
            cmd.min_size,
            cmd.max_size,
            cmd.bg_genome_len,
            graph_fp=graph_fname)

    score_sets(
        found_sets,
        cmd.fg_genome_fp,
        score_cmd.score_expression,
        cmd.max_fg_bind_dist,
        cmd.max_sets)