def chunk_iterator(itr, fn, n=100, show_progress=True, label=None):
    '''Breaks a sized iterable into chunks and applies a function to each chunk.

    Arguments:
    - itr   the iterable to be chunked; must support len()
    - fn    the function to be applied to each chunk
    - n     the size of each chunk (the final chunk may be smaller)
    - show_progress  show a progress bar (suppressed when there is only
                     one chunk, since a bar adds no information then)
    - label the label to show on the progress bar
    '''
    if not itr:
        return
    label = "" if label is None else label
    # Integer ceiling division gives the true chunk count. The previous
    # expression len(itr)/n was float division and under-counted whenever
    # a partial final chunk existed (e.g. 250 items, n=100 -> 2.5, not 3),
    # making the progress bar's expected_size wrong.
    n_chunks = -(-len(itr) // n)
    if n_chunks <= 1:
        show_progress = False
    swga.message(label)
    chunked_itr = chunks(itr, n)
    if show_progress:
        chunked = progress.bar(
            chunked_itr,
            label=label,
            expected_size=n_chunks)
    else:
        chunked = chunked_itr
    for chunk in chunked:
        fn(chunk)
def main(argv, cfg_file):
    """Entry point for the 'find_sets' subcommand.

    Parses CLI arguments for both the 'find_sets' and 'score' commands,
    clears any previously-found primer sets from the database, builds the
    primer compatibility graph, finds candidate sets (serially or across
    multiple worker processes), and scores the results.

    :param argv: command-line argument list to parse
    :param cfg_file: path to the configuration file backing both commands
    """
    # Two Command objects share the same argv: one for set-finding options,
    # one for scoring options (its score_expression is used below).
    cmd = Command('find_sets', cfg_file=cfg_file)
    score_cmd = Command('score', cfg_file=cfg_file)
    cmd.parse_args(argv)
    score_cmd.parse_args(argv)
    init_db(cmd.primer_db)
    # We need to clear all the previously-used sets each time due to uniqueness
    # constraints
    allsets = Set.select()
    if allsets.count() > 0:
        # Destructive step: prompt unless --force was given; click.confirm
        # aborts the program if the user declines.
        if not cmd.force:
            click.confirm("Remove all previously-found sets?", abort=True)
        for s in progress.bar(allsets, expected_size=allsets.count()):
            # Clear the many-to-many primer links before deleting the row.
            s.primers.clear()
            s.delete_instance()
    # graph_fname is a module-level name defined elsewhere in this file —
    # presumably the path where the primer graph is written/read.
    make_graph(cmd.max_dimer_bp, graph_fname)
    swga.message("Now finding sets. If nothing appears, try relaxing your parameters.")
    # Serial search for a single worker; otherwise fan out across processes.
    if cmd.workers <= 1:
        setlines = setfinder.find_sets(
            cmd.min_bg_bind_dist,
            cmd.min_size,
            cmd.max_size,
            cmd.bg_genome_len,
            graph_fp=graph_fname)
    else:
        setlines = setfinder.mp_find_sets(
            nprocesses=cmd.workers,
            graph_fp=graph_fname,
            min_bg_bind_dist=cmd.min_bg_bind_dist,
            min_size=cmd.min_size,
            max_size=cmd.max_size,
            bg_genome_len=cmd.bg_genome_len)
    # Score the discovered sets using the expression from the 'score' command.
    score_sets(
        setlines,
        cmd.fg_genome_fp,
        score_cmd.score_expression,
        cmd.max_fg_bind_dist,
        cmd.max_sets)