def create_partial_pickle(flnc_files, chunked_nfl_files, out_pickle):
    """Create ice_partial chunk tasks and save them to a pickle.

    One PartialChunkTask is created for every (cluster bin, nfl chunk)
    pair, ordered by bin index and then by nfl chunk index, and the whole
    collection is written to out_pickle.

    Parameters:
      flnc_files -- full-length non-chimeric files in bins
      chunked_nfl_files -- chunked non-chimeric files
      out_pickle -- output pickle for saving PartialChunkTask objects
    """
    n_bins = len(flnc_files)
    # Value handed to every task as n_nfl_chunks; clamped to at least 1
    # even when chunked_nfl_files is empty (no task is created then).
    n_nfl_chunks = max(1, len(chunked_nfl_files))
    # Number of tasks the loops below really create. This differs from
    # n_bins * n_nfl_chunks when chunked_nfl_files is empty, so log this
    # value rather than the clamped product.
    n_tasks = n_bins * len(chunked_nfl_files)

    log.info("Writing %s ice_partial chunk tasks to %s.", n_tasks, out_pickle)
    p = ChunkTasksPickle()
    for i, flnc_file in enumerate(flnc_files):
        log.debug("Processing cluster bin index=%s.", i)
        cluster_out_dir = _get_cluster_out_dir(flnc_file)

        for j, nfl_file in enumerate(chunked_nfl_files):
            # Create Partial chunk tasks.
            task_ = PartialChunkTask(cluster_bin_index=i,
                                     flnc_file=flnc_file,
                                     cluster_out_dir=cluster_out_dir,
                                     nfl_file=nfl_file,
                                     nfl_index=j,
                                     n_nfl_chunks=n_nfl_chunks)
            p.append(task_)

    p.write(out_pickle)
    log.info("Saved %s partial chunk tasks to %s.", n_tasks, out_pickle)
def create_polish_pickle(n_polish_chunks_in_bins, flnc_files, out_pickle):
    """Create ice_polish chunk tasks and save them to a pickle.

    For bin i, n_polish_chunks_in_bins[i] PolishChunkTask objects are
    created (polish_index running from 0 upwards), and all tasks are
    written to out_pickle.

    Parameters:
      n_polish_chunks_in_bins -- list with the number of ice_polish chunks
                                 in each bin; must have one entry per file
                                 in flnc_files
      flnc_files -- full-length non-chimeric files in bins
      out_pickle -- output pickle for saving PolishChunkTask objects
    """
    bin_count = len(flnc_files)
    assert isinstance(n_polish_chunks_in_bins, list)
    assert len(n_polish_chunks_in_bins) == bin_count

    # The total is identical before and after the loop, so format it once.
    total_tasks = str(sum(n_polish_chunks_in_bins))
    log.info("Writing %s ice_polish chunk tasks to %s.",
             total_tasks, out_pickle)

    tasks_pickle = ChunkTasksPickle()
    for bin_index, flnc_file in enumerate(flnc_files):
        chunks_in_bin = n_polish_chunks_in_bins[bin_index]
        log.debug("Creating %s ice_polish chunks for bin index=%s.",
                  str(chunks_in_bin), str(bin_index))
        out_dir = _get_cluster_out_dir(flnc_file)

        for polish_index in range(chunks_in_bin):
            # One polish task per chunk of this bin.
            tasks_pickle.append(
                PolishChunkTask(cluster_bin_index=bin_index,
                                flnc_file=flnc_file,
                                cluster_out_dir=out_dir,
                                polish_index=polish_index,
                                n_polish_chunks=chunks_in_bin))

    tasks_pickle.write(out_pickle)
    log.info("Saved %s polish chunk tasks to %s.", total_tasks, out_pickle)
def create_cluster_pickle(flnc_files, out_pickle):
    """Create cluster chunk task pickle.

    One ClusterChunkTask is created per entry of flnc_files, in order,
    with cluster_bin_index set to the entry's position; the tasks are then
    written to out_pickle.

    Parameters:
      flnc_files -- full-length non-chimeric files in bins
      out_pickle -- output pickle for saving ClusterChunkTask objects
    """
    bin_count = str(len(flnc_files))
    log.info("Writing %s cluster chunk tasks to %s.", bin_count, out_pickle)

    tasks_pickle = ChunkTasksPickle()
    for bin_index, flnc_file in enumerate(flnc_files):
        log.debug("Processing cluster bin index=%s.", bin_index)
        # One cluster task per bin, with its output directory derived
        # from the bin's flnc file.
        tasks_pickle.append(
            ClusterChunkTask(cluster_bin_index=bin_index,
                             flnc_file=flnc_file,
                             cluster_out_dir=_get_cluster_out_dir(flnc_file)))

    tasks_pickle.write(out_pickle)
    log.info("Saved %s cluster chunk tasks to %s.", bin_count, out_pickle)