Ejemplo n.º 1
0
def create_partial_pickle(flnc_files, chunked_nfl_files, out_pickle):
    """Create a pickle of ice_partial chunk tasks.

    One PartialChunkTask is created for every combination of a file in
    flnc_files (a cluster bin) and a file in chunked_nfl_files, and all
    tasks are written to out_pickle.

    Parameters:
      flnc_files -- full-length non-chimeric files in bins
      chunked_nfl_files -- chunked nfl files; each one is paired with
                           every bin
      out_pickle -- output pickle for saving PartialChunkTask objects
    """
    n_bins = len(flnc_files)
    # Value recorded on every task. max(1, ...) only differs from the real
    # chunk count when chunked_nfl_files is empty, in which case the inner
    # loop below never runs and no task is created.
    n_nfl_chunks = max(1, len(chunked_nfl_files))
    # Number of tasks actually created: one per (bin, nfl chunk) pair.
    # Using the real chunk count keeps the log messages truthful when
    # chunked_nfl_files is empty (0 tasks, not n_bins tasks).
    n_tasks = n_bins * len(chunked_nfl_files)

    log.info("Writing %s ice_partial chunk tasks to %s.", n_tasks, out_pickle)
    p = ChunkTasksPickle()

    for i, flnc_file in enumerate(flnc_files):
        log.debug("Processing cluster bin index=%s.", i)
        # The output directory depends only on the bin, so compute it once
        # per bin rather than once per task.
        cluster_out_dir = _get_cluster_out_dir(flnc_file)

        for j, nfl_file in enumerate(chunked_nfl_files):
            # Create Partial chunk tasks.
            task_ = PartialChunkTask(cluster_bin_index=i,
                                     flnc_file=flnc_file,
                                     cluster_out_dir=cluster_out_dir,
                                     nfl_file=nfl_file,
                                     nfl_index=j,
                                     n_nfl_chunks=n_nfl_chunks)
            p.append(task_)

    p.write(out_pickle)
    log.info("Saved %s partial chunk tasks to %s.", n_tasks, out_pickle)
Ejemplo n.º 2
0
def create_polish_pickle(n_polish_chunks_in_bins, flnc_files, out_pickle):
    """Create a pickle of ice_polish chunk tasks.

    For the bin at index i, n_polish_chunks_in_bins[i] PolishChunkTask
    objects are created (polish_index 0 .. n-1), and all tasks are written
    to out_pickle.

    Parameters:
      n_polish_chunks_in_bins -- list with the number of ice_polish chunks
                                 in each bin; must have one entry per file
                                 in flnc_files
      flnc_files -- full-length non-chimeric files in bins
      out_pickle -- output pickle for saving PolishChunkTask objects
    """
    n_bins = len(flnc_files)
    assert isinstance(n_polish_chunks_in_bins, list)
    assert len(n_polish_chunks_in_bins) == n_bins

    # Total number of tasks across all bins, formatted once for both the
    # "Writing" and the "Saved" log messages.
    total_tasks = str(sum(n_polish_chunks_in_bins))
    log.info("Writing %s ice_polish chunk tasks to %s.",
             total_tasks, out_pickle)

    tasks_pickle = ChunkTasksPickle()
    for bin_index, flnc_file in enumerate(flnc_files):
        chunks_in_bin = n_polish_chunks_in_bins[bin_index]
        log.debug("Creating %s ice_polish chunks for bin index=%s.",
                  str(chunks_in_bin), str(bin_index))
        out_dir = _get_cluster_out_dir(flnc_file)

        # One polish task per chunk of this bin.
        for polish_index in range(chunks_in_bin):
            tasks_pickle.append(
                PolishChunkTask(cluster_bin_index=bin_index,
                                flnc_file=flnc_file,
                                cluster_out_dir=out_dir,
                                polish_index=polish_index,
                                n_polish_chunks=chunks_in_bin))

    tasks_pickle.write(out_pickle)
    log.info("Saved %s polish chunk tasks to %s.", total_tasks, out_pickle)
Ejemplo n.º 3
0
def create_cluster_pickle(flnc_files, out_pickle):
    """Create a pickle of cluster chunk tasks, one task per bin.

    Parameters:
      flnc_files -- full-length non-chimeric files in bins; the position
                    of a file in this sequence is used as its bin index
      out_pickle -- output pickle for saving ClusterChunkTask objects
    """
    bin_count = str(len(flnc_files))
    log.info("Writing %s cluster chunk tasks to %s.", bin_count, out_pickle)

    tasks_pickle = ChunkTasksPickle()
    for bin_index, flnc_file in enumerate(flnc_files):
        log.debug("Processing cluster bin index=%s.", bin_index)
        # Each bin gets exactly one cluster task, with its own output dir.
        tasks_pickle.append(
            ClusterChunkTask(cluster_bin_index=bin_index,
                             flnc_file=flnc_file,
                             cluster_out_dir=_get_cluster_out_dir(flnc_file)))

    tasks_pickle.write(out_pickle)
    log.info("Saved %s cluster chunk tasks to %s.", bin_count, out_pickle)