def run_main(chunk_json, contigset_output, chunk_key):
    """run main"""
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk keys with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        log.warning("Prepended '$chunk.' to chunk key: '%s'", chunk_key)

    fasta_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked consensus isoforms files are %s.", (', '.join(fasta_files)))

    out_fa = CombinedFiles(combined_dir=op.dirname(contigset_output)).all_consensus_isoforms_fa
    log.info("Combining chunked FASTA files into %s.", out_fa)
    combine_consensus_isoforms(split_indices=range(len(fasta_files)),
                               split_files=fasta_files,
                               combined_consensus_isoforms_fa=out_fa)

    log.info("Writing contigset %s", contigset_output)
    assert contigset_output.endswith('xml')
    as_contigset(out_fa, contigset_output)

    return 0
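
# For orientation, a hedged sketch of the gather pattern shared by these
# examples: load PipelineChunk objects from a chunk.json, then pull one datum
# (typically a file path) per chunk by its '$chunk.'-prefixed key. The
# attribute names used on PipelineChunk below (chunk_id, chunk_d) are an
# assumption about pbcommand's model, not confirmed by this listing.
def list_chunk_data(chunk_json, chunk_key):
    chunks = load_pipeline_chunks_from_json(chunk_json)
    for c in chunks:
        # chunk_d is assumed to map chunk keys to datum values.
        log.debug("%s -> %s", c.chunk_id, c.chunk_d.get(chunk_key))
    return get_datum_from_chunks_by_chunk_key(chunks, chunk_key)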
def run_main(fastq_file, gmap_ref_file, output_json_file, max_nchunks):
    """
    Parameters:
      fastq_file -- HQ isoforms in FASTQ
      gmap_ref_file -- GMAP reference set xml
      output_json -- chunk.json
    """
    # Chunk FASTQ
    output_fastq_json = output_json_file + ".fastq.json"
    output_dir = op.dirname(output_json_file)
    CU.write_fastq_chunks_to_file(output_fastq_json, fastq_file, max_nchunks,
                                  output_dir, "scattered-fastq", "fastq")

    # get fastq_ids from output_fastq_json
    fastq_chunks = load_pipeline_chunks_from_json(output_fastq_json)
    fastq_files = get_datum_from_chunks_by_chunk_key(fastq_chunks,
                                                     "$chunk.fastq_id")
    log.debug("Chunked FASTQ files are %s.", (', '.join(fastq_files)))

    # Writing chunk.json
    chunks = []
    for i, fastq_file in enumerate(fastq_files):
        chunk_id = "_".join(["map_isoforms_to_genome_chunk", str(i)])
        d = {
            Constants.CHUNK_KEYS[0]: fastq_file,
            Constants.CHUNK_KEYS[1]: gmap_ref_file
        }
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
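
# A hedged usage sketch for the scatter above; the file names are
# hypothetical. It scatters a FASTQ, then reads the emitted chunk.json back
# through the same helpers to confirm every chunk carries both chunk keys.
def _check_scatter(fastq_file, gmap_ref_file):
    out_json = "map_isoforms_to_genome.chunk.json"
    run_main(fastq_file, gmap_ref_file, out_json, max_nchunks=8)
    chunks = load_pipeline_chunks_from_json(out_json)
    fastqs = get_datum_from_chunks_by_chunk_key(chunks, Constants.CHUNK_KEYS[0])
    refs = get_datum_from_chunks_by_chunk_key(chunks, Constants.CHUNK_KEYS[1])
    assert len(fastqs) == len(refs)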
def run_main(chunk_json, fasta_output, chunk_key):
    """Gather chunked FASTA files into a single fasta_output."""
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk keys with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        log.warning("Prepended '$chunk.' to chunk key: '{c}'".format(c=chunk_key))

    fastx_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    _ = gather_fasta(fastx_files, fasta_output)

    return 0
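
# gather_fasta itself is defined elsewhere; a minimal sketch of the same
# gather, assuming plain concatenation is sufficient because FASTA records
# are self-delimiting (each starts at a '>' header line):
def gather_fasta_sketch(fastx_files, fasta_output):
    with open(fasta_output, 'w') as writer:
        for fn in fastx_files:
            with open(fn) as reader:
                for line in reader:
                    writer.write(line)
    return fasta_output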
def chunk_contigset(in_file, n_chunks, out_dir, out_chunk_json):
    """
    Chunk input contigset into n_chunks under out_dir, and
    write chunk info to out_chunk_json, return chunked files.
    """
    log.info("Splitting %s into %s chunks", in_file, str(n_chunks))
    CU.write_contigset_chunks_to_file(out_chunk_json, in_file, n_chunks,
                                      out_dir, "scattered-nfl", "contigset.xml")

    out_chunks = load_pipeline_chunks_from_json(out_chunk_json)
    chunked_files = get_datum_from_chunks_by_chunk_key(out_chunks, '$chunk.contigset_id')
    log.info("Splitted files are %s\n", ("\n".join(chunked_files)))

    # Return chunked files from out_chunk_json
    return chunked_files
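
# A hedged usage example for chunk_contigset; all paths are hypothetical.
def _scatter_nfl_example():
    chunked_files = chunk_contigset(in_file="isoseq_nfl.contigset.xml",
                                    n_chunks=24,
                                    out_dir="scatter_out",
                                    out_chunk_json="scatter_out/nfl.chunk.json")
    for fn in chunked_files:
        log.info("Chunked contigset: %s", fn)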
def run_main(chunk_json, fofn_output, chunk_key):
    """Gather chunked FOFN files into a single fofn_output."""
    with cd(os.path.dirname(fofn_output)):
        chunks = load_pipeline_chunks_from_json(chunk_json)

        # Allow looseness: accept chunk keys with or without the '$chunk.' prefix.
        if not chunk_key.startswith('$chunk.'):
            chunk_key = '$chunk.' + chunk_key
            log.warning("Prepended '$chunk.' to chunk key: '{c}'".format(c=chunk_key))

        fofn_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
        log.debug("Chunked FOFN files are %s; output is %s.",
                  repr(fofn_files), repr(fofn_output))
        # Concatenate all chunked FOFNs into one.
        with open(fofn_output, 'w') as ofs:
            for fn in fofn_files:
                with open(fn) as ifs:
                    ofs.write(ifs.read())
    return 0
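
# Design note: the gather above reads each chunked FOFN fully into memory,
# which is fine for small file-of-file-name lists. A sketch of the same merge
# that streams instead, in case the inputs could be large:
import shutil

def gather_fofn_streaming(fofn_files, fofn_output):
    with open(fofn_output, 'wb') as ofs:
        for fn in fofn_files:
            with open(fn, 'rb') as ifs:
                shutil.copyfileobj(ifs, ofs)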
def run_main(chunk_json, sam_output, chunk_key):
    """run main"""
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk keys with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        log.warning("Prepended '$chunk.' to chunk key: '%s'", chunk_key)

    sam_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked SAM files are %s.", (', '.join(sam_files)))

    log.info("Concatenate chunked SAM files to %s.", sam_output)
    concatenate_sam(sam_files, sam_output)

    return 0
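
# concatenate_sam is defined elsewhere; a hedged sketch of one plausible
# implementation using pysam, assuming all chunks were aligned against the
# same reference and therefore share a compatible header:
import pysam

def concatenate_sam_sketch(sam_files, sam_output):
    with pysam.AlignmentFile(sam_files[0], "r", check_sq=False) as first:
        header = first.header
    # Mode 'wh' writes SAM with the header taken from the first chunk.
    with pysam.AlignmentFile(sam_output, "wh", header=header) as writer:
        for fn in sam_files:
            with pysam.AlignmentFile(fn, "r", check_sq=False) as reader:
                for rec in reader:
                    writer.write(rec)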
def run_main(fastq_file, gmap_ref_file, output_json_file, max_nchunks):
    """
    Parameters:
      fastq_file -- HQ isoforms in FASTQ
      gmap_ref_file -- GMAP reference set xml
      output_json -- chunk.json
    """
    # Check size of fastq_file before scattering, so that a meaningful
    # error message can be displayed instead of 'float division by zero'
    if os.stat(fastq_file).st_size == 0:
        raise IOError("Fastq file %s is empty, exiting." % fastq_file)

    # Chunk FASTQ
    output_fastq_json = output_json_file + ".fastq.json"
    output_dir = op.dirname(output_json_file)
    CU.write_fastq_chunks_to_file(output_fastq_json, fastq_file, max_nchunks,
                                  output_dir, "scattered-fastq", "fastq")

    # get fastq_ids from output_fastq_json
    fastq_chunks = load_pipeline_chunks_from_json(output_fastq_json)
    fastq_files = get_datum_from_chunks_by_chunk_key(fastq_chunks,
                                                     "$chunk.fastq_id")
    log.debug("Chunked FASTQ files are %s.", (', '.join(fastq_files)))

    # Writing chunk.json
    chunks = []
    for i, fastq_file in enumerate(fastq_files):
        chunk_id = "_".join(["map_isoforms_to_genome_chunk", str(i)])
        d = {
            Constants.CHUNK_KEYS[0]: fastq_file,
            Constants.CHUNK_KEYS[1]: gmap_ref_file
        }
        c = PipelineChunk(chunk_id, **d)
        chunks.append(c)

    log.info("Writing chunk.json to %s", output_json_file)
    write_pipeline_chunks(chunks, output_json_file,
                          "created by %s" % Constants.TOOL_ID)
    return 0
def run_main(chunk_json, sam_output, chunk_key):
    """run main"""
    chunks = load_pipeline_chunks_from_json(chunk_json)

    # Allow looseness: accept chunk keys with or without the '$chunk.' prefix.
    if not chunk_key.startswith('$chunk.'):
        chunk_key = '$chunk.' + chunk_key
        log.warning("Prepended '$chunk.' to chunk key: '%s'", chunk_key)

    sam_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    log.debug("Chunked SAM files are %s.", (', '.join(sam_files)))

    log.info("Concatenate chunked SAM files to %s.", sam_output)

    # concatenate sam files
    unsorted_sam_output = sam_output + ".unsorted.sam"
    concatenate_sam(sam_files, unsorted_sam_output)

    # then sort
    sort_sam(unsorted_sam_output, sam_output)

    # remove intermediate file
    rmpath(unsorted_sam_output)
    return 0
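
# sort_sam is defined elsewhere; a hedged sketch of the sort step using
# pysam's wrapper around 'samtools sort' (this assumes samtools semantics;
# the real sort_sam may shell out differently):
import pysam

def sort_sam_sketch(unsorted_sam, sorted_sam):
    # '-O sam' keeps SAM output; samtools sorts by coordinate by default.
    pysam.sort("-O", "sam", "-o", sorted_sam, unsorted_sam)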
def run(chunk_input_json, output_file, chunk_key):
    """Gather chunked nfl pickle files into a single output pickle."""
    chunks = load_pipeline_chunks_from_json(chunk_input_json)
    chunked_files = get_datum_from_chunks_by_chunk_key(chunks, chunk_key)
    _ = combine_nfl_pickles(chunked_files, output_file)
    return 0
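
# combine_nfl_pickles is defined elsewhere; a hedged sketch of a generic
# pickle gather, assuming each chunked pickle holds a dict (the real nfl
# pickle layout may differ):
import pickle

def combine_pickles_sketch(chunked_files, output_file):
    combined = {}
    for fn in chunked_files:
        with open(fn, 'rb') as f:
            combined.update(pickle.load(f))
    with open(output_file, 'wb') as f:
        pickle.dump(combined, f)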