예제 #1
0
def process_sequences_for_script(in_fpath_seq, file_format, pipeline,
                                 configuration, out_fpath, in_fpath_qual=None):
    '''It processes the sequences in a file and appends them to out_fpath.

    The pipeline and configuration should be pickled objects. The pipeline
    unpickles to a list of step dicts; each one must have a 'name' that
    matches one of the steps in STEPS, because the step functions are not
    pickled and are restored here by name.

    in_fpath_seq  -- path of the file with the input sequences
    file_format   -- format of the input file, passed to _process_sequences
    pipeline      -- pickled list of step dicts
    configuration -- pickled configuration, passed to _process_sequences
    out_fpath     -- path of the output file; it is opened in append mode and
                     the processed sequences are written with the 'pickle'
                     SequenceWriter format
    in_fpath_qual -- optional path of the quality file

    It returns None. A KeyError is raised if a pipeline step name is not
    found in STEPS.
    '''
    # NOTE(security): pickle.loads can run arbitrary code. These two strings
    # must only come from a trusted caller, never from external input.
    pipeline = pickle.loads(pipeline)
    configuration = pickle.loads(configuration)

    #the pipeline steps do not have real functions because they were not
    #pickleable, so we have to put the functions again in the steps
    steps = dict((step['name'], step) for step in STEPS)
    for step in pipeline:
        step['function'] = steps[step['name']]['function']

    in_fhand_qual = None
    in_fhand_seq = None
    out_fhand = None
    try:
        if in_fpath_qual:
            in_fhand_qual = open(in_fpath_qual)
        in_fhand_seq = open(in_fpath_seq)
        #the result may be produced lazily, so the input files have to stay
        #open until every sequence has been written
        processed_seqs = _process_sequences(in_fhand_seq,
                                            in_fhand_qual=in_fhand_qual,
                                            file_format=file_format,
                                            pipeline=pipeline,
                                            configuration=configuration)
        #now we write all seq in the file
        out_fhand = open(out_fpath, 'a')
        writer = SequenceWriter(fhand=out_fhand,
                                file_format='pickle')
        for sequence in processed_seqs:
            writer.write(sequence)
    finally:
        #close everything that was opened, even if a step or the writer fails
        for fhand in (out_fhand, in_fhand_seq, in_fhand_qual):
            if fhand is not None:
                fhand.close()
예제 #2
0
def _copy_unigenes(unigenes_fpath, unigenes_qual_fpath, out_fpath,
                   out_qual_fpath):
    "It writes the mira unigenes, renamed to mira_<suffix>, as fasta and qual"
    with open(out_fpath, "w") as out_fhand:
        with open(out_qual_fpath, "w") as out_qual_fhand:
            seq_writer = SequenceWriter(
                fhand=out_fhand, file_format="fasta", qual_fhand=out_qual_fhand
            )
            with open(unigenes_fpath) as in_fhand:
                with open(unigenes_qual_fpath) as in_qual_fhand:
                    seqs = seqs_in_file(
                        seq_fhand=in_fhand, format="sfastq", qual_fhand=in_qual_fhand
                    )
                    for seq in seqs:
                        # keep what follows the first underscore of the
                        # original name and prefix it with "mira_"
                        seq.name = "mira_{0:s}".format(seq.name.split("_", 1)[1])
                        seq.id = seq.name
                        seq_writer.write(seq)


def _write_iassembler_input(mira_path, iassembler_path):
    "It converts every *_in* file next to the mira dir into one fasta and qual"
    seq_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME)
    qual_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME + ".qual")
    reads_dir = os.path.join(mira_path, "..")

    with open(seq_fpath, "w") as seq_fhand:
        with open(qual_fpath, "w") as qual_fhand:
            # os.listdir order is arbitrary; sorting makes the order of the
            # sequences in the output reproducible between runs
            for file_ in sorted(os.listdir(reads_dir)):
                if "_in" not in file_:
                    continue
                with open(os.path.join(reads_dir, file_)) as in_fhand:
                    seqio(
                        in_seq_fhand=in_fhand,
                        out_seq_fhand=seq_fhand,
                        out_qual_fhand=qual_fhand,
                        in_format="sfastq",
                        out_format="fasta",
                    )


def main():
    """The main part

    It builds an iassembler project directory out of a mira assembly:
    it copies the contig read list and the unigenes into
    <iassembler_path>/<IASSEMBLER_INPUT_NAME>_Assembly/mira, writes the
    iassembler input fasta and qual files and prints the command to run.
    """

    mira_path, iassembler_path = set_parameters()

    # guess the mira files that we need
    unigenes_fpath, unigenes_qual_fpath, mira_contig_read_fpath = get_mira_paths(mira_path)

    # create the iassembler project dir and subdirs
    if not os.path.exists(iassembler_path):
        os.makedirs(iassembler_path)
    mira_1_dir = os.path.join(iassembler_path, "{0:s}_Assembly".format(IASSEMBLER_INPUT_NAME), "mira")
    # os.makedirs raises OSError when mira_1_dir already exists, so an
    # existing project is not silently overwritten
    os.makedirs(mira_1_dir)

    # prepare contig readlist for iassembler
    iassembler_contig_mem_fpath = os.path.join(mira_1_dir, "CMF10")
    process_contig_readlist(mira_contig_read_fpath, iassembler_contig_mem_fpath)

    # copy unigene files into the iassembler project
    _copy_unigenes(
        unigenes_fpath,
        unigenes_qual_fpath,
        os.path.join(mira_1_dir, "mira2.fa"),
        os.path.join(mira_1_dir, "mira2.fa.qual"),
    )

    # create iassembler input files.
    _write_iassembler_input(mira_path, iassembler_path)

    msg = "To run iassembler you must use this command:\n"
    msg += "iassembler -c -i {0:s}\n".format(IASSEMBLER_INPUT_NAME)
    msg += "From your iassembler dir:{0:s}".format(iassembler_path)
    # a single parenthesised argument prints the same text with the python 2
    # print statement and with the python 3 print function
    print(msg)