def process_sequences_for_script(in_fpath_seq, file_format, pipeline,
                                 configuration, out_fpath,
                                 in_fpath_qual=None):
    '''It processes the sequences and appends them to the out_fpath file.

    The processed sequences are written with a SequenceWriter using the
    'pickle' file format. The output file is opened in append mode, so any
    previous content is kept. Nothing is returned.

    in_fpath_seq -- path to the input sequence file
    file_format -- format of the input file, passed to _process_sequences
    pipeline -- pickled list of step dicts, each one with a 'name' key
    configuration -- pickled configuration, passed to _process_sequences
    out_fpath -- path of the file in which the result will be appended
    in_fpath_qual -- optional path to the input quality file

    A KeyError is raised if a step name is not found in STEPS.
    '''
    # NOTE(security): pickle.loads can execute arbitrary code, so the pipeline
    # and the configuration should come only from a trusted caller.
    pipeline = pickle.loads(pipeline)
    configuration = pickle.loads(configuration)

    # The pipeline steps do not have real functions because they were not
    # pickleable, so we have to put the functions again in the steps, looking
    # for them by name in STEPS.
    steps = dict((step['name'], step) for step in STEPS)
    for step in pipeline:
        step['function'] = steps[step['name']]['function']

    in_fhand_seq = None
    in_fhand_qual = None
    out_fhand = None
    try:
        if in_fpath_qual:
            in_fhand_qual = open(in_fpath_qual)
        in_fhand_seq = open(in_fpath_seq)
        processed_seqs = _process_sequences(in_fhand_seq,
                                            in_fhand_qual=in_fhand_qual,
                                            file_format=file_format,
                                            pipeline=pipeline,
                                            configuration=configuration)

        # now we write all seqs in the file
        out_fhand = open(out_fpath, 'a')
        writer = SequenceWriter(fhand=out_fhand, file_format='pickle')
        for sequence in processed_seqs:
            writer.write(sequence)
    finally:
        # The input files are closed only here because processed_seqs could
        # be read lazily while we are writing.
        for fhand in (out_fhand, in_fhand_seq, in_fhand_qual):
            if fhand is not None:
                fhand.close()
def _copy_unigenes(unigenes_fpath, unigenes_qual_fpath, out_seq_fpath,
                   out_qual_fpath):
    """Write the mira unigenes, renamed, as fasta plus qual files."""
    with open(out_seq_fpath, "w") as out_seq_fhand:
        with open(out_qual_fpath, "w") as out_qual_fhand:
            seq_writer = SequenceWriter(
                fhand=out_seq_fhand, file_format="fasta",
                qual_fhand=out_qual_fhand
            )
            with open(unigenes_fpath) as in_seq_fhand:
                with open(unigenes_qual_fpath) as in_qual_fhand:
                    seqs = seqs_in_file(seq_fhand=in_seq_fhand,
                                        format="sfastq",
                                        qual_fhand=in_qual_fhand)
                    for seq in seqs:
                        # Everything up to the first "_" is replaced by
                        # "mira"; a name without "_" raises an IndexError.
                        suffix = seq.name.split("_", 1)[1]
                        seq.name = "mira_{0:s}".format(suffix)
                        seq.id = seq.name
                        seq_writer.write(seq)


def _write_iassembler_input(mira_path, iassembler_path):
    """Convert every "_in" file next to the mira dir into one fasta/qual."""
    reads_dir = os.path.join(mira_path, "..")
    seq_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME)
    qual_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME + ".qual")
    with open(seq_fpath, "w") as seq_fhand:
        with open(qual_fpath, "w") as qual_fhand:
            for file_ in os.listdir(reads_dir):
                if "_in" not in file_:
                    continue
                with open(os.path.join(reads_dir, file_)) as in_fhand:
                    seqio(
                        in_seq_fhand=in_fhand,
                        out_seq_fhand=seq_fhand,
                        out_qual_fhand=qual_fhand,
                        in_format="sfastq",
                        out_format="fasta",
                    )


def main():
    """The main part.

    It builds an iassembler project directory from a mira assembly: it
    creates the project dirs, writes the contig read list, copies the renamed
    unigenes, creates the iassembler input files and finally prints the
    command needed to run iassembler.

    os.makedirs raises an OSError if the mira subdirectory of the project
    already exists.
    """
    mira_path, iassembler_path = set_parameters()

    # guess the mira files that we need
    mira_fpaths = get_mira_paths(mira_path)
    unigenes_fpath, unigenes_qual_fpath, mira_contig_read_fpath = mira_fpaths

    # create the iassembler project dir and subdirs
    if not os.path.exists(iassembler_path):
        os.makedirs(iassembler_path)
    mira_1_dir = os.path.join(
        iassembler_path, "{0:s}_Assembly".format(IASSEMBLER_INPUT_NAME), "mira"
    )
    os.makedirs(mira_1_dir)

    # prepare contig readlist for iassembler
    iassembler_contig_mem_fpath = os.path.join(mira_1_dir, "CMF10")
    process_contig_readlist(mira_contig_read_fpath,
                            iassembler_contig_mem_fpath)

    # copy unigene files into the iassembler project
    _copy_unigenes(
        unigenes_fpath,
        unigenes_qual_fpath,
        os.path.join(mira_1_dir, "mira2.fa"),
        os.path.join(mira_1_dir, "mira2.fa.qual"),
    )

    # create iassembler input files.
    _write_iassembler_input(mira_path, iassembler_path)

    msg = "To run iassembler you must use this command:\n"
    msg += "iassembler -c -i {0:s}\n".format(IASSEMBLER_INPUT_NAME)
    msg += "From your iassembler dir:{0:s}".format(iassembler_path)
    # With a single argument the parenthesised form prints the same text with
    # the Python 2 print statement and with the print function.
    print(msg)