def make_backbone_blast_db(project_dir, blast_db_seq, dbtype):
    '''It formats a blast database inside the project when it is needed.

    The sequence file is placed in the project blast database directory
    under the name given by _get_basename(blast_db_seq). A fasta file is
    symlinked, a file in any other format is converted to fasta. When that
    file is already in the directory nothing is done.

    It returns the path to the sequence file inside the project.
    It raises a RuntimeError if makeblastdb_plus fails; the sequence file
    that was placed in the project is removed before raising.
    '''
    logger = logging.getLogger(LOGGER_NAME)
    # the name should be the basename of the blast_db_seq
    db_dir = join(project_dir, BACKBONE_DIRECTORIES['blast_databases'])
    if not exists(db_dir):
        makedirs(db_dir)
    db_seq_fpath = join(db_dir, _get_basename(blast_db_seq))
    if exists(db_seq_fpath):
        # the sequence file is already in the project, nothing to do
        return db_seq_fpath

    # The handles are opened in with blocks, so they are closed as soon as
    # they have been used and not whenever they get garbage collected.
    with open(blast_db_seq) as in_fhand:
        blast_db_seq_format = guess_seq_file_format(in_fhand)

    if blast_db_seq_format == 'fasta':
        rel_symlink(blast_db_seq, db_seq_fpath)
    else:
        with open(blast_db_seq) as in_fhand:
            with open(db_seq_fpath, 'w') as out_fhand:
                seqio(in_seq_fhand=in_fhand, out_seq_fhand=out_fhand,
                      out_format='fasta')

    logger.info('Formatting the database %s', db_seq_fpath)
    try:
        makeblastdb_plus(db_seq_fpath, dbtype=dbtype)
    except RuntimeError:
        msg = 'Error making blastdb. db:%s\n dbtype:%s\n' % \
                                                   (db_seq_fpath, dbtype)
        # without the sequence file the next call will try to format again
        remove(db_seq_fpath)
        raise RuntimeError(msg)
    return db_seq_fpath
def _create_temp_fasta_file(fpath):
    '''It creates a temporary file with the fpath sequences in fasta format.

    It returns the open NamedTemporaryFile. The file is deleted when that
    handle is closed, so the caller has to keep it while the file is needed.
    '''
    fasta_fhand = NamedTemporaryFile(suffix='.fasta', mode='a')
    # the input handle is only needed during the conversion, so it is closed
    # here instead of being left open
    with open(fpath) as in_fhand:
        seqio(in_seq_fhand=in_fhand, out_seq_fhand=fasta_fhand,
              out_format='fasta')
    return fasta_fhand
def main():
    'It runs seqio with the values returned by set_parameters'
    (seq_in, qual_in, fmt_in,
     seq_out, qual_out, fmt_out, double_enc) = set_parameters()

    seqio(in_seq_fhand=seq_in,
          in_qual_fhand=qual_in,
          in_format=fmt_in,
          out_seq_fhand=seq_out,
          out_qual_fhand=qual_out,
          out_format=fmt_out,
          double_encoding=double_enc)
def _copy_unigenes(unigenes_fpath, unigenes_qual_fpath, out_dir):
    "It writes the mira unigenes, renamed to mira_*, as mira2.fa and mira2.fa.qual"
    out_seq_fpath = os.path.join(out_dir, "mira2.fa")
    out_qual_fpath = os.path.join(out_dir, "mira2.fa.qual")
    with open(unigenes_fpath) as in_seq_fh, open(unigenes_qual_fpath) as in_qual_fh:
        with open(out_seq_fpath, "w") as out_seq_fh, open(out_qual_fpath, "w") as out_qual_fh:
            seq_writer = SequenceWriter(fhand=out_seq_fh, file_format="fasta", qual_fhand=out_qual_fh)
            for seq in seqs_in_file(seq_fhand=in_seq_fh, format="sfastq", qual_fhand=in_qual_fh):
                # everything before the first "_" of the name is replaced by "mira"
                seq.name = "mira_{0:s}".format(seq.name.split("_", 1)[1])
                seq.id = seq.name
                seq_writer.write(seq)


def _write_iassembler_input(mira_path, iassembler_path):
    "It writes the iassembler input fasta and qual from the mira input files"
    # the files are looked for in the parent directory of mira_path
    input_dir = os.path.join(mira_path, "..")
    seq_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME)
    qual_fpath = os.path.join(iassembler_path, IASSEMBLER_INPUT_NAME + ".qual")
    with open(seq_fpath, "w") as seq_fhand, open(qual_fpath, "w") as qual_fhand:
        for file_ in os.listdir(input_dir):
            # only the entries with "_in" in their name are taken as input
            if "_in" not in file_:
                continue
            with open(os.path.join(input_dir, file_)) as in_fhand:
                seqio(
                    in_seq_fhand=in_fhand,
                    out_seq_fhand=seq_fhand,
                    out_qual_fhand=qual_fhand,
                    in_format="sfastq",
                    out_format="fasta",
                )


def main():
    """The main part.

    It prepares an iassembler project directory from a mira result: it
    creates the directories, writes the contig read list (CMF10), copies
    the unigenes, writes the iassembler input files and prints the command
    to run. Every file is closed as soon as it has been read or written.
    """
    mira_path, iassembler_path = set_parameters()

    # guess the mira files that we need
    unigenes_fpath, unigenes_qual_fpath, mira_contig_read_fpath = get_mira_paths(mira_path)

    # create the iassembler project dir and subdirs
    if not os.path.exists(iassembler_path):
        os.makedirs(iassembler_path)
    mira_1_dir = os.path.join(iassembler_path, "{0:s}_Assembly".format(IASSEMBLER_INPUT_NAME), "mira")
    # os.makedirs raises an OSError if this directory already exists
    os.makedirs(mira_1_dir)

    # prepare contig readlist for iassembler
    iassembler_contig_mem_fpath = os.path.join(mira_1_dir, "CMF10")
    process_contig_readlist(mira_contig_read_fpath, iassembler_contig_mem_fpath)

    # copy unigene files into the iassembler project
    _copy_unigenes(unigenes_fpath, unigenes_qual_fpath, mira_1_dir)

    # create iassembler input files.
    _write_iassembler_input(mira_path, iassembler_path)

    msg = "To run iassembler you must use this command:\n"
    msg += "iassembler -c -i {0:s}\n".format(IASSEMBLER_INPUT_NAME)
    msg += "From your iassembler dir:{0:s}".format(iassembler_path)
    # with one string argument this prints the same in python 2 and 3
    print(msg)
def test_fastq_to_fastq_solexa():
    'It tests the conversion from fastq to fastq-solexa'
    fcontent = '@seq1\n'
    fcontent += 'CCCT\n'
    fcontent += '+\n'
    fcontent += ';;3;\n'
    fcontent += '@SRR001666.1\n'
    fcontent += 'GTTGC\n'
    fcontent += '+\n'
    fcontent += ';;;;;\n'
    fhand = StringIO.StringIO(fcontent)
    out_seq_fhand = StringIO.StringIO()
    seqio(in_seq_fhand=fhand, in_format='fastq',
          out_seq_fhand=out_seq_fhand, out_format='fastq-solexa')
    # The names and the sequences are kept, only the quality characters
    # change. The second record has to start with the '@SRR001666.1' line
    # given in the input.
    result = '@seq1\nCCCT\n+\nZZRZ\n@SRR001666.1\nGTTGC\n+\nZZZZZ\n'
    assert out_seq_fhand.getvalue() == result
def _files_to_temp_fasta(files): 'It converts the given files to a temporary fasta and qual' fastas, quals = [], [] for file_ in files: #are we dealing with a fastq file (with qual) if 'fastq' in os.path.splitext(file_.name)[-1]: qual = NamedTemporaryFile(suffix='.qual') fasta = NamedTemporaryFile(suffix='.fasta') seqio(in_seq_fhand=file_, out_seq_fhand=fasta, out_qual_fhand=qual, out_format='fasta') else: #the file is already fasta fasta = file_ qual = None fastas.append(fasta) quals.append(qual) return fastas, quals
def test_fastq_to_fasta_qual():
    'It tests that a fastq is split into a fasta and a qual file'
    fastq_lines = ['@seq1\n',
                   'CCCT\n',
                   '+\n',
                   ';;3;\n',
                   '@SRR001666.1\n',
                   'GTTGC\n',
                   '+\n',
                   ';;;;;\n']
    in_fhand = StringIO.StringIO(''.join(fastq_lines))
    fasta_fhand = tempfile.NamedTemporaryFile(suffix='.fasta')
    qual_fhand = tempfile.NamedTemporaryFile(suffix='.qual')

    seqio(in_seq_fhand=in_fhand, in_format='fastq',
          out_seq_fhand=fasta_fhand, out_qual_fhand=qual_fhand,
          out_format='fasta')

    # the results are read back from disk using the temporary file names
    expected_fasta = '>seq1\nCCCT\n>SRR001666.1\nGTTGC\n'
    expected_qual = '>seq1\n26 26 18 26\n>SRR001666.1\n26 26 26 26 26\n'
    assert open(fasta_fhand.name).read() == expected_fasta
    assert open(qual_fhand.name).read() == expected_qual