def records():
    """Yield HMMER-validated records, reference sequence first.

    Relies on ``refseq``, ``seqrecords`` and ``is_dna`` being available from
    the surrounding scope.  When ``refseq`` is truthy it is validated and
    yielded before anything else.  Each entry of ``seqrecords`` is passed
    through ``translate`` first when ``is_dna`` is false and its sequence
    alphabet equals ``DNAAlphabet``; every entry is then run through
    ``HMMER.valid`` before being yielded.
    """
    if refseq:
        yield HMMER.valid(refseq, is_dna=is_dna)
    for entry in seqrecords:
        # Short-circuit keeps the alphabet lookup from running in DNA mode.
        needs_translation = not is_dna and entry.seq.alphabet == DNAAlphabet
        candidate = translate(entry) if needs_translation else entry
        yield HMMER.valid(candidate, is_dna=is_dna)
def generate_hmm_(opts):
    """Build an HMM from the reference MSA named by ``opts.REFMSA``.

    The MSA is parsed through a ``Verifier`` set to ``DNAAlphabet`` and
    re-written in Stockholm format to a scratch file: unchanged when
    ``opts.ENCODER`` is ``DNAEncoder``, otherwise passed through
    ``translate``.  If verification raises ``VerifyError`` while a non-DNA
    encoder is in use, the verifier is switched to ``AminoAlphabet`` and the
    records are written as-is.  ``HMMER.build`` is then run on the scratch
    alignment.

    Args:
        opts: options object providing ``REFMSA``, ``ENCODER``,
            ``HMMER_ALIGN_BIN`` and ``HMMER_BUILD_BIN``.

    Returns:
        Path of the temporary file given to ``HMMER.build`` as its first
        argument.  The caller owns this file; it is not removed here on
        success.

    Raises:
        RuntimeError: if the encoder is ``DNAEncoder`` but the MSA fails
            verification against ``DNAAlphabet``.
    """
    fd, tmphmm = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)
    is_dna = opts.ENCODER == DNAEncoder
    built = False
    try:
        with open(opts.REFMSA) as msa_fh, open(tmpaln, 'w') as aln_fh:
            msa_fmt = seqfile_format(opts.REFMSA)
            source = Verifier(SeqIO.parse(msa_fh, msa_fmt), DNAAlphabet)
            try:
                SeqIO.write(
                    (record if is_dna else translate(record) for record in source),
                    aln_fh,
                    'stockholm'
                )
            except VerifyError:
                if is_dna:
                    raise RuntimeError(
                        "DNA encoding incompatible with protein reference MSA"
                    )
                # NOTE(review): assumes the Verifier can be iterated again
                # from the first record after set_alphabet() -- confirm.
                source.set_alphabet(AminoAlphabet)
                # Rewind AND truncate so nothing emitted by the failed first
                # pass can survive past the end of the second write.
                aln_fh.seek(0)
                aln_fh.truncate()
                SeqIO.write(source, aln_fh, 'stockholm')
        # The alignment file is closed (flushed) before HMMER reads it.
        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.build(
            tmphmm,
            tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO
        )
        built = True
    finally:
        if exists(tmpaln):
            remove(tmpaln)
        # On failure nobody receives tmphmm, so do not leave it behind.
        if not built and exists(tmphmm):
            remove(tmphmm)
    return tmphmm
def generate_alignment_(seqrecords, hmmfile, opts, refseq=None):
    """Align ``seqrecords`` against ``hmmfile`` with HMMER.

    The records (preceded by ``refseq`` when it is truthy) are validated with
    ``HMMER.valid``, written to a scratch FASTA file, and aligned with
    ``HMMER.align`` using ``outformat=HMMER.PFAM``.  Records whose sequence
    alphabet equals ``DNAAlphabet`` are passed through ``translate`` first
    when ``opts.ENCODER`` is not ``DNAEncoder``.

    Args:
        seqrecords: iterable of sequence records to align.
        hmmfile: HMM file handed to ``HMMER.align``.
        opts: options object providing ``ENCODER``, ``HMMER_ALIGN_BIN`` and
            ``HMMER_BUILD_BIN``.
        refseq: optional reference record placed first in the input.

    Returns:
        Path of the temporary file passed to ``HMMER.align`` as ``output``.
        The caller owns this file; it is not removed here on success.

    Raises:
        RuntimeError: if writing the sequences or running the alignment
            fails for any ordinary (``Exception``-derived) reason.
    """
    fd, tmpseq = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)
    is_dna = opts.ENCODER == DNAEncoder
    log = getLogger(IDEPI_LOGGER)

    def records():
        # Reference first, then every input record, all HMMER-validated.
        if refseq:
            yield HMMER.valid(refseq, is_dna=is_dna)
        for record in seqrecords:
            if not is_dna and record.seq.alphabet == DNAAlphabet:
                record = translate(record)
            yield HMMER.valid(record, is_dna=is_dna)

    aligned = False
    try:
        # get the FASTA format file so we can HMMER it; the file is closed
        # (and therefore flushed) before the aligner is started
        with open(tmpseq, 'w') as seq_fh:
            SeqIO.write(records(), seq_fh, 'fasta')

        log.debug('aligning sequences')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.align(
            hmmfile,
            tmpseq,
            output=tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO,
            outformat=HMMER.PFAM
        )
        aligned = True
    except Exception:
        # Only ordinary errors are converted; KeyboardInterrupt/SystemExit
        # propagate unchanged.  On Python 3 the original error is retained
        # as the new exception's context.
        raise RuntimeError("failed to generate alignment")
    finally:
        # cleanup these files
        if exists(tmpseq):
            remove(tmpseq)
        # On failure nobody receives tmpaln, so do not leave it behind.
        if not aligned and exists(tmpaln):
            remove(tmpaln)
    return tmpaln
def generate_alignment_(seqrecords, hmmfile, opts, refseq=None):
    """Write the input records to FASTA and align them to ``hmmfile``.

    ``refseq`` (when truthy) is emitted first, followed by each record of
    ``seqrecords``; all of them go through ``HMMER.valid``.  When
    ``opts.ENCODER`` is not ``DNAEncoder``, records whose sequence alphabet
    equals ``DNAAlphabet`` are passed through ``translate`` beforehand.  The
    FASTA file is then aligned with ``HMMER.align`` using
    ``outformat=HMMER.PFAM``.

    Args:
        seqrecords: iterable of sequence records to align.
        hmmfile: HMM file handed to ``HMMER.align``.
        opts: options object providing ``ENCODER``, ``HMMER_ALIGN_BIN`` and
            ``HMMER_BUILD_BIN``.
        refseq: optional reference record placed first in the input.

    Returns:
        Path of the temporary file passed to ``HMMER.align`` as ``output``.
        Removing it is left to the caller.

    Raises:
        RuntimeError: if writing the sequences or running the alignment
            fails with an ``Exception``-derived error.
    """
    fd, tmpseq = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)
    is_dna = opts.ENCODER == DNAEncoder
    log = getLogger(IDEPI_LOGGER)

    def records():
        # Reference first, then every input record, all HMMER-validated.
        if refseq:
            yield HMMER.valid(refseq, is_dna=is_dna)
        for record in seqrecords:
            if not is_dna and record.seq.alphabet == DNAAlphabet:
                record = translate(record)
            yield HMMER.valid(record, is_dna=is_dna)

    succeeded = False
    try:
        # get the FASTA format file so we can HMMER it; leaving the ``with``
        # closes the file so its contents are on disk before alignment
        with open(tmpseq, 'w') as seq_fh:
            SeqIO.write(records(), seq_fh, 'fasta')

        log.debug('aligning sequences')

        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.align(
            hmmfile,
            tmpseq,
            output=tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO,
            outformat=HMMER.PFAM
        )
        succeeded = True
    except Exception:
        # Raising here rather than in ``finally`` lets KeyboardInterrupt and
        # SystemExit pass through untouched.
        raise RuntimeError("failed to generate alignment")
    finally:
        # cleanup these files
        if exists(tmpseq):
            remove(tmpseq)
        # The output path is never returned on failure, so remove it too.
        if not succeeded and exists(tmpaln):
            remove(tmpaln)
    return tmpaln
def generate_hmm_(opts):
    """Run ``HMMER.build`` on a Stockholm copy of ``opts.REFMSA``.

    The reference MSA is read through a ``Verifier`` initialised with
    ``DNAAlphabet`` and written to a scratch Stockholm file.  With
    ``DNAEncoder`` the records are written unchanged; otherwise each record
    is passed through ``translate``.  Should the verifier raise
    ``VerifyError`` under a non-DNA encoder, it is switched to
    ``AminoAlphabet`` and the records are written without translation.

    Args:
        opts: options object providing ``REFMSA``, ``ENCODER``,
            ``HMMER_ALIGN_BIN`` and ``HMMER_BUILD_BIN``.

    Returns:
        Path of the temporary file given to ``HMMER.build`` as its first
        argument.  Removing it is left to the caller.

    Raises:
        RuntimeError: if the encoder is ``DNAEncoder`` but the MSA fails
            verification against ``DNAAlphabet``.
    """
    fd, tmphmm = mkstemp()
    close(fd)
    fd, tmpaln = mkstemp()
    close(fd)
    is_dna = opts.ENCODER == DNAEncoder
    succeeded = False
    try:
        with open(opts.REFMSA) as msa_fh, open(tmpaln, 'w') as aln_fh:
            msa_fmt = seqfile_format(opts.REFMSA)
            source = Verifier(SeqIO.parse(msa_fh, msa_fmt), DNAAlphabet)
            try:
                SeqIO.write(
                    (record if is_dna else translate(record) for record in source),
                    aln_fh,
                    'stockholm'
                )
            except VerifyError:
                if is_dna:
                    raise RuntimeError("DNA encoding incompatible with protein reference MSA")
                # NOTE(review): relies on the Verifier yielding the full
                # record set again after set_alphabet() -- confirm.
                source.set_alphabet(AminoAlphabet)
                # seek(0) alone would leave any longer output from the failed
                # attempt trailing after the rewrite; truncate discards it.
                aln_fh.seek(0)
                aln_fh.truncate()
                SeqIO.write(source, aln_fh, 'stockholm')
        # Build only after the alignment file has been closed and flushed.
        hmmer = HMMER(opts.HMMER_ALIGN_BIN, opts.HMMER_BUILD_BIN)
        hmmer.build(
            tmphmm,
            tmpaln,
            alphabet=HMMER.DNA if is_dna else HMMER.AMINO
        )
        succeeded = True
    finally:
        if exists(tmpaln):
            remove(tmpaln)
        # The HMM path is never returned on failure, so remove it too.
        if not succeeded and exists(tmphmm):
            remove(tmphmm)
    return tmphmm