def _build_profile(self, msa: str, model_construction: str = 'fast') -> str:
    """Build an HMM profile from an alignment by running the HMMBuild binary.

    The alignment is written to a temporary file, the binary at
    `self.binary_path` is invoked on it, and the HMM file it produces is
    read back and returned.

    Args:
        msa: Alignment text, written verbatim to the temporary input file.
        model_construction: Either 'fast' (default) or 'hand'. 'hand' adds
            the `--hand` flag to the command line; 'fast' adds nothing.

    Returns:
        The contents of the HMM file written by the binary.

    Raises:
        ValueError: If `model_construction` is not 'hand' or 'fast'.
        RuntimeError: If the subprocess exits with a non-zero return code.
    """
    if model_construction not in {'hand', 'fast'}:
        raise ValueError(f'HMMBuild: invalid model construction {model_construction}')

    with utils.tmpdir_manager() as query_tmp_dir:
        input_query = query_tmp_dir / Path('query.msa')
        output_hmm_path = query_tmp_dir / Path('output.hmm')

        # Written as UTF-8 so it matches the encoding used to read the output.
        with open(input_query, 'w', encoding='utf-8') as f:
            f.write(msa)

        # Flags are placed before the positional output and input paths.
        cmd = [self.binary_path.as_posix()]
        if model_construction == 'hand':
            cmd.append(f'--{model_construction}')
        # NOTE(review): the bare name `singlemx` is not defined in this method;
        # this assumes the instance exposes `singlemx` -- confirm in __init__.
        if self.singlemx:
            cmd.append('--singlemx')
        cmd += ['--amino', output_hmm_path.as_posix(), input_query.as_posix()]

        # Space-separated so the logged command line is readable.
        print(f"Launching subprocess {' '.join(cmd)}")
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing('HMMBuild query'):
            stdout, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            raise RuntimeError(
                f"HMMBuild failed:\nstdout:\n{stdout.decode('utf-8')}\n"
                f"stderr:\n{stderr.decode('utf-8')}"
            )

        # Must be read before the temporary directory context exits.
        with open(output_hmm_path, encoding='utf-8') as f:
            hmm = f.read()

    return hmm
def query(self, a3m: str) -> str:
    """Run the HHSearch binary on an A3M alignment and return its HHR output.

    Args:
        a3m: Alignment text, written verbatim to a temporary `query.a3m`.

    Returns:
        The contents of the `.hhr` file produced by the binary.

    Raises:
        RuntimeError: If the subprocess exits with a non-zero return code.
    """
    with utils.tmpdir_manager() as query_tmp_dir:
        input_path = query_tmp_dir / Path('query.a3m')
        hhr_path = query_tmp_dir / Path('output.hhr')

        with open(input_path, 'w') as f:
            f.write(a3m)

        # One `-d <path>` pair per configured database.
        db_cmd = []
        for db_path in self.databases:
            db_cmd.extend(['-d', db_path.as_posix()])

        cmd = [
            self.binary_path.as_posix(),
            '-i', input_path.as_posix(),
            '-o', hhr_path.as_posix(),
            '-maxseq', str(self.maxseq),
        ] + db_cmd

        # Space-separated so the logged command line is readable.
        print(f'Launching subprocess {" ".join(cmd)}')
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing('HHSearch query'):
            stdout, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            # stderr is truncated by bytes, which may split a multi-byte
            # character; decode leniently so the real failure is still reported.
            stderr_text = stderr[:100000].decode('utf-8', errors='replace')
            raise RuntimeError(
                f'HHSearch failed:\nstdout:\n{stdout.decode("utf-8")}\n'
                f'stderr:\n{stderr_text}'
            )

        # Must be read before the temporary directory context exits.
        with open(hhr_path) as f:
            hhr = f.read()

    return hhr
def _query_chunk(self, input_fasta_path: Path, database_path: Path) -> Mapping[str, Any]:
    """Run the binary at `self.binary_path` for one query against one database.

    Args:
        input_fasta_path: Path to the query FASTA file.
        database_path: Path to the sequence database to search.

    Returns:
        A dict with keys:
            'sto': text of the alignment file written via `-A`.
            'tbl': text of the tabular output if `self.get_tblout` is set,
                otherwise an empty string.
            'stderr': raw stderr bytes of the subprocess.
            'n_iter': `self.n_iter`.
            'e_value': `self.e_value`.

    Raises:
        RuntimeError: If the subprocess exits with a non-zero return code.
    """
    with utils.tmpdir_manager() as query_tmp_dir:
        sto_path = query_tmp_dir / Path('output.sto')

        cmd_flags = [
            # Main text output is discarded; the alignment goes to sto_path.
            '-o', '/dev/null',
            '-A', sto_path.as_posix(),
            '--noali',
            '--F1', str(self.filter_f1),
            '--F2', str(self.filter_f2),
            '--F3', str(self.filter_f3),
            '--incE', str(self.e_value),
            '-E', str(self.e_value),
            '--cpu', str(self.n_cpu),
            '-N', str(self.n_iter),
        ]

        tblout_path = None
        if self.get_tblout:
            tblout_path = query_tmp_dir / Path('tblout.txt')
            # HMMER long options take a double dash.
            cmd_flags.extend(['--tblout', tblout_path.as_posix()])

        if self.z_value is not None:
            cmd_flags.extend(['-Z', str(self.z_value)])

        if self.dom_e is not None:
            cmd_flags.extend(['--domE', str(self.dom_e)])

        if self.incdom_e is not None:
            cmd_flags.extend(['--incdomE', str(self.incdom_e)])

        cmd = (
            [self.binary_path.as_posix()]
            + cmd_flags
            + [input_fasta_path.as_posix(), database_path.as_posix()]
        )

        # Space-separated so the logged command line is readable.
        print(f"Launching subprocess {' '.join(cmd)}")
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing(f'Jackhammer {database_path.name} query'):
            _, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            raise RuntimeError(f"Jackhammer failed: {stderr.decode('utf-8')}")

        # Output files must be read before the temporary directory is removed.
        tbl = ''
        if tblout_path is not None:
            with open(tblout_path) as f:
                tbl = f.read()

        with open(sto_path) as f:
            sto = f.read()

    raw_output = dict(
        sto=sto,
        tbl=tbl,
        stderr=stderr,
        n_iter=self.n_iter,
        e_value=self.e_value,
    )
    return raw_output
def query(self, input_fasta_path: Path) -> Mapping[str, Any]:
    """Run the HHBlits binary on a FASTA query and collect its A3M output.

    Args:
        input_fasta_path: Path to the query FASTA file.

    Returns:
        A dict with keys:
            'a3m': text of the alignment file written via `-oa3m`.
            'output': raw stdout bytes of the subprocess.
            'stderr': raw stderr bytes of the subprocess.
            'n_iter': `self.n_iter`.
            'e_value': `self.e_value`.

    Raises:
        RuntimeError: If the subprocess exits with a non-zero return code.
    """
    with utils.tmpdir_manager() as query_tmp_dir:
        a3m_path = query_tmp_dir / Path('output.a3m')

        # One `-d <path>` pair per configured database.
        db_cmd = []
        for db_path in self.databases:
            db_cmd.extend(['-d', db_path.as_posix()])

        cmd = [
            # str() accepts both str and Path, so every element is a string
            # and the command can be joined for logging.
            str(self.binary_path),
            '-i', input_fasta_path.as_posix(),
            '-cpu', str(self.n_cpu),
            '-oa3m', a3m_path.as_posix(),
            '-o', '/dev/null',
            '-n', str(self.n_iter),
            '-e', str(self.e_value),
            '-maxseq', str(self.maxseq),
            '-realign_max', str(self.realign_max),
            '-maxfilt', str(self.maxfilt),
            '-min_prefilter_hits', str(self.min_prefilter_hits),
        ]
        if self.all_seqs:
            cmd.append('-all')
        if self.alt:
            cmd.extend(['-alt', str(self.alt)])
        # -P / -Z are only passed when they differ from the class defaults.
        if self.p != HHBlits._DEFAULT_P:
            cmd.extend(['-P', str(self.p)])
        if self.z != HHBlits._DEFAULT_Z:
            cmd.extend(['-Z', str(self.z)])
        cmd.extend(db_cmd)

        # Space-separated so the logged command line is readable.
        print(f'Launching subprocess {" ".join(cmd)}')
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing('HHBlits query'):
            stdout, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            # stderr is truncated by bytes, which may split a multi-byte
            # character; decode leniently so the real failure is still reported.
            stderr_text = stderr[:500000].decode('utf-8', errors='replace')
            raise RuntimeError(
                f"HHBlits failed:\nstdout:\n{stdout.decode('utf-8')}\n"
                f"stderr:\n{stderr_text}"
            )

        # Must be read before the temporary directory context exits.
        with open(a3m_path) as f:
            a3m = f.read()

    raw_output = dict(
        a3m=a3m,
        output=stdout,
        stderr=stderr,
        n_iter=self.n_iter,
        e_value=self.e_value,
    )
    return raw_output
def align(self, sequences: Sequence[str]) -> str:
    """Align the given sequences by running the Kalign binary.

    The sequences are written to a temporary FASTA-style file with generated
    headers ('sequence 1', 'sequence 2', ...), the binary at
    `self.binary_path` is run on it, and its output file is returned.

    Args:
        sequences: Sequences to align; each must have at least 6 characters.

    Returns:
        The contents of the output file written by the binary.

    Raises:
        ValueError: If any sequence is shorter than 6 characters.
        RuntimeError: If the subprocess exits with a non-zero return code.
    """

    def _to_a3m(sequences: Sequence[str]) -> str:
        """Render sequences as '>sequence N' header/sequence line pairs."""
        lines = []
        for index, sequence in enumerate(sequences, start=1):
            lines.append(f'>sequence {index}\n')
            lines.append(f'{sequence}\n')
        return ''.join(lines)

    print(f'Aligning {len(sequences)} sequences')

    for seq in sequences:
        if len(seq) < 6:
            raise ValueError(
                f'Kalign: sequences should be at least 6 res long, got {seq}, {len(seq)}'
            )

    with utils.tmpdir_manager() as query_tmp_dir:
        # Paths are built from the directory yielded by the context manager.
        input_fasta_path = query_tmp_dir / Path('input.fasta')
        output_a3m_path = query_tmp_dir / Path('output.a3m')

        with open(input_fasta_path, 'w') as f:
            f.write(_to_a3m(sequences))

        cmd = [
            self.binary_path.as_posix(),
            '-i', input_fasta_path.as_posix(),
            '-o', output_a3m_path.as_posix(),
            '-format', 'fasta',
        ]

        # Space-separated so the logged command line is readable.
        print(f"Launching subprocess {' '.join(cmd)}")
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing('Kalign query'):
            stdout, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            raise RuntimeError(
                f"Kalign failed:\nstdout:\n{stdout.decode('utf-8')}\n"
                f"stderr:\n{stderr.decode('utf-8')}"
            )

        # Must be read before the temporary directory context exits.
        with open(output_a3m_path) as f:
            a3m = f.read()

    return a3m
def query(self, hmm: str) -> str:
    """Run the HMMSearch binary with an HMM profile against the database.

    Args:
        hmm: HMM profile text, written verbatim to a temporary `query.hmm`.

    Returns:
        The contents of the alignment file written via `-A`.

    Raises:
        RuntimeError: If the subprocess exits with a non-zero return code.
    """
    with utils.tmpdir_manager() as query_tmp_dir:
        hmm_input_path = query_tmp_dir / Path('query.hmm')
        output_a3m_path = query_tmp_dir / Path('output.a3m')

        with open(hmm_input_path, 'w') as f:
            f.write(hmm)

        cmd = [
            self.binary_path.as_posix(),
            '--noali',
            '--cpu', str(self.n_cpu),
        ]
        # Extra user-supplied flags go before the output and positional args.
        if self.flags:
            cmd.extend(self.flags)
        cmd.extend([
            '-A', output_a3m_path.as_posix(),
            hmm_input_path.as_posix(),
            # str() accepts both str and Path, so every element is a string
            # and the command can be joined for logging.
            str(self.database_path),
        ])

        # Space-separated so the logged command line is readable.
        print(f"Launching subprocess {' '.join(cmd)}")
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with utils.timing('HMMSearch query'):
            stdout, stderr = process.communicate()
            retcode = process.wait()

        if retcode:
            raise RuntimeError(
                f"HMMSearch failed:\nstdout:\n{stdout.decode('utf-8')}\n"
                f"stderr:\n{stderr.decode('utf-8')}"
            )

        # Must be read before the temporary directory context exits.
        with open(output_a3m_path) as f:
            a3m = f.read()

    return a3m