def blastp(self, sequence, databank):
    """Run blastp for one sequence against a databank and parse the result.

    Args:
        sequence: query sequence; written to a temporary fasta file under
            the id 'target'.
        databank: value passed to blastp's ``-db`` option.

    Returns:
        The result of ``self._parse_alignments`` applied to the text that
        blastp wrote to its ``-out`` file (requested with ``-outfmt 5``).

    Raises:
        InitError: if ``self.blastp_exe`` is not set.
        RuntimeError: if blastp exits with a nonzero status; the message is
            the decoded stderr output of blastp.
    """
    if self.blastp_exe is None:
        raise InitError("blastp executable is not set")

    # A private temporary directory replaces the race-prone tempfile.mktemp()
    # and is removed as a whole, even when writing the query file fails.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'query.fasta')
        output_path = os.path.join(work_dir, 'hits.xml')

        write_fasta(input_path, {'target': sequence})

        cmd = [
            self.blastp_exe,
            '-query', input_path,
            '-db', databank,
            '-outfmt', '5',
            '-out', output_path,
        ]
        _log.debug("{}".format(cmd))

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting. A plain wait() can
        # deadlock as soon as the child fills one of the pipe buffers.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            # Decode leniently, so that unexpected bytes in the tool's output
            # cannot hide the actual error behind a UnicodeDecodeError.
            raise RuntimeError(stderr_data.decode('ascii', errors='replace'))

        with open(output_path, 'r') as f:
            xml_str = f.read()

    return self._parse_alignments(xml_str, sequence, databank)
def align(self, input_):
    """Align the given sequences with clustalw.

    Args:
        input_: the sequences to align. They are passed through
            ``self._fix_input`` and then written with ``write_fasta``.

    Returns:
        An ``Alignment`` built from the fasta output of clustalw, after it
        has been passed through ``self._fix_output``.

    Raises:
        InitError: if ``self.clustalw_exe`` is not set.
        RuntimeError: if clustalw exits with a nonzero status; the message
            holds the stderr output and the (fixed) input.
    """
    if self.clustalw_exe is None:
        raise InitError("clustalw executable is not set")

    input_ = self._fix_input(input_)

    # Work in a private temporary directory: no race as with
    # tempfile.mktemp(), and every file written there is removed afterwards.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output.fasta')

        write_fasta(input_path, input_)

        cmd = [self.clustalw_exe,
               '-TYPE=PROTEIN',
               '-OUTPUT=FASTA',
               '-PWMATRIX=BLOSUM',
               '-OUTFILE=%s' % output_path,
               '-INFILE=%s' % input_path]

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() reads stdout and stderr while waiting, wait() alone
        # blocks forever once the child has filled a pipe buffer.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            raise RuntimeError("%s for %s" % (stderr_data.decode('ascii', errors='replace'),
                                              str(input_)))

        # The output must be parsed before the directory is removed.
        return Alignment(self._fix_output(parse_fasta(output_path)))
def align(self, input_):
    """Align the given sequences with clustalw.

    Args:
        input_: the sequences to align, in the form that ``write_fasta``
            accepts.

    Returns:
        An ``Alignment`` built from the fasta output of clustalw.

    Raises:
        InitError: if ``self.clustalw_exe`` is not set.
        RuntimeError: if clustalw exits with a nonzero status; the message
            holds the stderr output and the input.
    """
    if self.clustalw_exe is None:
        raise InitError("clustalw executable is not set")

    # A private temporary directory avoids the race of tempfile.mktemp() and
    # is cleaned up as a whole, also when writing the input fails.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output.fasta')

        write_fasta(input_path, input_)

        cmd = [self.clustalw_exe,
               '-TYPE=PROTEIN',
               '-OUTPUT=FASTA',
               '-PWMATRIX=BLOSUM',
               '-OUTFILE=%s' % output_path,
               '-INFILE=%s' % input_path]

        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Drain the pipes while waiting: wait() on its own can deadlock when
        # the child writes more than a pipe buffer can hold.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            error_text = stderr_data.decode('ascii', errors='replace')
            raise RuntimeError("%s for %s" % (error_text, str(input_)))

        # Parse while the output file still exists.
        return Alignment(parse_fasta(output_path))
def blastp(self, sequence, databank):
    """Run blastp for one sequence against a databank and parse the result.

    Args:
        sequence: query sequence; written to a temporary fasta file under
            the id 'target'.
        databank: value passed to blastp's ``-db`` option.

    Returns:
        The result of ``self._parse_alignments`` applied to the text that
        blastp wrote to its ``-out`` file (requested with ``-outfmt 5``).

    Raises:
        InitError: if ``self.blastp_exe`` is not set.
        RecoverableError: if blastp reports that it found no alias or index
            file for the databank.
        RuntimeError: if blastp exits with a nonzero status for any other
            reason.
    """
    if self.blastp_exe is None:
        raise InitError("blastp executable is not set")

    # A private temporary directory replaces the race-prone tempfile.mktemp()
    # and is removed as a whole, even when writing the query file fails.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'query.fasta')
        output_path = os.path.join(work_dir, 'hits.xml')

        write_fasta(input_path, {'target': sequence})

        cmd = [self.blastp_exe,
               '-query', input_path,
               '-db', databank,
               '-outfmt', '5',
               '-out', output_path]
        _log.debug("{}".format(cmd))

        # NOTE(review): cwd='/' presumably keeps blastp independent of the
        # caller's working directory - confirm before changing it.
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd='/')
        # communicate() drains both pipes while waiting. A plain wait() can
        # deadlock as soon as the child fills one of the pipe buffers.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            # Lenient decoding: stray non-ascii bytes must not replace the
            # real error with a UnicodeDecodeError.
            err_msg = stderr_data.decode('ascii', errors='replace')
            if err_msg.startswith("BLAST Database error: No alias or index file found for protein database"):
                raise RecoverableError(err_msg)

            raise RuntimeError("%s for databank %s, sequence %s" % (err_msg, databank, sequence))

        with open(output_path, 'r') as f:
            xml_str = f.read()

    return self._parse_alignments(xml_str, sequence, databank)
def align(self, template_sequence, template_secstr, target_sequence,
          gap_open=-13.0, gap_extend=-0.4, modifier=3.0):
    """Align a target sequence to a template sequence using kmad.

    Args:
        template_sequence: sequence of the template, must not be empty.
        template_secstr: secondary structure string of the template; must
            have the same length as ``template_sequence``.
        target_sequence: sequence of the target.
        gap_open: forwarded to ``self._run_kmad``.
        gap_extend: forwarded to ``self._run_kmad``.
        modifier: forwarded to ``self._run_kmad``.

    Returns:
        A ``TargetTemplateAlignment`` of the aligned target and template.

    Raises:
        ValueError: if the template sequence is empty or its length differs
            from the length of the secondary structure string.
    """
    _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence, template_secstr, target_sequence))

    # Prevent kmad from adding insertions in bulges, replace those.
    template_secstr = self._remove_bulges(template_secstr, 'H', 3)
    template_secstr = self._remove_bulges(template_secstr, 'E', 3)

    if len(template_sequence) <= 0:
        raise ValueError("empty template sequence")

    if len(template_sequence) != len(template_secstr):
        raise ValueError(
            "template sequence ({}) has different length than secondary structure ({})"
            .format(len(template_sequence), len(template_secstr)))

    kmad_template_sequence = self._to_kmad_sequence(template_sequence, template_secstr)
    kmad_target_sequence = self._to_kmad_sequence(target_sequence)

    # All files live in a private temporary directory. This avoids the race of
    # tempfile.mktemp() and makes sure that the '_al' output file is removed
    # too when kmad fails halfway.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output')

        write_fasta(input_path, {
            'target': kmad_target_sequence,
            'template': kmad_template_sequence
        })

        self._run_kmad(input_path, output_path, gap_open, gap_extend, modifier)

        # The alignment is read from the given output name with '_al' appended.
        aligned = parse_fasta(output_path + '_al')

        _log.debug("kmad aligned\n{}\n{}".format(aligned['target'], aligned['template']))

    alignment = TargetTemplateAlignment(aligned['target'], aligned['template'])
    return alignment
def blastp(self, sequence, databank):
    """Search a databank with blastp and return the parsed alignments.

    Args:
        sequence: query sequence; written to a temporary fasta file under
            the id 'target'.
        databank: value passed to blastp's ``-db`` option.

    Returns:
        The result of ``self._parse_alignments`` applied to the text that
        blastp wrote to its ``-out`` file (requested with ``-outfmt 5``).

    Raises:
        InitError: if ``self.blastp_exe`` is not set.
        RecoverableError: if blastp reports that it found no alias or index
            file for the databank.
        RuntimeError: if blastp exits with a nonzero status for any other
            reason.
    """
    if self.blastp_exe is None:
        raise InitError("blastp executable is not set")

    # Use a private temporary directory instead of tempfile.mktemp(): the
    # names cannot be claimed by another process and cleanup is guaranteed.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'query.fasta')
        output_path = os.path.join(work_dir, 'hits.xml')

        write_fasta(input_path, {'target': sequence})

        cmd = [
            self.blastp_exe,
            '-query', input_path,
            '-db', databank,
            '-outfmt', '5',
            '-out', output_path
        ]
        _log.debug("{}".format(cmd))

        # NOTE(review): cwd='/' presumably keeps blastp independent of the
        # caller's working directory - confirm before changing it.
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd='/')
        # Read the pipes while waiting; wait() without reading can block
        # forever when the child produces more output than a pipe can buffer.
        _, stderr_data = p.communicate()

        if p.returncode != 0:
            # errors='replace' keeps the real message visible when blastp
            # prints bytes outside of ascii.
            err_msg = stderr_data.decode('ascii', errors='replace')
            if err_msg.startswith(
                    "BLAST Database error: No alias or index file found for protein database"
            ):
                raise RecoverableError(err_msg)

            raise RuntimeError("%s for databank %s, sequence %s" % (err_msg, databank, sequence))

        with open(output_path, 'r') as f:
            xml_str = f.read()

    return self._parse_alignments(xml_str, sequence, databank)
def align(self, template_sequence, template_secstr, target_sequence,
          gap_open=-13.0, gap_extend=-0.4, modifier=3.0):
    """Align a target sequence to a template sequence using kmad.

    Args:
        template_sequence: sequence of the template, must not be empty.
        template_secstr: secondary structure string of the template; must
            have the same length as ``template_sequence``.
        target_sequence: sequence of the target.
        gap_open: forwarded to ``self._run_kmad``.
        gap_extend: forwarded to ``self._run_kmad``.
        modifier: forwarded to ``self._run_kmad``.

    Returns:
        A ``TargetTemplateAlignment`` of the aligned target and template.

    Raises:
        ValueError: if the template sequence is empty or its length differs
            from the length of the secondary structure string.
    """
    _log.debug("kmad align\n{}\n{}\n{}".format(template_sequence, template_secstr, target_sequence))

    # Prevent kmad from adding insertions in bulges, replace those.
    template_secstr = self._remove_bulges(template_secstr, 'H', 3)
    template_secstr = self._remove_bulges(template_secstr, 'E', 3)

    if len(template_sequence) <= 0:
        raise ValueError("empty template sequence")

    if len(template_sequence) != len(template_secstr):
        raise ValueError("template sequence ({}) has different length than secondary structure ({})"
                         .format(len(template_sequence), len(template_secstr)))

    kmad_template_sequence = self._to_kmad_sequence(template_sequence, template_secstr)
    kmad_target_sequence = self._to_kmad_sequence(target_sequence)

    # Keep the input and every output file in one private temporary directory:
    # no tempfile.mktemp() race, and nothing is left behind when kmad fails
    # before its '_al' file has been picked up.
    with tempfile.TemporaryDirectory() as work_dir:
        input_path = os.path.join(work_dir, 'input.fasta')
        output_path = os.path.join(work_dir, 'output')

        write_fasta(input_path, {'target': kmad_target_sequence,
                                 'template': kmad_template_sequence})

        self._run_kmad(input_path, output_path, gap_open, gap_extend, modifier)

        # The alignment is read from the given output name with '_al' appended.
        aligned_path = output_path + '_al'
        aligned = parse_fasta(aligned_path)

        _log.debug("kmad aligned\n{}\n{}".format(aligned['target'], aligned['template']))

    alignment = TargetTemplateAlignment(aligned['target'], aligned['template'])
    return alignment
def _wrap_template(self, main_target_sequence, target_species_id, main_domain_alignment, template_id):
    """Pack the template structure and its alignment into a model archive.

    The archive contains 'align.fa' (target and template alignment),
    'target.pdb' (the pdb contents of the template) and
    'selected-targets.txt'.

    Args:
        main_target_sequence: used to derive the model name and tar path.
        target_species_id: used to derive the model name and tar path.
        main_domain_alignment: alignment that provides ``target_alignment``
            and ``template_alignment``.
        template_id: template identifier; its ``pdbid`` and ``chain_id``
            are used.

    Returns:
        The path of the gzipped tar archive that was written.
    """
    model_name = model_storage.get_model_name(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)

    work_dir_path = tempfile.mkdtemp()
    align_fasta_path = os.path.join(work_dir_path, 'align.fa')

    # Remember where to go back to. Without this, the process would be left
    # inside the work directory after that directory has been deleted.
    fallback_cwd = os.path.dirname(work_dir_path)
    try:
        previous_cwd = os.getcwd()
    except OSError:
        # The current directory does not exist anymore.
        previous_cwd = fallback_cwd

    try:
        os.chdir(work_dir_path)

        write_fasta(
            align_fasta_path, {
                'target': main_domain_alignment.target_alignment,
                str(template_id): main_domain_alignment.template_alignment
            })

        model_path = os.path.join(work_dir_path, 'target.pdb')
        with open(model_path, 'w') as f:
            f.write(get_pdb_contents(template_id.pdbid))

        self._write_selected_targets(
            {template_id.chain_id: main_domain_alignment},
            os.path.join(work_dir_path, 'selected-targets.txt'))

        tar_path = model_storage.get_tar_path(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path
    finally:
        # Step out of the work directory before it is removed.
        try:
            os.chdir(previous_cwd)
        except OSError:
            os.chdir(fallback_cwd)

        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)
def _wrap_template(self, main_target_sequence, target_species_id, main_domain_alignment, template_id):
    """Pack the template structure and its alignment into a model archive.

    The archive contains 'target.fa' (the full target sequence), 'align.fa'
    (target and template alignment), 'target.pdb' (the pdb contents of the
    template), 'selected-targets.txt' and 'model.log'.

    Args:
        main_target_sequence: full target sequence; also used to derive the
            model name and tar path.
        target_species_id: used to derive the model name and tar path.
        main_domain_alignment: alignment that provides ``target_alignment``
            and ``template_alignment``.
        template_id: template identifier; its ``pdbid`` and ``chain_id``
            are used.

    Returns:
        The path of the gzipped tar archive that was written.
    """
    model_name = model_storage.get_model_name(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)

    work_dir_path = tempfile.mkdtemp()
    align_fasta_path = os.path.join(work_dir_path, 'align.fa')
    full_target_path = os.path.join(work_dir_path, 'target.fa')

    # Remember where to go back to. Without this, the process would be left
    # inside the work directory after that directory has been deleted.
    fallback_cwd = os.path.dirname(work_dir_path)
    try:
        previous_cwd = os.getcwd()
    except OSError:
        # The current directory does not exist anymore.
        previous_cwd = fallback_cwd

    try:
        # Written inside the try block, so that the work directory is also
        # removed when this first write fails.
        write_fasta(full_target_path, {'target': main_target_sequence})

        os.chdir(work_dir_path)

        write_fasta(align_fasta_path,
                    {'target': main_domain_alignment.target_alignment,
                     str(template_id): main_domain_alignment.template_alignment})

        model_path = os.path.join(work_dir_path, 'target.pdb')
        with open(model_path, 'w') as f:
            f.write(get_pdb_contents(template_id.pdbid))

        self._write_selected_targets({template_id.chain_id: main_domain_alignment},
                                     os.path.join(work_dir_path, 'selected-targets.txt'))

        log_path = os.path.join(work_dir_path, 'model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path
    finally:
        # Step out of the work directory before it is removed.
        try:
            os.chdir(previous_cwd)
        except OSError:
            os.chdir(fallback_cwd)

        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)
if P_PROT.match(sequence.value): fasta_key = 'pdb|%s|%s' % (pdbid.upper(), chain.value) # PDBFINDER can contain multiple sequences with the same id. # Always take the largest. if fasta_key in sequences and len(sequences[fasta_key]) > len(sequence.value): continue # Blast cannot handle '-' in the sequence, so replace it with 'X'. sequences[fasta_key] = sequence.value.replace('-', 'X') _log.info("{} {}".format(fasta_key, sequences[fasta_key])) return sequences if __name__ == "__main__": logging.basicConfig() if settings['DEBUG']: _log.setLevel(logging.DEBUG) parser = ArgumentParser(description='Make a fasta of all usable templates') parser.add_argument('output_file', help='the output fasta file') args = parser.parse_args() sequences = get_sequences() write_fasta(args.output_file, sequences)
sequences = {} for tar_path in model_storage.list_all_models(): try: contents = model_storage.extract_model(tar_path) except: continue seqres = parse_seqres_from_string(contents) for chain_id in seqres: sequences[tar_path + '|' + chain_id] = ''.join([aa.letter for aa in seqres[chain_id]]) return sequences if __name__ == "__main__": logging.basicConfig() if settings['DEBUG']: _log.setLevel(logging.DEBUG) parser = ArgumentParser(description='Make a fasta of all usable models') parser.add_argument('output_file', help='the output fasta file') args = parser.parse_args() sequences = get_sequences() write_fasta(args.output_file, sequences)
def _model_run(self, main_domain_alignment, chain_alignments, context, main_target_sequence, require_resnum):
    """Run yasara's homology modeling experiment and archive the result.

    Args:
        main_domain_alignment: alignment of the main target; used for the
            model name, the tar path and the output check.
        chain_alignments: alignments per chain; written as the modeling
            alignment and as 'selected-targets.txt'.
        context: modeling context that gives access to yasara, the template
            and the target.
        main_target_sequence: forwarded to ``self._model_covers_residue``.
        require_resnum: if not None, a model that does not match the input
            alignment is only accepted when ``self._model_covers_residue``
            confirms it for this residue number.

    Returns:
        The path of the gzipped tar archive that holds the work directory.

    Raises:
        ModelRunError: if yasara reports an error, produces no output or
            produces a model that fails the checks.
    """
    model_name = model_storage.get_model_name(context.get_main_target_sequence(),
                                              context.target_species_id,
                                              main_domain_alignment,
                                              TemplateID(context.template_pdbid,
                                                         context.main_target_chain_id))

    work_dir_path = tempfile.mkdtemp()
    try:
        # Inside the try block: the work directory must not be left behind
        # when writing the target sequence fails.
        write_fasta(os.path.join(work_dir_path, 'target.fa'),
                    {'target': context.get_main_target_sequence()})

        return self._model_run_in_work_dir(work_dir_path, model_name,
                                           main_domain_alignment, chain_alignments, context,
                                           main_target_sequence, require_resnum)
    finally:
        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)

def _model_run_in_work_dir(self, work_dir_path, model_name, main_domain_alignment, chain_alignments,
                           context, main_target_sequence, require_resnum):
    """Do the modeling inside an existing work directory and pack it into the model archive."""
    align_fasta_path = os.path.join(work_dir_path, 'align.fa')
    output_yob_path = os.path.join(work_dir_path, 'target.yob')
    error_path = os.path.join(work_dir_path, 'errorexit.txt')
    error_scene_path = os.path.join(work_dir_path, 'errorexit.sce')
    before_scene_path = os.path.join(work_dir_path, 'beforemodel.sce')

    try:
        context.yasara.CD(work_dir_path)
        context.yasara.SaveSce(before_scene_path)

        # Snapshot of the chains, to detect changes made during modeling.
        chain_ids_before_model = context.get_chain_ids()
        sequences_before_model = {chain_id: context.get_sequence(chain_id)
                                  for chain_id in chain_ids_before_model}

        self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

        context.yasara.Processors(1)

        context.yasara.ExperimentHomologyModeling(templateobj=context.template_obj,
                                                  alignfile=align_fasta_path,
                                                  templates="1, sameseq = 1",
                                                  alignments=1,
                                                  termextension=0,
                                                  oligostate=32,
                                                  looplenmax=10,
                                                  animation='fast',
                                                  speed='fast',
                                                  loopsamples=20,
                                                  resultfile='target')
        context.yasara.Experiment("On")
        context.yasara.Wait("Expend")

        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
        elif os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")

        if not os.path.isfile(output_yob_path):
            # No output: find out whether the chains were altered, to give a
            # more specific error message.
            chain_ids_after_failure = context.get_chain_ids()
            if chain_ids_before_model != chain_ids_after_failure:
                raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                    .format(chain_ids_before_model, chain_ids_after_failure))

            for chain_id in chain_ids_before_model:
                sequence_after_failure = context.get_sequence(chain_id)
                if sequence_after_failure != sequences_before_model[chain_id]:
                    raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                        .format(chain_id, sequences_before_model[chain_id],
                                                sequence_after_failure))

            raise ModelRunError("yasara generated no output yob, check the console for further details")

        chain_ids_after_build = context.get_chain_ids()
        if context.main_target_chain_id not in chain_ids_after_build:
            raise ModelRunError(f"The chain {context.main_target_chain_id} is not in the final model output by yasara")

        _log.debug("after modeling {}".format([(chain_id, context.get_sequence(chain_id))
                                               for chain_id in context.get_chain_ids()]))

        expected_sequence = main_domain_alignment.get_target_sequence_without_insertions()
        _log.debug("input target aligned sequence:\n{}".format(expected_sequence))

        # At least one chain should carry the aligned target sequence. If not,
        # the model is only accepted when it covers the required residue.
        if not any(context.get_sequence(chain_id) == expected_sequence
                   for chain_id in context.get_chain_ids()):
            if require_resnum is not None and \
                    not self._model_covers_residue(context, main_target_sequence, require_resnum):
                raise ModelRunError("yasara generated a model that doesn't match the input alignment")

        model_path = os.path.join(work_dir_path, 'target.pdb')
        context.yasara.SavePDB(context.template_obj, model_path)

        self._write_selected_targets(chain_alignments,
                                     os.path.join(work_dir_path, 'selected-targets.txt'))

        log_path = os.path.join(work_dir_path, 'model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(context.get_main_target_sequence(),
                                              context.target_species_id,
                                              main_domain_alignment,
                                              TemplateID(context.template_pdbid,
                                                         context.main_target_chain_id))
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path

    except RuntimeError as e:
        self._log_additional_error_info(e, chain_alignments, context)

        # Prefer the error that yasara left in the work directory, if any.
        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
        elif os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")
        else:
            # Bare raise keeps the original traceback intact.
            raise
def _model_run(self, main_domain_alignment, chain_alignments, context):
    """Run yasara's homology modeling experiment and archive the result.

    Args:
        main_domain_alignment: alignment of the main target; used for the
            model name, the tar path and error handling.
        chain_alignments: alignments per chain; written as the modeling
            alignment and as 'selected-targets.txt'.
        context: modeling context that gives access to yasara, the template
            and the target.

    Returns:
        The path of the gzipped tar archive that holds the work directory.

    Raises:
        ModelRunError: if yasara reports an error or produces no output.
    """
    model_name = model_storage.get_model_name(context.get_main_target_sequence(),
                                              context.target_species_id,
                                              main_domain_alignment,
                                              TemplateID(context.template_pdbid,
                                                         context.main_target_chain_id))

    work_dir_path = tempfile.mkdtemp()
    try:
        # Inside the try block: the work directory must not be left behind
        # when writing the target sequence fails.
        write_fasta(os.path.join(work_dir_path, 'target.fa'),
                    {'target': context.get_main_target_sequence()})

        return self._model_run_in_work_dir(work_dir_path, model_name,
                                           main_domain_alignment, chain_alignments, context)
    finally:
        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)

def _model_run_in_work_dir(self, work_dir_path, model_name, main_domain_alignment, chain_alignments, context):
    """Do the modeling inside an existing work directory and pack it into the model archive."""
    align_fasta_path = os.path.join(work_dir_path, 'align.fa')
    output_yob_path = os.path.join(work_dir_path, 'target.yob')
    error_path = os.path.join(work_dir_path, 'errorexit.txt')
    error_scene_path = os.path.join(work_dir_path, 'errorexit.sce')
    before_scene_path = os.path.join(work_dir_path, 'beforemodel.sce')

    try:
        context.yasara.CD(work_dir_path)
        context.yasara.SaveSce(before_scene_path)

        # Snapshot of the chains, to detect changes made during modeling.
        chain_ids_before_model = context.get_chain_ids()
        sequences_before_model = {chain_id: context.get_sequence(chain_id)
                                  for chain_id in chain_ids_before_model}

        self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

        context.yasara.Processors(1)

        context.yasara.ExperimentHomologyModeling(templateobj=context.template_obj,
                                                  alignfile=align_fasta_path,
                                                  templates="1, sameseq = 1",
                                                  alignments=1,
                                                  termextension=0,
                                                  oligostate=32,
                                                  looplenmax=10,
                                                  animation='fast',
                                                  speed='fast',
                                                  loopsamples=20,
                                                  resultfile='target')
        context.yasara.Experiment("On")
        context.yasara.Wait("Expend")

        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
        elif os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")

        if not os.path.isfile(output_yob_path):
            # No output: find out whether the chains were altered, to give a
            # more specific error message.
            chain_ids_after_failure = context.get_chain_ids()
            if chain_ids_before_model != chain_ids_after_failure:
                raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                    .format(chain_ids_before_model, chain_ids_after_failure))

            for chain_id in chain_ids_before_model:
                sequence_after_failure = context.get_sequence(chain_id)
                if sequence_after_failure != sequences_before_model[chain_id]:
                    raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                        .format(chain_id, sequences_before_model[chain_id],
                                                sequence_after_failure))

            raise ModelRunError("yasara generated no output yob, check the console for further details")

        model_path = os.path.join(work_dir_path, 'target.pdb')
        context.yasara.SavePDB(context.template_obj, model_path)

        self._write_selected_targets(chain_alignments,
                                     os.path.join(work_dir_path, 'selected-targets.txt'))

        log_path = os.path.join(work_dir_path, 'model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(context.get_main_target_sequence(),
                                              context.target_species_id,
                                              main_domain_alignment,
                                              TemplateID(context.template_pdbid,
                                                         context.main_target_chain_id))
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path

    except RuntimeError as e:
        self._log_additional_error_info(e, chain_alignments, context)

        # Prefer the error that yasara left in the work directory, if any.
        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
        elif os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")
        else:
            # Bare raise keeps the original traceback intact.
            raise