def _prepare_template(self, context, template_pdbid):
    """Load the template into the given context and prepare it for modeling.

    Steps, in order: initialize the template, try to oligomerize it, try to
    build the template symmetry residues, delete solvent and exotic
    residues, fix template errors and let yasara clean the template object.

    :param context: modeling context that holds the template
    :param template_pdbid: template identifier, handed to _init_template
    :return: the context that was passed in
    """
    self._init_template(template_pdbid, context)
    ModelLogger.get_current().add("starting with template with {} chains"
                                  .format(len(context.get_chain_ids())))

    try:
        self._oligomerize_template(context)
    except Exception as e:
        # Deliberate fallback: continue with a freshly initialized template.
        # Only Exception is caught, so KeyboardInterrupt and SystemExit
        # are not swallowed.
        _log.warning("oligomerization of template %s failed, reinitializing it: %s",
                     template_pdbid, e)
        self._init_template(template_pdbid, context)

    ModelLogger.get_current().add("after oligomerization: {} chains"
                                  .format(len(context.get_chain_ids())))

    try:
        self._build_template_symmetry_residues(context)
    except Exception as e:
        # Best effort: a failure here does not stop the preparation,
        # but it is no longer silent.
        _log.warning("could not build symmetry residues for template %s: %s",
                     template_pdbid, e)

    self._delete_solvent_residues(context)
    self._delete_exotic_residues(context)
    self._fix_template_errors(context)
    context.yasara.CleanObj(context.template_obj)

    return context
def _prepare_context(self, template_pdbid):
    """Create a modeling context and prepare the given template in it.

    Steps, in order: create the context, initialize the template, try to
    oligomerize it, try to build the template symmetry residues, delete
    solvent residues, fix template errors and let yasara clean the
    template object.

    :param template_pdbid: template identifier, handed to _init_template
    :return: the newly created ModelingContext
    :raises InitError: when self.yasara_dir is not set
    """
    if self.yasara_dir is None:
        raise InitError("yasara dir is not set")

    context = ModelingContext(self.yasara_dir)

    self._init_template(template_pdbid, context)
    ModelLogger.get_current().add("starting with template with {} chains"
                                  .format(len(context.get_chain_ids())))

    try:
        self._oligomerize_template(context)
    except Exception as e:
        # Deliberate fallback: continue with a freshly initialized template.
        # Only Exception is caught, so KeyboardInterrupt and SystemExit
        # are not swallowed.
        _log.warning("oligomerization of template %s failed, reinitializing it: %s",
                     template_pdbid, e)
        self._init_template(template_pdbid, context)

    ModelLogger.get_current().add("after oligomerization: {} chains"
                                  .format(len(context.get_chain_ids())))

    try:
        self._build_template_symmetry_residues(context)
    except Exception as e:
        # Best effort: a failure here does not stop the preparation,
        # but it is no longer silent.
        _log.warning("could not build symmetry residues for template %s: %s",
                     template_pdbid, e)

    self._delete_solvent_residues(context)
    self._fix_template_errors(context)
    context.yasara.CleanObj(context.template_obj)

    return context
def create_model(target_sequence, target_species_id, require_resnum=None, chosen_template_id=None):
    """Return an existing model for the target, or build a new one.

    The whole lookup-and-build sequence runs under a file lock whose name
    is derived from the sequence id, species id, residue number and
    template id.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier; upper-cased before use
    :param require_resnum: optional residue number, forwarded to the model
                           lookup, the domain aligner and the modeler
    :param chosen_template_id: optional template id, forwarded to the model
                               lookup and the domain aligner
    :return: the result of select_best_model when models already exist,
             None when no domain alignments were found, otherwise the
             result of modeler.build_model
    :raises InitError: when the model directory is not set
    """
    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id, target_species_id,
                                             str(require_resnum), str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)

    with FileLock(lock_path):
        model_paths = model_storage.list_models(target_sequence, target_species_id,
                                                require_resnum, chosen_template_id)
        if len(model_paths) > 0:
            return select_best_model(model_paths, target_sequence, require_resnum)

        ModelLogger.get_current().clear()

        domain_alignments = domain_aligner.get_domain_alignments(target_sequence,
                                                                 require_resnum,
                                                                 chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is a deprecated alias; warning is the supported name.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum, chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
def create_model(target_sequence, target_species_id, require_resnum=None, chosen_template_id=None):
    """Return an existing model for the target, or build a new one.

    The whole lookup-and-build sequence runs under a file lock whose name
    is derived from the sequence id, species id, residue number and
    template id.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier; upper-cased before use
    :param require_resnum: optional residue number, forwarded to the model
                           lookup, the domain aligner and the modeler
    :param chosen_template_id: optional template id, forwarded to the model
                               lookup and the domain aligner
    :return: the result of select_best_model when models already exist,
             None when no domain alignments were found, otherwise the
             result of modeler.build_model
    :raises InitError: when the model directory is not set
    """
    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id, target_species_id,
                                             str(require_resnum), str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)

    with FileLock(lock_path):
        model_paths = model_storage.list_models(target_sequence, target_species_id,
                                                require_resnum, chosen_template_id)
        if len(model_paths) > 0:
            return select_best_model(model_paths)

        ModelLogger.get_current().clear()

        domain_alignments = domain_aligner.get_domain_alignments(target_sequence,
                                                                 require_resnum,
                                                                 chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is a deprecated alias; warning is the supported name.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum, chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
def build_model(self, main_target_sequence, target_species_id, main_domain_alignment, require_resnum=None):
    """Build (or reuse) the model archive for a target and domain alignment.

    Runs under the model lock from model_storage. When the archive file
    already exists its path is returned right away. Otherwise the template
    is prepared in a fresh ModelingContext; if the template sequence equals
    the aligned template sequence at 100% identity the template itself is
    wrapped, else alignments are made for the template chains and a
    modeling run is started.

    :param main_target_sequence: sequence of the main target
    :param target_species_id: species identifier of the target
    :param main_domain_alignment: alignment holding the template id to use
    :param require_resnum: optional residue number, forwarded to
                           _make_alignments and _model_run
    :return: path of the model archive
    :raises InitError: when self.yasara_dir is not set
    """
    ModelLogger.get_current().add(
        "building model with sequence {}, species {}, alignment {} and resnum {}"
        .format(main_target_sequence, target_species_id, main_domain_alignment, require_resnum))

    template_id = main_domain_alignment.template_id
    storage_key = (main_target_sequence, target_species_id, main_domain_alignment, template_id)

    tar_path = model_storage.get_tar_path(*storage_key)

    with model_storage.get_model_lock(*storage_key):
        # Somebody already produced this archive: nothing to do.
        if os.path.isfile(tar_path):
            return tar_path

        if self.yasara_dir is None:
            raise InitError("yasara dir is not set")

        with ModelingContext(self.yasara_dir) as context:
            self._prepare_template(context, template_id.pdbid)

            # If the template is the same as the target, do no modeling:
            template_is_target = (
                main_domain_alignment.get_template_sequence() == context.get_sequence(template_id.chain_id)
                and main_domain_alignment.get_percentage_identity() >= 100.0
            )
            if template_is_target:
                main_domain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)
                return self._wrap_template(main_target_sequence, target_species_id,
                                           main_domain_alignment, template_id)

            context.set_main_target(main_target_sequence, target_species_id, template_id.chain_id)

            chain_alignments = self._make_alignments(main_target_sequence, target_species_id,
                                                     main_domain_alignment, context, require_resnum)

            # Delete chains that aren't in the alignment set:
            for template_chain_id in context.get_chain_ids():
                if template_chain_id not in chain_alignments:
                    context.delete_chain(template_chain_id)

            final_alignments = [(chain_id, chain_alignments[chain_id])
                                for chain_id in context.get_chain_ids()]
            _log.debug("final alignments: {}".format(final_alignments))

            final_sequences = [(chain_id, context.get_sequence(chain_id))
                               for chain_id in context.get_chain_ids()]
            _log.debug("final template {} {}".format(context.template_pdbid, final_sequences))

            return self._model_run(main_domain_alignment, chain_alignments, context,
                                   main_target_sequence, require_resnum)
def build_model(self, main_target_sequence, target_species_id, main_domain_alignment, require_resnum=None):
    """Build (or reuse) the model archive for a target and domain alignment.

    Runs under the model lock from model_storage. When the archive file
    already exists its path is returned right away. Otherwise a context is
    prepared for the template; if the template sequence equals the aligned
    template sequence at 100% identity the template itself is wrapped, else
    alignments are made for the template chains and a modeling run is
    started.

    :param main_target_sequence: sequence of the main target
    :param target_species_id: species identifier of the target
    :param main_domain_alignment: alignment holding the template id to use
    :param require_resnum: optional residue number, forwarded to
                           _make_alignments
    :return: path of the model archive
    """
    ModelLogger.get_current().add(
        "building model with sequence {}, species {}, alignment {} and resnum {}"
        .format(main_target_sequence, target_species_id, main_domain_alignment, require_resnum))

    template_id = main_domain_alignment.template_id
    storage_key = (main_target_sequence, target_species_id, main_domain_alignment, template_id)

    tar_path = model_storage.get_tar_path(*storage_key)

    with model_storage.get_model_lock(*storage_key):
        # Somebody already produced this archive: nothing to do.
        if os.path.isfile(tar_path):
            return tar_path

        # NOTE(review): the context is not explicitly released in this
        # method - confirm who is responsible for cleaning it up.
        context = self._prepare_context(template_id.pdbid)

        # If the template is the same as the target, do no modeling:
        template_is_target = (
            main_domain_alignment.get_template_sequence() == context.get_sequence(template_id.chain_id)
            and main_domain_alignment.get_percentage_identity() >= 100.0
        )
        if template_is_target:
            main_domain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)
            return self._wrap_template(main_target_sequence, target_species_id,
                                       main_domain_alignment, template_id)

        context.set_main_target(main_target_sequence, target_species_id, template_id.chain_id)

        chain_alignments = self._make_alignments(main_target_sequence, target_species_id,
                                                 main_domain_alignment, context, require_resnum)

        # Delete chains that aren't in the alignment set:
        for template_chain_id in context.get_chain_ids():
            if template_chain_id not in chain_alignments:
                context.delete_chain(template_chain_id)

        final_alignments = [(chain_id, chain_alignments[chain_id])
                            for chain_id in context.get_chain_ids()]
        _log.debug("final alignments: {}".format(final_alignments))

        final_sequences = [(chain_id, context.get_sequence(chain_id))
                           for chain_id in context.get_chain_ids()]
        _log.debug("final template {} {}".format(context.template_pdbid, final_sequences))

        return self._model_run(main_domain_alignment, chain_alignments, context)
def _group_identical_chains(self, context):
    """Partition the chains of the context into groups of identical chains.

    Each remaining chain is aligned pairwise against the first chain of the
    current group. It joins that group when the alignment has at least 99%
    identity and more than 20 aligned residues.

    :param context: modeling context providing chain ids and sequences
    :return: list of groups, each group being a list of chain ids; with
             zero or one chain every chain forms its own group
    """
    sequences = {chain_id: context.get_sequence(chain_id)
                 for chain_id in context.get_chain_ids()}

    # If there's at most 1 chain, nothing needs to be aligned. Return the
    # same list-of-groups shape as the general case below.
    if len(sequences) <= 1:
        return [[chain_id] for chain_id in sequences]

    grouped = []
    remaining_ids = list(sequences)
    while remaining_ids:
        id_ = remaining_ids.pop(0)
        group = [id_]
        grouped.append(group)

        # Iterate over a copy, because members are removed from the list.
        for other_id in remaining_ids[:]:
            # Aligning them all in one run can be very time-consuming,
            # so align two at the time. Each pair is met only once, since
            # id_ has already been taken out of the remaining ids.
            alignment = clustal_aligner.align({id_: sequences[id_],
                                               other_id: sequences[other_id]})
            _log.debug("aligned {} with {}: {}".format(id_, other_id, alignment))

            if alignment.get_percentage_identity(id_, other_id) >= 99.0 and \
                    alignment.count_aligned_residues(id_, other_id) > 20:
                group.append(other_id)
                remaining_ids.remove(other_id)

    ModelLogger.get_current().add("grouped identical chains: {}".format(grouped))
    return grouped
def _wrap_template(self, main_target_sequence, target_species_id, main_domain_alignment, template_id):
    """Archive the unmodified template as the model for the target.

    A temporary work directory is filled with 'target.fa', 'align.fa',
    'target.pdb' (the pdb contents of the template), 'selected-targets.txt'
    and 'model.log', and is then packed into the gzipped tar archive whose
    path comes from model_storage. The work directory is always removed
    afterwards and the previous working directory is restored.

    :param main_target_sequence: sequence of the main target
    :param target_species_id: species identifier of the target
    :param main_domain_alignment: alignment of the target with the template
    :param template_id: id of the template, providing pdbid and chain_id
    :return: path of the created archive
    """
    model_name = model_storage.get_model_name(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)

    # Remember where we were: the work directory is deleted at the end and
    # the process must not be left inside a directory that no longer exists.
    try:
        previous_cwd = os.getcwd()
    except OSError:
        previous_cwd = None

    work_dir_path = tempfile.mkdtemp()
    try:
        align_fasta_path = os.path.join(work_dir_path, 'align.fa')
        full_target_path = os.path.join(work_dir_path, 'target.fa')

        # Written inside the try block, so that the work directory is also
        # removed when writing fails.
        write_fasta(full_target_path, {'target': main_target_sequence})

        os.chdir(work_dir_path)

        write_fasta(align_fasta_path,
                    {'target': main_domain_alignment.target_alignment,
                     str(template_id): main_domain_alignment.template_alignment})

        model_path = os.path.join(work_dir_path, 'target.pdb')
        with open(model_path, 'w') as f:
            f.write(get_pdb_contents(template_id.pdbid))

        self._write_selected_targets({template_id.chain_id: main_domain_alignment},
                                     os.path.join(work_dir_path, 'selected-targets.txt'))

        log_path = os.path.join(work_dir_path, 'model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(main_target_sequence, target_species_id,
                                              main_domain_alignment, template_id)
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path
    finally:
        # Leave the work directory before deleting it.
        if previous_cwd is not None and os.path.isdir(previous_cwd):
            os.chdir(previous_cwd)

        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)
def _model_run(self, main_domain_alignment, chain_alignments, context, main_target_sequence, require_resnum):
    """Run the yasara homology modeling experiment and archive its output.

    Works in a temporary directory that is always removed afterwards. After
    the experiment, yasara's error files and the output yob file are
    checked, the resulting chains are verified, and the model pdb, the
    selected targets and the model log are packed into a gzipped tar
    archive.

    :param main_domain_alignment: alignment of the main target
    :param chain_alignments: alignments per template chain id
    :param context: modeling context holding the yasara session and template
    :param main_target_sequence: sequence of the main target, used for the
                                 residue coverage check
    :param require_resnum: residue number for the coverage check, or None
                           to skip that check
    :return: path of the created archive; None when a RuntimeError occurred
             and _handle_error_txt returned without raising
    :raises ModelRunError: when yasara reports an error, produces no output
                           or produces output that fails the checks
    """
    model_name = model_storage.get_model_name(
        context.get_main_target_sequence(),
        context.target_species_id,
        main_domain_alignment,
        TemplateID(context.template_pdbid, context.main_target_chain_id))

    work_dir_path = tempfile.mkdtemp()

    def in_work_dir(file_name):
        # Location of a file inside the temporary work directory.
        return os.path.join(work_dir_path, file_name)

    full_target_path = in_work_dir('target.fa')
    align_fasta_path = in_work_dir('align.fa')
    output_yob_path = in_work_dir('target.yob')
    error_path = in_work_dir('errorexit.txt')
    error_scene_path = in_work_dir('errorexit.sce')
    before_scene_path = in_work_dir('beforemodel.sce')

    def process_error_files():
        # Handle the error files yasara may have left behind.
        # Returns True if the error text file was found and handled.
        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            return True
        if os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")
        return False

    write_fasta(full_target_path, {'target': context.get_main_target_sequence()})

    try:
        context.yasara.CD(work_dir_path)
        context.yasara.SaveSce(before_scene_path)

        # Remember the state of the template, to diagnose failures later.
        chain_ids_before_model = context.get_chain_ids()
        sequences_before_model = {}
        for chain_id in chain_ids_before_model:
            sequences_before_model[chain_id] = context.get_sequence(chain_id)

        self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

        context.yasara.Processors(1)
        context.yasara.ExperimentHomologyModeling(
            templateobj=context.template_obj,
            alignfile=align_fasta_path,
            templates="1, sameseq = 1",
            alignments=1,
            termextension=0,
            oligostate=32,
            looplenmax=10,
            animation='fast',
            speed='fast',
            loopsamples=20,
            resultfile='target')
        context.yasara.Experiment("On")
        context.yasara.Wait("Expend")

        process_error_files()

        if not os.path.isfile(output_yob_path):
            # No output: find out whether yasara altered the template.
            chain_ids_after_failure = context.get_chain_ids()
            if chain_ids_before_model != chain_ids_after_failure:
                raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                    .format(chain_ids_before_model, chain_ids_after_failure))

            for chain_id in chain_ids_before_model:
                sequence_after_failure = context.get_sequence(chain_id)
                if sequence_after_failure != sequences_before_model[chain_id]:
                    raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                        .format(chain_id, sequences_before_model[chain_id],
                                                sequence_after_failure))

            raise ModelRunError("yasara generated no output yob, check the console for further details")

        chain_ids_after_build = context.get_chain_ids()
        if context.main_target_chain_id not in chain_ids_after_build:
            raise ModelRunError(f"The chain {context.main_target_chain_id} is not in the final model output by yasara")

        sequences_after_build = [(chain_id, context.get_sequence(chain_id))
                                 for chain_id in context.get_chain_ids()]
        _log.debug("after modeling {}".format(sequences_after_build))
        _log.debug("input target aligned sequence:\n{}"
                   .format(main_domain_alignment.get_target_sequence_without_insertions()))

        # One flag per chain: does it equal the aligned target sequence?
        chain_matches = [
            context.get_sequence(chain_id) == main_domain_alignment.get_target_sequence_without_insertions()
            for chain_id in context.get_chain_ids()
        ]
        if not any(chain_matches) and require_resnum is not None and \
                not self._model_covers_residue(context, main_target_sequence, require_resnum):
            raise ModelRunError("yasara generated a model that doesn't match the input alignment")

        model_path = in_work_dir('target.pdb')
        context.yasara.SavePDB(context.template_obj, model_path)

        self._write_selected_targets(chain_alignments, in_work_dir('selected-targets.txt'))

        log_path = in_work_dir('model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(
            context.get_main_target_sequence(),
            context.target_species_id,
            main_domain_alignment,
            TemplateID(context.template_pdbid, context.main_target_chain_id))
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path
    except RuntimeError as e:
        self._log_additional_error_info(e, chain_alignments, context)

        # Prefer yasara's own error report; otherwise pass the error on.
        if not process_error_files():
            raise e
    finally:
        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)
def _make_alignments(self, main_target_sequence, target_species_id, main_domain_alignment, context, require_resnum):
    """Make a target alignment for the template chains in the context.

    First the main target is aligned on the chains returned by
    _pick_identical_chains for the main template chain. Then the set is
    expanded round by round with chains that interact with chains already
    aligned: for each such chain a target sequence is chosen, or a poly-A
    alignment is placed when no target was found. The expansion stops when
    all chains are aligned or no new interacting chains turn up.

    :param main_target_sequence: sequence of the main target
    :param target_species_id: species in which targets for the other
                              chains are searched
    :param main_domain_alignment: alignment of the main target, providing
                                  the template id, range and target sequence
    :param context: modeling context holding the template
    :param require_resnum: residue number that the alignment on the main
                           template chain must cover, or None
    :return: dictionary of alignments per template chain id
    :raises RuntimeError: when require_resnum is not covered by the
                          alignment on the main template chain
    """
    alignments = {}
    main_chain_id = main_domain_alignment.template_id.chain_id

    # Choose what chains to align the main target on:
    main_target_chain_ids = self._pick_identical_chains(main_chain_id, context)

    ModelLogger.get_current().add("using template chains {} for the main target sequence"
                                  .format(main_target_chain_ids))

    for chain_id in main_target_chain_ids:
        template_chain_sequence = context.get_sequence(chain_id)
        template_chain_secstr = context.get_secondary_structure(chain_id)

        local_alignment = kmad_aligner.align(template_chain_sequence, template_chain_secstr,
                                             main_domain_alignment.get_target_sequence())

        alignments[chain_id] = DomainAlignment(local_alignment.target_alignment,
                                               local_alignment.template_alignment,
                                               main_domain_alignment.range,
                                               main_domain_alignment.template_id)
        alignments[chain_id].target_id = model_storage.get_sequence_id(main_target_sequence)

    if require_resnum is not None and \
            not alignments[main_chain_id].is_target_residue_covered(require_resnum):
        raise RuntimeError("Cannot align to chain {} so that residue {} is covered"
                           .format(main_chain_id, require_resnum))

    # Try to find and align target sequences for interacting chains in the template,
    # while keeping in mind which residues interact and must thus be covered by the alignment.
    # We expand the set of involved template chains with every iteration,
    # until all template chains have been added.
    while len(alignments) < len(context.get_chain_ids()):

        # First, remember with which aligned chains the candidate chains interact:
        candidate_chains_interacts_with = {}
        for aligned_chain_id in alignments:
            for interacting_chain_id in context.list_interacting_chains(aligned_chain_id):

                ModelLogger.get_current().add("template chain {} interacts with {}"
                                              .format(aligned_chain_id, interacting_chain_id))

                # Skip those that we've already aligned, to prevent infinite loops:
                if interacting_chain_id in alignments:
                    continue

                candidate_chains_interacts_with.setdefault(interacting_chain_id, []) \
                                               .append(aligned_chain_id)

        if not candidate_chains_interacts_with:
            break  # Nothing more to add

        # Iterate over chains that interact with the chains that are already in the set:
        for candidate_chain_id, interacting_chain_ids in candidate_chains_interacts_with.items():

            interacting_chain_alignments = {interacting_chain_id: alignments[interacting_chain_id]
                                            for interacting_chain_id in interacting_chain_ids}

            template_chain_sequence = context.get_sequence(candidate_chain_id)

            potential_target_sequences = self._find_target_sequences(template_chain_sequence,
                                                                     target_species_id)

            ModelLogger.get_current().add("choosing target sequence for template chain {} from {}"
                                          .format(candidate_chain_id,
                                                  potential_target_sequences.keys()))

            candidate_alignment = self._choose_best_target_alignment(context,
                                                                     interacting_chain_alignments,
                                                                     potential_target_sequences,
                                                                     candidate_chain_id)
            if candidate_alignment is None:
                # No usable target for this chain: fall back to poly-A.
                candidate_alignment = self._make_poly_A_alignment(context, candidate_chain_id)
                candidate_alignment.target_id = "poly-A"

                ModelLogger.get_current().add("found no target for template chain {}, placing poly-A"
                                              .format(candidate_chain_id))

            alignments[candidate_chain_id] = candidate_alignment

    return alignments
def get_domain_alignments(self, target_sequence, require_resnum=None, template_id=None):
    """Find the best template alignments for the domain ranges of a sequence.

    The ranges to sample are the interpro domain ranges that pass
    _filter_forbidden_ranges (restricted to those including require_resnum,
    when given) plus the whole sequence. Per round the ranges are merged and
    examined from longest to shortest; for each range the hits are screened
    and the best one is stored. Hits above the minimum coverage are
    preferred, with the last acceptable hit below it as a last resort.
    _clean_search_space then determines the ranges for the next round.

    :param target_sequence: sequence to find alignments for
    :param require_resnum: optional residue number that ranges and hits
                           must cover
    :param template_id: optional template id, passed unchanged to _get_hits
                        for every range
    :return: list of DomainAlignment objects, one per best range
    :raises InitError: when self.min_percentage_coverage is not set
    """
    ModelLogger.get_current().add("getting domain alignments for sequence {}, resnum {}, template {}"
                                  .format(target_sequence, require_resnum, template_id))

    if self.min_percentage_coverage is None:
        raise InitError("min percentage coverage is not set")

    interpro_ranges = interpro.get_domain_ranges(target_sequence)
    _log.debug("{} ranges from interpro".format(len(interpro_ranges)))

    sample_ranges = self._filter_forbidden_ranges(interpro_ranges)

    if require_resnum is not None:
        sample_ranges = [r for r in sample_ranges if r.includes_residue(require_resnum)]
        _log.debug("{} ranges have residue {}".format(len(sample_ranges), require_resnum))

    # Add the whole sequence as a range too:
    sample_ranges.append(SequenceRange(0, len(target_sequence), target_sequence))

    ok_ranges_alignments = {}
    best_ranges_alignments = {}
    checked_ranges = []

    while len(sample_ranges) > 0:

        merged_sample_ranges = self._merge_similar_ranges(sample_ranges)

        _log.debug("sampling {} ranges".format(len(merged_sample_ranges)))

        # Check the largest ranges first. If that yields, then the smaller ones don't matter.
        for range_ in sorted(merged_sample_ranges, key=lambda r: r.get_length(), reverse=True):

            if range_ in checked_ranges:
                continue  # already passed this one
            checked_ranges.append(range_)

            if any(r.encloses(range_) for r in best_ranges_alignments):
                continue  # we already have a larger enclosing range

            # These can differ per range:
            best_hit = None
            last_resort_hit = None

            ModelLogger.get_current().add("examining range {}".format(range_))

            # The template_id argument must reach this call unchanged for
            # every range, so the templates of individual hits are kept
            # under separate names below.
            hit_candidates = self._get_hits(range_, template_id)

            _log.debug('trying range: {} against {} hits'.format(range_, len(hit_candidates)))

            for hit_candidate in hit_candidates:

                hit_range = hit_candidate.get_query_range()

                if require_resnum is not None and \
                        not hit_candidate.is_query_residue_covered(require_resnum):
                    _log.debug("hit with {} on {} does not cover residue {}"
                               .format(hit_candidate.get_hit_accession_code(),
                                       hit_range, require_resnum))
                    continue

                if not self._alignment_ok_for_range(range_, hit_candidate):
                    continue

                _log.debug("hit with {} {} is ok"
                           .format(hit_candidate.get_hit_accession_code(), hit_range))

                # This range made an OK alignment, so at least store it for later usage:
                hit_template_id = TemplateID(hit_candidate.get_hit_accession_code(),
                                             hit_candidate.get_hit_chain_id())
                ok_ranges_alignments[hit_range] = DomainAlignment(hit_candidate.query_alignment,
                                                                  hit_candidate.subject_alignment,
                                                                  hit_range, hit_template_id)

                ModelLogger.get_current().add("found a hit with {} covering range {}:\n{}"
                                              .format(hit_template_id, hit_range, hit_candidate))

                if hit_candidate.get_percentage_coverage() > self.min_percentage_coverage:
                    _log.debug("coverage is high enough for {} {}"
                               .format(hit_candidate.get_hit_accession_code(), hit_range))

                    if best_hit is None or self._is_better_than(hit_candidate, best_hit):
                        _log.debug("{} is better than {}".format(hit_candidate, best_hit))
                        ModelLogger.get_current().add("{} is better than {}"
                                                      .format(hit_candidate, best_hit))
                        best_hit = hit_candidate
                else:
                    # Acceptable alignment, but coverage is too low: only
                    # used when nothing better is found for this range.
                    last_resort_hit = hit_candidate

            if best_hit is None:
                best_hit = last_resort_hit

            if best_hit is not None:
                # Remove any smaller ranges that this one encloses:
                best_ranges_alignments = self._remove_enclosing(range_, best_ranges_alignments)

                best_template_id = TemplateID(best_hit.get_hit_accession_code(),
                                              best_hit.get_hit_chain_id())
                best_range = best_hit.get_query_range()

                _log.debug("passing best hit with template {} with range {}"
                           .format(best_template_id, best_range))

                best_ranges_alignments[best_range] = DomainAlignment(best_hit.query_alignment,
                                                                     best_hit.subject_alignment,
                                                                     best_range, best_template_id)
            else:
                _log.debug("no hit for range {}".format(range_))

        # After iterating the sample ranges, prepare for the next round:
        sample_ranges = self._clean_search_space(checked_ranges, sample_ranges, ok_ranges_alignments)

    return list(best_ranges_alignments.values())
def _model_run(self, main_domain_alignment, chain_alignments, context):
    """Run the yasara homology modeling experiment and archive its output.

    Works in a temporary directory that is always removed afterwards. After
    the experiment, yasara's error files and the output yob file are
    checked, and the model pdb, the selected targets and the model log are
    packed into a gzipped tar archive.

    :param main_domain_alignment: alignment of the main target
    :param chain_alignments: alignments per template chain id
    :param context: modeling context holding the yasara session and template
    :return: path of the created archive; None when a RuntimeError occurred
             and _handle_error_txt returned without raising
    :raises ModelRunError: when yasara reports an error or produces no output
    """
    model_name = model_storage.get_model_name(
        context.get_main_target_sequence(),
        context.target_species_id,
        main_domain_alignment,
        TemplateID(context.template_pdbid, context.main_target_chain_id))

    work_dir_path = tempfile.mkdtemp()

    def in_work_dir(file_name):
        # Location of a file inside the temporary work directory.
        return os.path.join(work_dir_path, file_name)

    full_target_path = in_work_dir('target.fa')
    align_fasta_path = in_work_dir('align.fa')
    output_yob_path = in_work_dir('target.yob')
    error_path = in_work_dir('errorexit.txt')
    error_scene_path = in_work_dir('errorexit.sce')
    before_scene_path = in_work_dir('beforemodel.sce')

    def process_error_files():
        # Handle the error files yasara may have left behind.
        # Returns True if the error text file was found and handled.
        if os.path.isfile(error_path):
            self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            return True
        if os.path.isfile(error_scene_path):
            raise ModelRunError("yasara exited with an error")
        return False

    write_fasta(full_target_path, {'target': context.get_main_target_sequence()})

    try:
        context.yasara.CD(work_dir_path)
        context.yasara.SaveSce(before_scene_path)

        # Remember the state of the template, to diagnose failures later.
        chain_ids_before_model = context.get_chain_ids()
        sequences_before_model = {}
        for chain_id in chain_ids_before_model:
            sequences_before_model[chain_id] = context.get_sequence(chain_id)

        self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

        context.yasara.Processors(1)
        context.yasara.ExperimentHomologyModeling(
            templateobj=context.template_obj,
            alignfile=align_fasta_path,
            templates="1, sameseq = 1",
            alignments=1,
            termextension=0,
            oligostate=32,
            looplenmax=10,
            animation='fast',
            speed='fast',
            loopsamples=20,
            resultfile='target')
        context.yasara.Experiment("On")
        context.yasara.Wait("Expend")

        process_error_files()

        if not os.path.isfile(output_yob_path):
            # No output: find out whether yasara altered the template.
            chain_ids_after_failure = context.get_chain_ids()
            if chain_ids_before_model != chain_ids_after_failure:
                raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                    .format(chain_ids_before_model, chain_ids_after_failure))

            for chain_id in chain_ids_before_model:
                sequence_after_failure = context.get_sequence(chain_id)
                if sequence_after_failure != sequences_before_model[chain_id]:
                    raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                        .format(chain_id, sequences_before_model[chain_id],
                                                sequence_after_failure))

            raise ModelRunError("yasara generated no output yob, check the console for further details")

        model_path = in_work_dir('target.pdb')
        context.yasara.SavePDB(context.template_obj, model_path)

        self._write_selected_targets(chain_alignments, in_work_dir('selected-targets.txt'))

        log_path = in_work_dir('model.log')
        ModelLogger.get_current().write(log_path)

        tar_path = model_storage.get_tar_path(
            context.get_main_target_sequence(),
            context.target_species_id,
            main_domain_alignment,
            TemplateID(context.template_pdbid, context.main_target_chain_id))
        with tarfile.open(tar_path, mode="w:gz") as ar:
            ar.add(work_dir_path, arcname=model_name)

        return tar_path
    except RuntimeError as e:
        self._log_additional_error_info(e, chain_alignments, context)

        # Prefer yasara's own error report; otherwise pass the error on.
        if not process_error_files():
            raise e
    finally:
        if os.path.isdir(work_dir_path):
            shutil.rmtree(work_dir_path)