예제 #1
0
파일: model.py 프로젝트: cmbi/hommod
    def _prepare_template(self, context, template_pdbid):
        """Load the template into the context and prepare it for modeling.

        The template is initialized, oligomerized, extended with symmetry
        residues, stripped of solvent and exotic residues, fixed and finally
        cleaned by YASARA.

        Oligomerization and building of symmetry residues are best-effort
        steps: a failure is logged and the preparation continues. When
        oligomerization fails, the template is initialized again so that the
        remaining steps start from a fresh template.

        :param context: modeling context that holds the YASARA session
        :param template_pdbid: pdb identifier of the template to load
        :return: the context that was passed in
        """

        self._init_template(template_pdbid, context)

        ModelLogger.get_current().add("starting with template with {} chains"
                                      .format(len(context.get_chain_ids())))
        try:
            self._oligomerize_template(context)
        except Exception:
            # Only catch real errors (not KeyboardInterrupt/SystemExit) and
            # leave a trace of what went wrong before starting over.
            _log.warning("oligomerization of template %s failed, reverting to the initial template",
                         template_pdbid, exc_info=True)
            self._init_template(template_pdbid, context)

        ModelLogger.get_current().add("after oligomerization: {} chains"
                                      .format(len(context.get_chain_ids())))

        try:
            self._build_template_symmetry_residues(context)
        except Exception:
            # Best-effort: continue without the symmetry residues.
            _log.warning("building symmetry residues for template %s failed, continuing without them",
                         template_pdbid, exc_info=True)

        self._delete_solvent_residues(context)
        self._delete_exotic_residues(context)
        self._fix_template_errors(context)

        context.yasara.CleanObj(context.template_obj)
        return context
예제 #2
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def _prepare_context(self, template_pdbid):
        """Create a modeling context and prepare the given template in it.

        The template is initialized, oligomerized, extended with symmetry
        residues, stripped of solvent residues, fixed and finally cleaned
        by YASARA.

        Oligomerization and building of symmetry residues are best-effort
        steps: a failure is logged and the preparation continues. When
        oligomerization fails, the template is initialized again so that the
        remaining steps start from a fresh template.

        :param template_pdbid: pdb identifier of the template to load
        :return: the newly created modeling context
        :raises InitError: if the yasara directory has not been set
        """
        if self.yasara_dir is None:
            raise InitError("yasara dir is not set")

        context = ModelingContext(self.yasara_dir)

        self._init_template(template_pdbid, context)

        ModelLogger.get_current().add("starting with template with {} chains"
                                      .format(len(context.get_chain_ids())))
        try:
            self._oligomerize_template(context)
        except Exception:
            # Only catch real errors (not KeyboardInterrupt/SystemExit) and
            # leave a trace of what went wrong before starting over.
            _log.warning("oligomerization of template %s failed, reverting to the initial template",
                         template_pdbid, exc_info=True)
            self._init_template(template_pdbid, context)

        ModelLogger.get_current().add("after oligomerization: {} chains"
                                      .format(len(context.get_chain_ids())))

        try:
            self._build_template_symmetry_residues(context)
        except Exception:
            # Best-effort: continue without the symmetry residues.
            _log.warning("building symmetry residues for template %s failed, continuing without them",
                         template_pdbid, exc_info=True)

        self._delete_solvent_residues(context)
        self._fix_template_errors(context)

        context.yasara.CleanObj(context.template_obj)
        return context
예제 #3
0
파일: tasks.py 프로젝트: cmbi/hommod
def create_model(target_sequence, target_species_id, require_resnum=None, chosen_template_id=None):
    """Return a model for the target sequence, building one if none is stored.

    The whole lookup-or-build sequence runs under a file lock in the model
    directory. The lock name is derived from the sequence id, species id,
    required residue number and chosen template id, so identical requests
    do not run at the same time.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier, converted to upper case
    :param require_resnum: residue number that must be covered, or None
    :param chosen_template_id: template to restrict the search to, or None
    :return: the result of select_best_model() when models are already
        stored, otherwise the result of modeler.build_model(), or None when
        no domain alignments were found
    :raises InitError: if the model directory has not been set
    """

    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id,
                                             target_species_id,
                                             str(require_resnum),
                                             str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)
    with FileLock(lock_path):

        # Reuse what's already in storage, if anything:
        model_paths = model_storage.list_models(target_sequence, target_species_id,
                                                require_resnum, chosen_template_id)
        if len(model_paths) > 0:
            return select_best_model(model_paths, target_sequence, require_resnum)

        # Nothing stored, so start a fresh model log and build a model:
        ModelLogger.get_current().clear()

        domain_alignments = \
            domain_aligner.get_domain_alignments(target_sequence,
                                                 require_resnum,
                                                 chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is deprecated, Logger.warning is its replacement.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum, chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
예제 #4
0
파일: tasks.py 프로젝트: cmbi/hommod-rest
def create_model(target_sequence, target_species_id, require_resnum=None, chosen_template_id=None):
    """Return a model for the target sequence, building one if none is stored.

    The whole lookup-or-build sequence runs under a file lock in the model
    directory. The lock name is derived from the sequence id, species id,
    required residue number and chosen template id, so identical requests
    do not run at the same time.

    :param target_sequence: sequence to model
    :param target_species_id: species identifier, converted to upper case
    :param require_resnum: residue number that must be covered, or None
    :param chosen_template_id: template to restrict the search to, or None
    :return: the result of select_best_model() when models are already
        stored, otherwise the result of modeler.build_model(), or None when
        no domain alignments were found
    :raises InitError: if the model directory has not been set
    """

    target_species_id = target_species_id.upper()

    sequence_id = model_storage.get_sequence_id(target_sequence)
    lock_name = "lock_search_%s_%s_%s_%s" % (sequence_id,
                                             target_species_id,
                                             str(require_resnum),
                                             str(chosen_template_id))

    if model_storage.model_dir is None:
        raise InitError("model directory is not set")

    lock_path = os.path.join(model_storage.model_dir, lock_name)
    with FileLock(lock_path):

        # Reuse what's already in storage, if anything:
        model_paths = model_storage.list_models(target_sequence, target_species_id,
                                                require_resnum, chosen_template_id)
        if len(model_paths) > 0:
            return select_best_model(model_paths)

        # Nothing stored, so start a fresh model log and build a model:
        ModelLogger.get_current().clear()

        domain_alignments = \
            domain_aligner.get_domain_alignments(target_sequence,
                                                 require_resnum,
                                                 chosen_template_id)
        if len(domain_alignments) <= 0:
            # Logger.warn is deprecated, Logger.warning is its replacement.
            _log.warning("no domain alignments for target={} resnum={} template={}"
                         .format(target_sequence, require_resnum, chosen_template_id))
            return None

        domain_alignment = select_best_domain_alignment(domain_alignments)
        return modeler.build_model(target_sequence, target_species_id,
                                   domain_alignment, require_resnum)
예제 #5
0
파일: model.py 프로젝트: cmbi/hommod
    def build_model(self, main_target_sequence, target_species_id, main_domain_alignment, require_resnum=None):
        """Build the model archive for the main target, unless it already exists.

        Runs under the model lock of the storage. If the tar archive is
        already present, its path is returned right away. If the template
        chain is identical to the aligned template sequence at 100% identity,
        the template itself is wrapped as the model. Otherwise alignments are
        made for the template chains, chains without alignment are deleted
        and a modeling run is started.

        :param main_target_sequence: sequence of the main target
        :param target_species_id: species identifier of the target
        :param main_domain_alignment: alignment of the target with the template
        :param require_resnum: residue number that must be covered, or None
        :return: path of the model's tar archive
        :raises InitError: if the yasara directory has not been set
        """

        ModelLogger.get_current().add(
            "building model with sequence {}, species {}, alignment {} and resnum {}".format(
                main_target_sequence, target_species_id, main_domain_alignment, require_resnum))

        template_id = main_domain_alignment.template_id
        storage_key = (main_target_sequence, target_species_id, main_domain_alignment, template_id)

        tar_path = model_storage.get_tar_path(*storage_key)

        with model_storage.get_model_lock(*storage_key):

            # Somebody else might have built it already:
            if os.path.isfile(tar_path):
                return tar_path

            if self.yasara_dir is None:
                raise InitError("yasara dir is not set")

            with ModelingContext(self.yasara_dir) as context:

                self._prepare_template(context, template_id.pdbid)

                # If the template is the same as the target, do no modeling:
                template_equals_target = \
                    main_domain_alignment.get_template_sequence() == context.get_sequence(template_id.chain_id) and \
                    main_domain_alignment.get_percentage_identity() >= 100.0

                if template_equals_target:
                    main_domain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)

                    return self._wrap_template(main_target_sequence, target_species_id,
                                               main_domain_alignment, template_id)

                context.set_main_target(main_target_sequence, target_species_id, template_id.chain_id)

                chain_alignments = self._make_alignments(main_target_sequence, target_species_id,
                                                         main_domain_alignment, context, require_resnum)

                # Delete chains that aren't in the alignment set:
                for chain_id in context.get_chain_ids():
                    if chain_id not in chain_alignments:
                        context.delete_chain(chain_id)

                remaining_alignments = [(chain_id, chain_alignments[chain_id])
                                        for chain_id in context.get_chain_ids()]
                _log.debug("final alignments: {}".format(remaining_alignments))

                remaining_sequences = [(chain_id, context.get_sequence(chain_id))
                                       for chain_id in context.get_chain_ids()]
                _log.debug("final template {} {}".format(context.template_pdbid, remaining_sequences))

                return self._model_run(main_domain_alignment, chain_alignments, context,
                                       main_target_sequence, require_resnum)
예제 #6
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def build_model(self, main_target_sequence, target_species_id, main_domain_alignment, require_resnum=None):
        """Build the model archive for the main target, unless it already exists.

        Runs under the model lock of the storage. If the tar archive is
        already present, its path is returned right away. If the template
        chain is identical to the aligned template sequence at 100% identity,
        the template itself is wrapped as the model. Otherwise alignments are
        made for the template chains, chains without alignment are deleted
        and a modeling run is started.

        :param main_target_sequence: sequence of the main target
        :param target_species_id: species identifier of the target
        :param main_domain_alignment: alignment of the target with the template
        :param require_resnum: residue number that must be covered, or None
        :return: path of the model's tar archive
        """

        ModelLogger.get_current().add(
            "building model with sequence {}, species {}, alignment {} and resnum {}".format(
                main_target_sequence, target_species_id, main_domain_alignment, require_resnum))

        template_id = main_domain_alignment.template_id
        storage_key = (main_target_sequence, target_species_id, main_domain_alignment, template_id)

        tar_path = model_storage.get_tar_path(*storage_key)

        with model_storage.get_model_lock(*storage_key):

            # Somebody else might have built it already:
            if os.path.isfile(tar_path):
                return tar_path

            context = self._prepare_context(template_id.pdbid)

            # If the template is the same as the target, do no modeling:
            template_equals_target = \
                main_domain_alignment.get_template_sequence() == context.get_sequence(template_id.chain_id) and \
                main_domain_alignment.get_percentage_identity() >= 100.0

            if template_equals_target:
                main_domain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)

                return self._wrap_template(main_target_sequence, target_species_id,
                                           main_domain_alignment, template_id)

            context.set_main_target(main_target_sequence, target_species_id, template_id.chain_id)

            chain_alignments = self._make_alignments(main_target_sequence, target_species_id,
                                                     main_domain_alignment, context, require_resnum)

            # Delete chains that aren't in the alignment set:
            for chain_id in context.get_chain_ids():
                if chain_id not in chain_alignments:
                    context.delete_chain(chain_id)

            remaining_alignments = [(chain_id, chain_alignments[chain_id])
                                    for chain_id in context.get_chain_ids()]
            _log.debug("final alignments: {}".format(remaining_alignments))

            remaining_sequences = [(chain_id, context.get_sequence(chain_id))
                                   for chain_id in context.get_chain_ids()]
            _log.debug("final template {} {}".format(context.template_pdbid, remaining_sequences))

            return self._model_run(main_domain_alignment, chain_alignments, context)
예제 #7
0
파일: model.py 프로젝트: cmbi/hommod
    def _group_identical_chains(self, context):

        sequences = {chain_id: context.get_sequence(chain_id)
                     for chain_id in context.get_chain_ids()}

        # If there's only 1 chain, then we don't need to do anything:
        if len(sequences) <= 1:
            return sequences.keys()

        alignments = {} 

        grouped = [] 

        ids = list(sequences.keys())
        while len(ids) > 0:
            id_ = ids[0]
            ids.remove(id_)
            grouped.append([id_])

            for other_id in ids[:]:

                pair = (id_, other_id)

                # Aligning them all in one run can be very time-consuming,
                # so align two at the time:
                if pair not in alignments:
                    alignments[pair] = clustal_aligner.align({id_: sequences[id_],
                                                              other_id: sequences[other_id]})
                    _log.debug("aligned {} with {}: {}".format(id_, other_id, alignments[pair]))

                if alignments[pair].get_percentage_identity(id_, other_id) >= 99.0 and \
                        alignments[pair].count_aligned_residues(id_, other_id) > 20:
                    grouped[-1].append(other_id)
                    ids.remove(other_id)

        ModelLogger.get_current().add("grouped identical chains: {}".format(grouped))

        return grouped
예제 #8
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def _group_identical_chains(self, context):

        sequences = {chain_id: context.get_sequence(chain_id)
                     for chain_id in context.get_chain_ids()}

        # If there's only 1 chain, then we don't need to do anything:
        if len(sequences) <= 1:
            return sequences.keys()

        alignments = {} 

        grouped = [] 

        ids = list(sequences.keys())
        while len(ids) > 0:
            id_ = ids[0]
            ids.remove(id_)
            grouped.append([id_])

            for other_id in ids[:]:

                pair = (id_, other_id)

                # Aligning them all in one run can be very time-consuming,
                # so align two at the time:
                if pair not in alignments:
                    alignments[pair] = clustal_aligner.align({id_: sequences[id_],
                                                              other_id: sequences[other_id]})
                    _log.debug("aligned {} with {}: {}".format(id_, other_id, alignments[pair]))

                if alignments[pair].get_percentage_identity(id_, other_id) >= 99.0 and \
                        alignments[pair].count_aligned_residues(id_, other_id) > 20:
                    grouped[-1].append(other_id)
                    ids.remove(other_id)

        ModelLogger.get_current().add("grouped identical chains: {}".format(grouped))

        return grouped
예제 #9
0
파일: model.py 프로젝트: cmbi/hommod
    def _wrap_template(self, main_target_sequence, target_species_id, main_domain_alignment, template_id):
        """Archive the template structure itself as the model.

        Writes the target fasta, the alignment fasta, the template's pdb
        contents, the selected targets file and the model log to a temporary
        work directory and packs that directory into the model's tar archive.
        The work directory is always removed afterwards and the process's
        working directory is restored.

        :param main_target_sequence: sequence of the main target
        :param target_species_id: species identifier of the target
        :param main_domain_alignment: alignment of the target with the template
        :param template_id: identifier of the template (pdbid and chain)
        :return: path of the created tar archive
        """
        model_name = model_storage.get_model_name(main_target_sequence, target_species_id,
                                                  main_domain_alignment, template_id)

        # Remember where we are, so that the process isn't left behind in
        # a deleted directory. The current directory may already be gone.
        try:
            previous_dir_path = os.getcwd()
        except OSError:
            previous_dir_path = None

        work_dir_path = tempfile.mkdtemp()
        try:
            align_fasta_path = os.path.join(work_dir_path, 'align.fa')
            full_target_path = os.path.join(work_dir_path, 'target.fa')

            # Inside the try block, so that the work directory gets removed if this fails:
            write_fasta(full_target_path, {'target': main_target_sequence})

            os.chdir(work_dir_path)

            write_fasta(align_fasta_path, {'target': main_domain_alignment.target_alignment,
                                           str(template_id): main_domain_alignment.template_alignment})

            model_path = os.path.join(work_dir_path, 'target.pdb')
            with open(model_path, 'w') as f:
                f.write(get_pdb_contents(template_id.pdbid))

            self._write_selected_targets({template_id.chain_id: main_domain_alignment},
                                         os.path.join(work_dir_path, 'selected-targets.txt'))

            log_path = os.path.join(work_dir_path, 'model.log')
            ModelLogger.get_current().write(log_path)

            tar_path = model_storage.get_tar_path(main_target_sequence,
                                                  target_species_id,
                                                  main_domain_alignment,
                                                  template_id)
            with tarfile.open(tar_path, mode="w:gz") as ar:
                ar.add(work_dir_path, arcname=model_name)

            return tar_path
        finally:
            # Leave the work directory before deleting it:
            if previous_dir_path is not None and os.path.isdir(previous_dir_path):
                os.chdir(previous_dir_path)

            if os.path.isdir(work_dir_path):
                shutil.rmtree(work_dir_path)
예제 #10
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def _wrap_template(self, main_target_sequence, target_species_id, main_domain_alignment, template_id):
        """Archive the template structure itself as the model.

        Writes the target fasta, the alignment fasta, the template's pdb
        contents, the selected targets file and the model log to a temporary
        work directory and packs that directory into the model's tar archive.
        The work directory is always removed afterwards and the process's
        working directory is restored.

        :param main_target_sequence: sequence of the main target
        :param target_species_id: species identifier of the target
        :param main_domain_alignment: alignment of the target with the template
        :param template_id: identifier of the template (pdbid and chain)
        :return: path of the created tar archive
        """
        model_name = model_storage.get_model_name(main_target_sequence, target_species_id,
                                                  main_domain_alignment, template_id)

        # Remember where we are, so that the process isn't left behind in
        # a deleted directory. The current directory may already be gone.
        try:
            previous_dir_path = os.getcwd()
        except OSError:
            previous_dir_path = None

        work_dir_path = tempfile.mkdtemp()
        try:
            align_fasta_path = os.path.join(work_dir_path, 'align.fa')
            full_target_path = os.path.join(work_dir_path, 'target.fa')

            # Inside the try block, so that the work directory gets removed if this fails:
            write_fasta(full_target_path, {'target': main_target_sequence})

            os.chdir(work_dir_path)

            write_fasta(align_fasta_path, {'target': main_domain_alignment.target_alignment,
                                           str(template_id): main_domain_alignment.template_alignment})

            model_path = os.path.join(work_dir_path, 'target.pdb')
            with open(model_path, 'w') as f:
                f.write(get_pdb_contents(template_id.pdbid))

            self._write_selected_targets({template_id.chain_id: main_domain_alignment},
                                         os.path.join(work_dir_path, 'selected-targets.txt'))

            log_path = os.path.join(work_dir_path, 'model.log')
            ModelLogger.get_current().write(log_path)

            tar_path = model_storage.get_tar_path(main_target_sequence,
                                                  target_species_id,
                                                  main_domain_alignment,
                                                  template_id)
            with tarfile.open(tar_path, mode="w:gz") as ar:
                ar.add(work_dir_path, arcname=model_name)

            return tar_path
        finally:
            # Leave the work directory before deleting it:
            if previous_dir_path is not None and os.path.isdir(previous_dir_path):
                os.chdir(previous_dir_path)

            if os.path.isdir(work_dir_path):
                shutil.rmtree(work_dir_path)
예제 #11
0
파일: model.py 프로젝트: cmbi/hommod
    def _model_run(self, main_domain_alignment, chain_alignments, context, main_target_sequence, require_resnum):
        """Run the YASARA homology modeling experiment and archive the result.

        All files are written to a temporary work directory, which is packed
        into the model's tar archive on success and is always removed
        afterwards.

        :param main_domain_alignment: alignment of the main target with the template
        :param chain_alignments: the chain alignments to write to the alignment
            fasta and to the selected targets file
        :param context: modeling context that holds the YASARA session and template
        :param main_target_sequence: sequence of the main target
        :param require_resnum: residue number that the model must cover, or None
        :return: path of the created tar archive
        :raises ModelRunError: if YASARA exits with an error, generates no
            output, changes the chains or sequences, or if the model doesn't
            match the input alignment
        """

        model_name = model_storage.get_model_name(context.get_main_target_sequence(),
                                                  context.target_species_id,
                                                  main_domain_alignment,
                                                  TemplateID(context.template_pdbid,
                                                             context.main_target_chain_id))

        work_dir_path = tempfile.mkdtemp()
        full_target_path = os.path.join(work_dir_path, 'target.fa')
        align_fasta_path = os.path.join(work_dir_path, 'align.fa')
        output_yob_path = os.path.join(work_dir_path, 'target.yob')
        error_path = os.path.join(work_dir_path, 'errorexit.txt')
        error_scene_path = os.path.join(work_dir_path, 'errorexit.sce')
        before_scene_path = os.path.join(work_dir_path, 'beforemodel.sce')

        try:
            # Inside the try block, so that the work directory gets removed if this fails:
            write_fasta(full_target_path, {'target': context.get_main_target_sequence()})

            context.yasara.CD(work_dir_path)

            context.yasara.SaveSce(before_scene_path)

            # Remember the state before modeling, to diagnose a failed run:
            chain_ids_before_model = context.get_chain_ids()
            sequences_before_model = {chain_id: context.get_sequence(chain_id) for chain_id in chain_ids_before_model}

            self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

            context.yasara.Processors(1)

            context.yasara.ExperimentHomologyModeling(templateobj=context.template_obj,
                                                      alignfile=align_fasta_path,
                                                      templates="1, sameseq = 1",
                                                      alignments=1,
                                                      termextension=0,
                                                      oligostate=32,
                                                      looplenmax=10,
                                                      animation='fast',
                                                      speed='fast',
                                                      loopsamples=20,
                                                      resultfile='target')
            context.yasara.Experiment("On")
            context.yasara.Wait("Expend")

            if os.path.isfile(error_path):
                self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            elif os.path.isfile(error_scene_path):
                raise ModelRunError("yasara exited with an error")

            if not os.path.isfile(output_yob_path):
                # No output, find out whether yasara changed the template:
                chain_ids_after_failure = context.get_chain_ids()

                if chain_ids_before_model != chain_ids_after_failure:
                    raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                        .format(chain_ids_before_model, chain_ids_after_failure))

                for chain_id in chain_ids_before_model:
                    sequence_after_failure = context.get_sequence(chain_id)
                    if sequence_after_failure != sequences_before_model[chain_id]:
                        raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                            .format(chain_id, sequences_before_model[chain_id], sequence_after_failure))

                raise ModelRunError("yasara generated no output yob, check the console for further details")

            chain_ids_after_build = context.get_chain_ids()
            if context.main_target_chain_id not in chain_ids_after_build:
                raise ModelRunError(f"The chain {context.main_target_chain_id} is not in the final model output by yasara")

            _log.debug("after modeling {}".format([(chain_id, context.get_sequence(chain_id))
                                                   for chain_id in context.get_chain_ids()]))

            expected_sequence = main_domain_alignment.get_target_sequence_without_insertions()
            _log.debug("input target aligned sequence:\n{}".format(expected_sequence))

            # If no chain has the expected sequence, then the model is only
            # rejected when a residue is required that the model doesn't cover.
            if not any(context.get_sequence(chain_id) == expected_sequence
                       for chain_id in context.get_chain_ids()):
                if require_resnum is not None and not self._model_covers_residue(context, main_target_sequence, require_resnum):
                    raise ModelRunError("yasara generated a model that doesn't match the input alignment")

            model_path = os.path.join(work_dir_path, 'target.pdb')
            context.yasara.SavePDB(context.template_obj, model_path)

            self._write_selected_targets(chain_alignments, os.path.join(work_dir_path, 'selected-targets.txt'))

            log_path = os.path.join(work_dir_path, 'model.log')
            ModelLogger.get_current().write(log_path)

            tar_path = model_storage.get_tar_path(context.get_main_target_sequence(),
                                                  context.target_species_id,
                                                  main_domain_alignment,
                                                  TemplateID(context.template_pdbid,
                                                             context.main_target_chain_id))
            with tarfile.open(tar_path, mode="w:gz") as ar:
                ar.add(work_dir_path, arcname=model_name)

            return tar_path
        except RuntimeError as e:
            self._log_additional_error_info(e, chain_alignments, context)

            # NOTE(review): if _handle_error_txt returns without raising, this
            # method returns None here - confirm that it always raises.
            if os.path.isfile(error_path):
                self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            elif os.path.isfile(error_scene_path):
                raise ModelRunError("yasara exited with an error")
            else:
                # Bare raise keeps the original traceback intact:
                raise
        finally:
            if os.path.isdir(work_dir_path):
                shutil.rmtree(work_dir_path)
예제 #12
0
파일: model.py 프로젝트: cmbi/hommod
    def _make_alignments(self, main_target_sequence, target_species_id,
                         main_domain_alignment, context, require_resnum):
        """Make an alignment for every template chain that takes part in the model.

        The main target is aligned to the chains picked by
        _pick_identical_chains() for the main template chain. After that, the
        set is expanded with chains that interact with already aligned
        chains. Each of those gets the target alignment chosen by
        _choose_best_target_alignment(), or a poly-A alignment when none
        was found.

        :param main_target_sequence: sequence of the main target
        :param target_species_id: species identifier to find targets in
        :param main_domain_alignment: alignment of the main target with the template
        :param context: modeling context that holds the template
        :param require_resnum: residue number that must be covered, or None
        :return: dictionary of alignments per template chain id
        :raises RuntimeError: if the required residue is not covered by the
            alignment to the main template chain
        """
        chain_alignments = {}

        main_chain_id = main_domain_alignment.template_id.chain_id

        # Choose what chains to align the main_target_on
        main_target_chain_ids = self._pick_identical_chains(main_chain_id, context)

        ModelLogger.get_current().add("using template chains {} for the main target sequence".format(main_target_chain_ids))

        for chain_id in main_target_chain_ids:

            chain_sequence = context.get_sequence(chain_id)
            chain_secstr = context.get_secondary_structure(chain_id)

            local_alignment = kmad_aligner.align(chain_sequence, chain_secstr,
                                                 main_domain_alignment.get_target_sequence())

            chain_alignment = DomainAlignment(local_alignment.target_alignment,
                                              local_alignment.template_alignment,
                                              main_domain_alignment.range,
                                              main_domain_alignment.template_id)
            chain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)

            chain_alignments[chain_id] = chain_alignment

        if require_resnum is not None:
            main_chain_alignment = chain_alignments[main_chain_id]
            if not main_chain_alignment.is_target_residue_covered(require_resnum):
                raise RuntimeError("Cannot align to chain {} so that residue {} is covered"
                                   .format(main_chain_id, require_resnum))

        # Try to find and align target sequences for interacting chains in the template,
        # while keeping in mind which residues interact and must thus be covered by the alignment.
        # The set of involved template chains is expanded with every iteration,
        # until all template chains have been added.
        while len(chain_alignments) < len(context.get_chain_ids()):

            # First, remember per candidate chain which aligned chains it interacts with:
            aligned_partners_per_candidate = {}
            for aligned_chain_id in chain_alignments:
                for interacting_chain_id in context.list_interacting_chains(aligned_chain_id):

                    ModelLogger.get_current().add("template chain {} interacts with {}"
                                                  .format(aligned_chain_id, interacting_chain_id))

                    # Only chains that haven't been aligned yet are candidates,
                    # this prevents infinite loops:
                    if interacting_chain_id not in chain_alignments:
                        aligned_partners_per_candidate.setdefault(interacting_chain_id, []) \
                                                      .append(aligned_chain_id)

            if not aligned_partners_per_candidate:
                break  # Nothing more to add

            # Iterate over chains that might interact with the chains that are already in the set:
            for candidate_chain_id, partner_chain_ids in aligned_partners_per_candidate.items():

                interacting_chain_alignments = {partner_chain_id: chain_alignments[partner_chain_id]
                                                for partner_chain_id in partner_chain_ids}

                candidate_sequence = context.get_sequence(candidate_chain_id)
                # The secondary structure isn't used below, the call is kept as it was:
                candidate_secstr = context.get_secondary_structure(candidate_chain_id)

                potential_target_sequences = self._find_target_sequences(candidate_sequence,
                                                                         target_species_id)

                ModelLogger.get_current().add("choosing target sequence for template chain {} from {}"
                                              .format(candidate_chain_id, potential_target_sequences.keys()))

                candidate_alignment = self._choose_best_target_alignment(context,
                                                                         interacting_chain_alignments,
                                                                         potential_target_sequences,
                                                                         candidate_chain_id)
                if candidate_alignment is None:
                    candidate_alignment = self._make_poly_A_alignment(context, candidate_chain_id)
                    candidate_alignment.target_id = "poly-A"

                    ModelLogger.get_current().add("found no target for template chain {}, placing poly-A"
                                                  .format(candidate_chain_id))

                chain_alignments[candidate_chain_id] = candidate_alignment

        return chain_alignments
예제 #13
0
    def get_domain_alignments(self, target_sequence, require_resnum=None, template_id=None):

        ModelLogger.get_current().add("getting domain alignments for sequence {}, resnum {}, template {}"
                                      .format(target_sequence, require_resnum, template_id))

        if self.min_percentage_coverage is None:
            raise InitError("min percentage coverage is not set")

        interpro_ranges = interpro.get_domain_ranges(target_sequence)
        _log.debug("{} ranges from interpro".format(len(interpro_ranges)))

        sample_ranges = self._filter_forbidden_ranges(interpro_ranges)

        if require_resnum is not None:
            sample_ranges = list(filter(lambda r: r.includes_residue(require_resnum), sample_ranges))
            _log.debug("{} ranges have residue {}".format(len(sample_ranges), require_resnum))

        # Add the whole sequence as a range too:
        sample_ranges.append(SequenceRange(0, len(target_sequence), target_sequence))

        ok_ranges_alignments = {}
        best_ranges_alignments = {}
        checked_ranges = []

        while len(sample_ranges) > 0:

            merged_sample_ranges = self._merge_similar_ranges(sample_ranges)

            _log.debug("sampling {} ranges".format(len(merged_sample_ranges)))

            # Check the largest ranges first. If that yields, then the smaller ones don't matter.
            for range_ in sorted(merged_sample_ranges, key=lambda r: r.get_length(), reverse=True):

                if range_ in checked_ranges:
                    continue  # already passed this one
                checked_ranges.append(range_)

                if any([r.encloses(range_) for r in best_ranges_alignments]):
                    continue  # we already have a larger enclosing range

                # These can differ per range:
                best_hit = None
                last_resort_hit = None

                ModelLogger.get_current().add("examining range {}".format(range_))

                hit_candidates = self._get_hits(range_, template_id)

                _log.debug('trying range: {} against {} hits'.format(range_, len(hit_candidates)))

                for hit_candidate in hit_candidates:
                    hit_range = hit_candidate.get_query_range()
                    if require_resnum is not None:
                        if not hit_candidate.is_query_residue_covered(require_resnum):
                            _log.debug("hit with {} on {} does not cover residue {}"
                                       .format(hit_candidate.get_hit_accession_code(),
                                               hit_range, require_resnum))
                            continue

                    if self._alignment_ok_for_range(range_, hit_candidate):
                        _log.debug("hit with {} {} is ok"
                                   .format(hit_candidate.get_hit_accession_code(), hit_range))

                        # This range made an OK alignment, so at least store it for later usage:
                        template_id = TemplateID(hit_candidate.get_hit_accession_code(),
                                                 hit_candidate.get_hit_chain_id())
                        ok_ranges_alignments[hit_range] = DomainAlignment(hit_candidate.query_alignment,
                                                                          hit_candidate.subject_alignment,
                                                                          hit_range, template_id)

                        ModelLogger.get_current().add("found a hit with {} covering range {}:\n{}"
                                                      .format(template_id, hit_range, hit_candidate))


                        if hit_candidate.get_percentage_coverage() > self.min_percentage_coverage:

                            _log.debug("coverage is high enough for {} {}"
                                       .format(hit_candidate.get_hit_accession_code(), hit_range))

                            if best_hit is None or self._is_better_than(hit_candidate, best_hit):

                                _log.debug("{} is better than {}".format(hit_candidate, best_hit))
                                ModelLogger.get_current().add("{} is better than {}".format(hit_candidate, best_hit))

                                best_hit = hit_candidate
                        else:
                            last_resort_hit = hit_candidate

                if best_hit is None:
                    best_hit = last_resort_hit

                if best_hit is not None:

                    # Remove any smaller ranges that this one encloses:
                    best_ranges_alignments = self._remove_enclosing(range_, best_ranges_alignments)

                    template_id = TemplateID(best_hit.get_hit_accession_code(),
                                             best_hit.get_hit_chain_id())

                    hit_range = best_hit.get_query_range()
                    _log.debug("passing best hit with template {} with range {}".format(template_id, hit_range))

                    best_ranges_alignments[hit_range] = DomainAlignment(best_hit.query_alignment,
                                                                        best_hit.subject_alignment,
                                                                        hit_range, template_id)
                else:
                    _log.debug("no hit for range {}".format(range_))

            # After iterating the sample ranges, prepare for the next round:
            sample_ranges = self._clean_search_space(checked_ranges, sample_ranges, ok_ranges_alignments)

        return list(best_ranges_alignments.values())
예제 #14
0
파일: domain.py 프로젝트: cmbi/hommod-rest
    def get_domain_alignments(self, target_sequence, require_resnum=None, template_id=None):
        """
        Find template alignments for ranges of the target sequence.

        The sample ranges are the interpro ranges that pass
        ``_filter_forbidden_ranges``, plus one range that spans the whole
        sequence. In every round the (merged) ranges are examined from long to
        short. For each range the hits from ``_get_hits`` are screened with
        ``_alignment_ok_for_range``; every hit that passes is remembered in the
        "ok" alignments, and one hit per range is promoted to "best":

        * the hit preferred by ``_is_better_than`` among those whose percentage
          coverage exceeds ``self.min_percentage_coverage``, or
        * if no hit reaches that coverage, the last acceptable hit that was seen.

        After each round ``_clean_search_space`` decides which ranges are
        sampled next; the search stops when it yields no more ranges.

        :param target_sequence: the sequence to find domain alignments for
        :param require_resnum: when not None, only interpro ranges that include
                               this residue and hits that cover it are used
        :param template_id: handed unchanged to every ``_get_hits`` call
                            (presumably restricts the hits to one template,
                            confirm in ``_get_hits``)
        :return: list with the best DomainAlignment found per range
        :raises InitError: if ``min_percentage_coverage`` is not set
        """

        ModelLogger.get_current().add("getting domain alignments for sequence {}, resnum {}, template {}"
                                      .format(target_sequence, require_resnum, template_id))

        if self.min_percentage_coverage is None:
            raise InitError("min percentage coverage is not set")

        interpro_ranges = interpro.get_domain_ranges(target_sequence)
        _log.debug("{} ranges from interpro".format(len(interpro_ranges)))

        sample_ranges = self._filter_forbidden_ranges(interpro_ranges)

        if require_resnum is not None:
            sample_ranges = [r for r in sample_ranges if r.includes_residue(require_resnum)]
            _log.debug("{} ranges have residue {}".format(len(sample_ranges), require_resnum))

        # Add the whole sequence as a range too:
        sample_ranges.append(SequenceRange(0, len(target_sequence), target_sequence))

        ok_ranges_alignments = {}
        best_ranges_alignments = {}
        checked_ranges = []

        while len(sample_ranges) > 0:

            merged_sample_ranges = self._merge_similar_ranges(sample_ranges)

            _log.debug("sampling {} ranges".format(len(merged_sample_ranges)))

            # Check the largest ranges first. If that yields, then the smaller ones don't matter.
            for range_ in sorted(merged_sample_ranges, key=lambda r: r.get_length(), reverse=True):

                if range_ in checked_ranges:
                    continue  # already passed this one
                checked_ranges.append(range_)

                if any(r.encloses(range_) for r in best_ranges_alignments):
                    continue  # we already have a larger enclosing range

                # These can differ per range:
                best_hit = None
                last_resort_hit = None

                ModelLogger.get_current().add("examining range {}".format(range_))

                # 'template_id' is the caller's argument. It must not be rebound
                # below, otherwise later ranges would be searched against the
                # template of an earlier hit.
                hit_candidates = self._get_hits(range_, template_id)

                _log.debug('trying range: {} against {} hits'.format(range_, len(hit_candidates)))

                for hit_candidate in hit_candidates:
                    hit_range = hit_candidate.get_query_range()
                    if require_resnum is not None:
                        if not hit_candidate.is_query_residue_covered(require_resnum):
                            _log.debug("hit with {} on {} does not cover residue {}"
                                       .format(hit_candidate.get_hit_accession_code(),
                                               hit_range, require_resnum))
                            continue

                    if self._alignment_ok_for_range(range_, hit_candidate):
                        _log.debug("hit with {} {} is ok"
                                   .format(hit_candidate.get_hit_accession_code(), hit_range))

                        # This range made an OK alignment, so at least store it for later usage:
                        hit_template_id = TemplateID(hit_candidate.get_hit_accession_code(),
                                                     hit_candidate.get_hit_chain_id())
                        ok_ranges_alignments[hit_range] = DomainAlignment(hit_candidate.query_alignment,
                                                                          hit_candidate.subject_alignment,
                                                                          hit_range, hit_template_id)

                        ModelLogger.get_current().add("found a hit with {} covering range {}:\n{}"
                                                      .format(hit_template_id, hit_range, hit_candidate))

                        if hit_candidate.get_percentage_coverage() > self.min_percentage_coverage:

                            _log.debug("coverage is high enough for {} {}"
                                       .format(hit_candidate.get_hit_accession_code(), hit_range))

                            if best_hit is None or self._is_better_than(hit_candidate, best_hit):

                                _log.debug("{} is better than {}".format(hit_candidate, best_hit))
                                ModelLogger.get_current().add("{} is better than {}".format(hit_candidate, best_hit))

                                best_hit = hit_candidate
                        else:
                            # Acceptable alignment, but the coverage is too low:
                            # only used if nothing better turns up for this range.
                            last_resort_hit = hit_candidate

                if best_hit is None:
                    best_hit = last_resort_hit

                if best_hit is not None:

                    # Remove any smaller ranges that this one encloses:
                    best_ranges_alignments = self._remove_enclosing(range_, best_ranges_alignments)

                    best_template_id = TemplateID(best_hit.get_hit_accession_code(),
                                                  best_hit.get_hit_chain_id())

                    hit_range = best_hit.get_query_range()
                    _log.debug("passing best hit with template {} with range {}".format(best_template_id, hit_range))

                    best_ranges_alignments[hit_range] = DomainAlignment(best_hit.query_alignment,
                                                                        best_hit.subject_alignment,
                                                                        hit_range, best_template_id)
                else:
                    _log.debug("no hit for range {}".format(range_))

            # After iterating the sample ranges, prepare for the next round:
            sample_ranges = self._clean_search_space(checked_ranges, sample_ranges, ok_ranges_alignments)

        return list(best_ranges_alignments.values())
예제 #15
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def _model_run(self, main_domain_alignment, chain_alignments, context):
        """
        Run yasara's homology modeling experiment on the template object of the
        context and pack the work directory into a tar.gz archive.

        All files are written to a fresh temporary directory. That directory is
        removed again before this method returns or raises.

        :param main_domain_alignment: used to derive the model name and the tar
                                      path, and passed on to the error handler
        :param chain_alignments: written to the alignment fasta and to the
                                 selected targets file
        :param context: modeling context; supplies the yasara handle, the
                        template object, chain ids and chain sequences
        :return: path of the tar.gz archive that holds the model directory
        :raises ModelRunError: if yasara left an error scene, if the chains or
                               their sequences changed during a failed run, or
                               if no output yob was generated
        :raises RuntimeError: re-raised when a runtime error occurred and yasara
                              left neither an error text nor an error scene
        """

        model_name = model_storage.get_model_name(context.get_main_target_sequence(),
                                                  context.target_species_id,
                                                  main_domain_alignment,
                                                  TemplateID(context.template_pdbid,
                                                             context.main_target_chain_id))

        work_dir_path = tempfile.mkdtemp()
        full_target_path = os.path.join(work_dir_path, 'target.fa')
        align_fasta_path = os.path.join(work_dir_path, 'align.fa')
        output_yob_path = os.path.join(work_dir_path, 'target.yob')
        error_path = os.path.join(work_dir_path, 'errorexit.txt')
        error_scene_path = os.path.join(work_dir_path, 'errorexit.sce')
        before_scene_path = os.path.join(work_dir_path, 'beforemodel.sce')

        try:
            # Everything that touches the work directory happens inside this
            # try block, so that the 'finally' clause removes the directory
            # even if writing the target fasta fails.
            write_fasta(full_target_path, {'target': context.get_main_target_sequence()})

            context.yasara.CD(work_dir_path)

            context.yasara.SaveSce(before_scene_path)

            # Remember the template's chains and sequences, to be able to tell
            # afterwards whether yasara modified them.
            chain_ids_before_model = context.get_chain_ids()
            sequences_before_model = {chain_id: context.get_sequence(chain_id) for chain_id in chain_ids_before_model}

            self._write_model_alignment_fasta(context, chain_alignments, align_fasta_path)

            context.yasara.Processors(1)

            context.yasara.ExperimentHomologyModeling(templateobj=context.template_obj,
                                                      alignfile=align_fasta_path,
                                                      templates="1, sameseq = 1",
                                                      alignments=1,
                                                      termextension=0,
                                                      oligostate=32,
                                                      looplenmax=10,
                                                      animation='fast',
                                                      speed='fast',
                                                      loopsamples=20,
                                                      resultfile='target')
            context.yasara.Experiment("On")
            context.yasara.Wait("Expend")

            # NOTE(review): _handle_error_txt presumably raises; if it returns,
            # execution continues with the output check below.
            if os.path.isfile(error_path):
                self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            elif os.path.isfile(error_scene_path):
                raise ModelRunError("yasara exited with an error")

            if not os.path.isfile(output_yob_path):
                # No output: report the most specific cause that can be detected.
                chain_ids_after_failure = context.get_chain_ids()

                if chain_ids_before_model != chain_ids_after_failure:
                    raise ModelRunError("During modeling, yasara changed the chains {} to {}"
                                        .format(chain_ids_before_model, chain_ids_after_failure))

                for chain_id in chain_ids_before_model:
                    sequence_after_failure = context.get_sequence(chain_id)
                    if sequence_after_failure != sequences_before_model[chain_id]:
                        raise ModelRunError("During modeling, yasara changed chain {} sequence {} to {}"
                                            .format(chain_id, sequences_before_model[chain_id], sequence_after_failure))

                raise ModelRunError("yasara generated no output yob, check the console for further details")

            model_path = os.path.join(work_dir_path, 'target.pdb')
            context.yasara.SavePDB(context.template_obj, model_path)

            self._write_selected_targets(chain_alignments, os.path.join(work_dir_path, 'selected-targets.txt'))

            log_path = os.path.join(work_dir_path, 'model.log')
            ModelLogger.get_current().write(log_path)

            tar_path = model_storage.get_tar_path(context.get_main_target_sequence(),
                                                  context.target_species_id,
                                                  main_domain_alignment,
                                                  TemplateID(context.template_pdbid,
                                                             context.main_target_chain_id))
            # The whole work directory goes into the archive, under the model's name.
            with tarfile.open(tar_path, mode="w:gz") as ar:
                ar.add(work_dir_path, arcname=model_name)

            return tar_path
        except RuntimeError as e:
            self._log_additional_error_info(e, chain_alignments, context)

            # NOTE(review): if _handle_error_txt returns without raising, this
            # method ends here and returns None.
            if os.path.isfile(error_path):
                self._handle_error_txt(error_path, work_dir_path, context, main_domain_alignment)
            elif os.path.isfile(error_scene_path):
                raise ModelRunError("yasara exited with an error")
            else:
                raise
        finally:
            if os.path.isdir(work_dir_path):
                shutil.rmtree(work_dir_path)
예제 #16
0
파일: model.py 프로젝트: cmbi/hommod-rest
    def _make_alignments(self, main_target_sequence, target_species_id,
                         main_domain_alignment, context, require_resnum):
        """
        Build a dictionary that maps template chain ids to alignments.

        First the main target is aligned (with kmad) to every chain returned by
        ``_pick_identical_chains`` for the template chain of the main domain
        alignment. Then the set of aligned chains is grown step by step with the
        chains that interact with it: for each such chain a target alignment is
        chosen by ``_choose_best_target_alignment``, or a poly-A alignment is
        placed when that yields nothing.

        :param main_target_sequence: used to derive the target id of the main alignments
        :param target_species_id: passed to ``_find_target_sequences``
        :param main_domain_alignment: supplies the target sequence, range and
                                      template id for the main chains
        :param context: modeling context; supplies chain ids, sequences,
                        secondary structure and chain interactions
        :param require_resnum: when not None, the alignment on the main template
                               chain must cover this target residue
        :return: dictionary of alignments, one per aligned template chain id
        :raises RuntimeError: if require_resnum is given, but not covered by the
                              alignment on the main template chain
        """
        main_chain_id = main_domain_alignment.template_id.chain_id
        by_chain = {}

        # Decide which template chains receive the main target:
        main_chain_ids = self._pick_identical_chains(main_chain_id, context)

        ModelLogger.get_current().add("using template chains {} for the main target sequence".format(main_chain_ids))

        for main_id in main_chain_ids:

            chain_sequence = context.get_sequence(main_id)
            chain_secstr = context.get_secondary_structure(main_id)

            realigned = kmad_aligner.align(chain_sequence, chain_secstr,
                                           main_domain_alignment.get_target_sequence())

            chain_alignment = DomainAlignment(realigned.target_alignment,
                                              realigned.template_alignment,
                                              main_domain_alignment.range,
                                              main_domain_alignment.template_id)
            chain_alignment.target_id = model_storage.get_sequence_id(main_target_sequence)

            by_chain[main_id] = chain_alignment

        if require_resnum is not None:
            if not by_chain[main_chain_id].is_target_residue_covered(require_resnum):
                raise RuntimeError("Cannot align to chain {} so that residue {} is covered"
                                   .format(main_chain_id, require_resnum))

        # Grow the set of aligned chains with their interaction partners, one
        # shell per iteration, until every template chain has an alignment or
        # no new partners turn up.
        while len(by_chain) < len(context.get_chain_ids()):

            # Per not yet aligned chain: the aligned chains that it interacts with.
            partners_of = {}
            for done_id in by_chain:
                for neighbour_id in context.list_interacting_chains(done_id):

                    ModelLogger.get_current().add("template chain {} interacts with {}"
                                                  .format(done_id, neighbour_id))

                    # Chains that already have an alignment are left alone,
                    # otherwise this loop would never finish.
                    if neighbour_id not in by_chain:
                        partners_of.setdefault(neighbour_id, []).append(done_id)

            if not partners_of:
                break  # Nothing more to add

            for candidate_id, partner_ids in partners_of.items():

                partner_alignments = {partner_id: by_chain[partner_id]
                                      for partner_id in partner_ids}

                candidate_sequence = context.get_sequence(candidate_id)
                # The result is not used here, the call itself is retained.
                context.get_secondary_structure(candidate_id)

                target_candidates = self._find_target_sequences(candidate_sequence,
                                                                target_species_id)

                ModelLogger.get_current().add("choosing target sequence for template chain {} from {}"
                                              .format(candidate_id, target_candidates.keys()))

                chosen = self._choose_best_target_alignment(context,
                                                            partner_alignments,
                                                            target_candidates,
                                                            candidate_id)
                if chosen is None:
                    # No usable target sequence: fill the chain with poly-A instead.
                    chosen = self._make_poly_A_alignment(context, candidate_id)
                    chosen.target_id = "poly-A"

                    ModelLogger.get_current().add("found no target for template chain {}, placing poly-A"
                                                  .format(candidate_id))

                by_chain[candidate_id] = chosen

        return by_chain