예제 #1
0
    def test_align_models_homo(self):
        """Superpose cut-down homolog models with theseus and check the per-residue variances.

        Each input PDB is passed through ``pdb_edit.select_residues`` with
        ``tokeep_idx`` 0..11 to produce a ``*_cut.pdb`` file in
        ``self.testfiles_dir``. The cut models are superposed with
        ``homologs=True`` and the resulting ``var_by_res`` entries are compared
        against stored reference values.

        All generated files and the working directory are removed in a
        ``finally`` block so a failing assertion does not leave artefacts
        behind for subsequent test runs.
        """
        work_dir = os.path.join(self.tests_dir, 'theseus_align_homo')
        pdb_list = ['1D7M.pdb', '1GU8.pdb', '2UUI.pdb', '1K33.pdb', '1BYZ.pdb']
        tokeep_idx = list(range(12))
        models = []
        try:
            if not os.path.isdir(work_dir):
                os.mkdir(work_dir)
            for pdb in pdb_list:
                pdbin = os.path.join(self.testfiles_dir, pdb)
                name = os.path.splitext(pdb)[0]
                pdbout = os.path.join(self.testfiles_dir,
                                      "{0}_cut.pdb".format(name))
                # Record the output path before writing it so that a file left
                # behind by a failing select_residues call is still cleaned up.
                models.append(pdbout)
                pdb_edit.select_residues(pdbin, pdbout, tokeep_idx=tokeep_idx)

            homologs = True
            rtheseus = theseus.Theseus(work_dir=work_dir,
                                       theseus_exe=self.theseus_exe)
            rtheseus.superpose_models(models, homologs=homologs)
            var_by_res = rtheseus.var_by_res
            # Reference values (idx, resSeq, variance) generated with
            # theseus 3.1.1 on osx 10.9.5
            ref = [
                (0, 243, 8.049061),
                (1, 244, 2.614031),
                (2, 245, 1.343609),
                (3, 246, 2.261761),
                (4, 247, 1.112115),
                (5, 248, 0.574936),
                (6, 249, 0.03114),
                (7, 250, 0.002894),
                (8, 251, 0.002314),
                (9, 252, 0.002174),
                (10, 253, 0.016252),
                (11, 254, 0.109965),
            ]

            self.assertEqual([x.idx for x in var_by_res], [x[0] for x in ref])
            self.assertEqual([x.resSeq for x in var_by_res],
                             [x[1] for x in ref])
            # Variances are floats, so compare with an absolute tolerance.
            for i, (t, r) in enumerate(
                    zip([x.variance for x in var_by_res],
                        [x[2] for x in ref])):
                self.assertTrue(
                    abs(t - r) < 0.0001,
                    "Mismatch for: {0} {1} {2}".format(i, t, r))
            # Every aligned model reported by theseus must exist in work_dir.
            self.assertTrue(
                all(
                    os.path.isfile(os.path.join(work_dir, m))
                    for m in rtheseus.aligned_models))
        finally:
            # Always tidy up, even when an assertion above failed.
            for m in models:
                if os.path.isfile(m):
                    os.unlink(m)
            if os.path.isdir(work_dir):
                shutil.rmtree(work_dir)
예제 #2
0
 def superpose_models(self,
                      models,
                      basename='theseus',
                      work_dir=None,
                      homologs=False):
      """Superpose ``models`` with theseus, logging any failure.

      A ``theseus.Theseus`` runner is created for ``work_dir`` using
      ``self.theseus_exe`` and asked to superpose the models.

      Args:
          models: models handed straight to ``Theseus.superpose_models``.
          basename: basename forwarded to theseus (default ``'theseus'``).
          work_dir: working directory for the theseus runner.
          homologs: forwarded to theseus unchanged.

      Returns:
          ``False`` if theseus raised any exception. The visible code has no
          explicit return on the success path, so it yields ``None`` there.
      """
      run_theseus = theseus.Theseus(work_dir=work_dir,
                                    theseus_exe=self.theseus_exe)
      try:
          run_theseus.superpose_models(models,
                                       basename=basename,
                                       homologs=homologs)
      # "except X as e" is valid on Python 2.6+ and Python 3, whereas the old
      # "except X, e" form is a SyntaxError on Python 3.
      except Exception as e:
          logger.critical("Error running theseus: {0}".format(e))
          return False
예제 #3
0
    def test_align_models(self):
        """Superpose the test models with theseus and check the per-residue variances.

        All ``*.pdb`` files under ``<testfiles_dir>/models`` are superposed with
        ``homologs=False`` and the resulting ``var_by_res`` entries are compared
        against stored reference values.

        The working directory is removed in a ``finally`` block so a failing
        assertion does not leave it behind in the user's home directory.
        """
        # BUG (hlfsimko): causes problems on Mac because of a symlink in root
        # on Mac OS X. Presumably this is why the work directory lives under
        # the user's home directory here -- TODO confirm.
        if sys.platform.startswith("win"):
            tests_dir = os.environ["HOMEPATH"]
        else:
            tests_dir = os.environ["HOME"]
        work_dir = os.path.join(tests_dir, 'theseus_align')
        models = glob.glob(os.path.join(self.testfiles_dir, 'models', '*.pdb'))

        try:
            homologs = False
            rtheseus = theseus.Theseus(work_dir=work_dir,
                                       theseus_exe=self.theseus_exe)
            rtheseus.superpose_models(models, homologs=homologs)
            var_by_res = rtheseus.var_by_res
            # Reference values as (idx, resSeq, variance) tuples.
            ref = [(0, 1, 55.757593), (1, 2, 46.981238), (2, 3, 47.734236),
                   (3, 4, 39.857326), (4, 5, 35.477433), (5, 6, 26.066719),
                   (6, 7, 24.114493), (7, 8, 24.610988), (8, 9, 21.187142),
                   (9, 10, 21.882375), (10, 11, 21.622263), (11, 12, 18.680601),
                   (12, 13, 16.568074), (13, 14, 14.889583), (14, 15, 13.889769),
                   (15, 16, 8.722903), (16, 17, 8.719501), (17, 18, 4.648107),
                   (18, 19, 4.263961), (19, 20, 2.338545), (20, 21, 1.412784),
                   (21, 22, 0.57754), (22, 23, 0.204917), (23, 24, 0.226518),
                   (24, 25, 0.162323), (25, 26, 0.068066), (26, 27, 0.057023),
                   (27, 28, 0.135811), (28, 29, 0.145613), (29, 30, 0.081845),
                   (30, 31, 0.051059), (31, 32, 0.045182), (32, 33, 0.112322),
                   (33, 34, 0.102072), (34, 35, 0.446003), (35, 36, 0.504418),
                   (36, 37, 1.276947), (37, 38, 2.641781), (38, 39, 4.336794),
                   (39, 40, 6.484846), (40, 41, 9.559536), (41, 42, 14.467942),
                   (42, 43, 22.818975), (43, 44, 29.55385), (44, 45, 34.692256),
                   (45, 46, 35.141769), (46, 47, 40.41399), (47, 48, 52.268871),
                   (48, 49, 54.535848), (49, 50, 49.527155), (50, 51, 67.9861),
                   (51, 52, 58.661069), (52, 53, 41.802971), (53, 54, 57.085415),
                   (54, 55, 71.944127), (55, 56, 57.893953), (56, 57, 54.34137),
                   (57, 58, 77.736775), (58, 59, 83.279371)]

            self.assertEqual([x.idx for x in var_by_res], [x[0] for x in ref])
            self.assertEqual([x.resSeq for x in var_by_res],
                             [x[1] for x in ref])
            # Variances are floats, so compare with an absolute tolerance.
            for i, (t, r) in enumerate(
                    zip([x.variance for x in var_by_res],
                        [x[2] for x in ref])):
                self.assertTrue(
                    abs(t - r) < 0.0001,
                    "Mismatch for: {0} {1} {2}".format(i, t, r))
        finally:
            # Always tidy up, even when an assertion above failed.
            if os.path.isdir(work_dir):
                shutil.rmtree(work_dir)
예제 #4
0
    def calculate_truncations(self,
                              models=None,
                              truncation_method=None,
                              percent_truncation=None,
                              percent_fixed_intervals=None,
                              truncation_pruning=None,
                              residue_scores=None,
                              alignment_file=None,
                              homologs=False):
        """Returns a list of Truncation objects, one for each truncation level.

        This method doesn't do any truncating - it just calculates the data for each truncation level.

        Args:
            models: the models to superpose with theseus; may be omitted when ``residue_scores`` is supplied.
            truncation_method: a member of ``TRUNCATION_METHODS`` (PERCENT, PERCENT_FIXED, SCORES or FOCUSED).
            percent_truncation: forwarded to ``calculate_residues_by_percent``; must be truthy for every method.
            percent_fixed_intervals: forwarded to ``calculate_residues_by_percent``.
            truncation_pruning: ``None`` for no pruning or ``'single'`` to prune via ``prune_residues``.
            residue_scores: scores converted with ``self._convert_residue_scores`` for the SCORES method.
            alignment_file: forwarded to theseus and to ``model_core_from_fasta``.
            homologs: if true, the aligned models are trimmed to their core and re-superposed.

        Returns:
            A list of ``Truncation`` objects (levels with no residues are skipped), or an empty list if
            theseus raised a ``RuntimeError``.

        Raises:
            AssertionError: if the arguments, ``self.theseus_exe`` or ``self.work_dir`` are not usable.
            RuntimeError: if no variances are available, if ``truncation_method`` or ``truncation_pruning``
                is unrecognised, or if ``homologs`` is requested together with the SCORES method.

        Side effects:
            Changes the current directory to ``self.work_dir`` and sets ``self.models``,
            ``self.aligned_models`` (when theseus is run), ``self.truncation_levels``,
            ``self.truncation_variances`` and ``self.truncation_nresidues``.
        """

        # models defaults to None, so test it before calling len() to avoid a TypeError when only
        # residue_scores are supplied.
        assert (models and len(models) > 1) or residue_scores, "Cannot truncate as < 2 models!"
        assert truncation_method and percent_truncation, "Missing arguments: {0} : {1}".format(
            truncation_method, percent_truncation)
        assert ample_util.is_exe(self.theseus_exe), "Cannot find theseus_exe: {0}".format(self.theseus_exe)

        # Create the directories we'll be working in
        assert self.work_dir and os.path.isdir(self.work_dir), "truncate_models needs a self.work_dir"
        os.chdir(self.work_dir)

        self.models = models
        # Calculate variances between pdb and align them (we currently only require the aligned models for homologs)
        run_theseus = None
        if truncation_method != TRUNCATION_METHODS.SCORES:
            run_theseus = theseus.Theseus(work_dir=self.work_dir, theseus_exe=self.theseus_exe)
            try:
                run_theseus.superpose_models(self.models, homologs=homologs, alignment_file=alignment_file)
                self.aligned_models = run_theseus.aligned_models
            except RuntimeError as e:
                logger.critical(e)
                return []

        if homologs:
            # The homolog path needs the theseus runner created above; without it the code below would
            # fail with an unhelpful UnboundLocalError, so fail early with a clear message instead.
            if run_theseus is None:
                raise RuntimeError(
                    "Cannot process homologs with truncation method: {0}".format(truncation_method))
            # If using homologs, now trim down to the core. We only do this here so that we are using the aligned models from
            # theseus, which makes it easier to see what the truncation is doing.
            models = model_core_from_fasta(
                self.aligned_models, alignment_file=alignment_file, work_dir=os.path.join(self.work_dir, 'core_models'))
            # Unfortunately Theseus doesn't print all residues in its output format, so we can't use the variances we calculated before and
            # need to calculate the variances of the core models
            try:
                run_theseus.superpose_models(models, homologs=homologs, basename='homologs_core')
                self.models = run_theseus.aligned_models
                self.aligned_models = run_theseus.aligned_models
            except RuntimeError as e:
                logger.critical(e)
                return []

        if truncation_method == TRUNCATION_METHODS.SCORES:
            var_by_res = self._convert_residue_scores(residue_scores)
        else:
            var_by_res = run_theseus.var_by_res
        if not var_by_res:
            raise RuntimeError("Error reading residue variances!")

        logger.info('Using truncation method: %s', truncation_method)
        # Calculate which residues to keep under the different methods
        if truncation_method in [
                TRUNCATION_METHODS.PERCENT, TRUNCATION_METHODS.PERCENT_FIXED,
                TRUNCATION_METHODS.SCORES
        ]:
            truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_by_percent(
                var_by_res, percent_truncation=percent_truncation, percent_fixed_intervals=percent_fixed_intervals)
        elif truncation_method == TRUNCATION_METHODS.FOCUSED:
            truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = calculate_residues_focussed(
                var_by_res)
        else:
            raise RuntimeError("Unrecognised ensembling mode: {}".format(truncation_method))

        # Somewhat of a hack to save the data so we can put it in the amoptd
        self.truncation_levels = truncation_levels
        self.truncation_variances = truncation_variances
        self.truncation_nresidues = [len(r) for r in truncation_residues]
        truncations = []
        for tlevel, tvar, tresidues, tresidue_idxs in zip(truncation_levels, truncation_variances, truncation_residues,
                                                          truncation_residue_idxs):
            # Prune singletone/doubletone etc. residues if required
            logger.debug("truncation_pruning: %s", truncation_pruning)
            if truncation_pruning == 'single':
                tresidue_idxs, pruned_residues = prune_residues(tresidue_idxs, chunk_size=1, allowed_gap=2)
                if pruned_residues:
                    logger.debug("prune_residues removing: %s", pruned_residues)
            elif truncation_pruning is not None:
                raise RuntimeError("Unrecognised truncation_pruning: {0}".format(truncation_pruning))

            # Skip if there are no residues
            if not tresidue_idxs:
                logger.debug("Skipping truncation level %s with variance %s as no residues", tlevel, tvar)
                continue
            truncation = Truncation()
            truncation.method = truncation_method
            truncation.percent = percent_truncation
            truncation.level = tlevel
            truncation.variances = tvar
            truncation.residues = tresidues
            truncation.residues_idxs = tresidue_idxs
            truncations.append(truncation)
        return truncations