def test_align_models_homo(self):
    """Superpose five truncated homolog structures and check the per-residue variances.

    Each input PDB is cut down to the residues with indices 0-11 with
    ``pdb_edit.select_residues``, the cut models are superposed with
    ``homologs=True`` and the resulting ``var_by_res`` entries are compared
    against reference values. The cut PDB files and the work directory are
    removed whether or not the assertions pass.
    """
    work_dir = os.path.join(self.tests_dir, 'theseus_align_homo')
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    pdb_list = ['1D7M.pdb', '1GU8.pdb', '2UUI.pdb', '1K33.pdb', '1BYZ.pdb']
    tokeep_idx = list(range(12))
    models = []
    try:
        for pdb in pdb_list:
            pdbin = os.path.join(self.testfiles_dir, pdb)
            name = os.path.splitext(pdb)[0]
            pdbout = os.path.join(self.testfiles_dir, "{0}_cut.pdb".format(name))
            # Register the output before writing it so that it is cleaned up
            # even if select_residues fails part-way through.
            models.append(pdbout)
            pdb_edit.select_residues(pdbin, pdbout, tokeep_idx=tokeep_idx)

        rtheseus = theseus.Theseus(work_dir=work_dir, theseus_exe=self.theseus_exe)
        rtheseus.superpose_models(models, homologs=True)
        var_by_res = rtheseus.var_by_res

        # Below with theseus 3.1.1 on osx 10.9.5
        ref = [
            (0, 243, 8.049061),
            (1, 244, 2.614031),
            (2, 245, 1.343609),
            (3, 246, 2.261761),
            (4, 247, 1.112115),
            (5, 248, 0.574936),
            (6, 249, 0.03114),
            (7, 250, 0.002894),
            (8, 251, 0.002314),
            (9, 252, 0.002174),
            (10, 253, 0.016252),
            (11, 254, 0.109965),
        ]

        self.assertEqual([x.idx for x in var_by_res], [x[0] for x in ref])
        self.assertEqual([x.resSeq for x in var_by_res], [x[1] for x in ref])
        for i, (t, r) in enumerate(zip([x.variance for x in var_by_res], [x[2] for x in ref])):
            self.assertTrue(abs(t - r) < 0.0001, "Mismatch for: {0} {1} {2}".format(i, t, r))

        self.assertTrue(all(os.path.isfile(os.path.join(work_dir, m)) for m in rtheseus.aligned_models))
    finally:
        # Always tidy up so a failing run does not leave generated files in
        # the test-files directory or a stale work directory behind.
        for m in models:
            if os.path.isfile(m):
                os.unlink(m)
        shutil.rmtree(work_dir)
def superpose_models(self, models, basename='theseus', work_dir=None, homologs=False):
    """Superpose the given models with theseus.

    Args:
        models: models passed straight through to ``Theseus.superpose_models``.
        basename: basename forwarded to theseus (default ``'theseus'``).
        work_dir: working directory handed to the ``Theseus`` constructor.
        homologs: forwarded to theseus unchanged.

    Returns:
        ``False`` if theseus raised an exception (the error is logged at
        critical level). Nothing is returned explicitly on success.
    """
    # NOTE(review): on success the caller receives None - confirm whether the
    # superposed models were meant to be returned here.
    run_theseus = theseus.Theseus(work_dir=work_dir, theseus_exe=self.theseus_exe)
    try:
        run_theseus.superpose_models(models, basename=basename, homologs=homologs)
    except Exception as e:
        # Deliberately broad: any failure inside theseus is reported to the
        # caller through the False return value rather than propagated.
        logger.critical("Error running theseus: %s", e)
        return False
def test_align_models(self):
    """Superpose the ``models/*.pdb`` test files and check the per-residue variances.

    The models are superposed with ``homologs=False`` and the resulting
    ``var_by_res`` entries are compared against reference values. The work
    directory is removed whether or not the assertions pass.
    """
    ## BUG - hlfsimko
    # CAUSES PROBLEMS ON MAC BECAUSE OF SYM LINK IN root on Mac OS X
    tests_dir = os.environ["HOMEPATH"] if sys.platform.startswith("win") \
        else os.environ["HOME"]
    work_dir = os.path.join(tests_dir, 'theseus_align')
    models = glob.glob(os.path.join(self.testfiles_dir, 'models', '*.pdb'))
    try:
        rtheseus = theseus.Theseus(work_dir=work_dir, theseus_exe=self.theseus_exe)
        rtheseus.superpose_models(models, homologs=False)
        var_by_res = rtheseus.var_by_res
        ref = [
            (0, 1, 55.757593), (1, 2, 46.981238), (2, 3, 47.734236), (3, 4, 39.857326),
            (4, 5, 35.477433), (5, 6, 26.066719), (6, 7, 24.114493), (7, 8, 24.610988),
            (8, 9, 21.187142), (9, 10, 21.882375), (10, 11, 21.622263), (11, 12, 18.680601),
            (12, 13, 16.568074), (13, 14, 14.889583), (14, 15, 13.889769), (15, 16, 8.722903),
            (16, 17, 8.719501), (17, 18, 4.648107), (18, 19, 4.263961), (19, 20, 2.338545),
            (20, 21, 1.412784), (21, 22, 0.57754), (22, 23, 0.204917), (23, 24, 0.226518),
            (24, 25, 0.162323), (25, 26, 0.068066), (26, 27, 0.057023), (27, 28, 0.135811),
            (28, 29, 0.145613), (29, 30, 0.081845), (30, 31, 0.051059), (31, 32, 0.045182),
            (32, 33, 0.112322), (33, 34, 0.102072), (34, 35, 0.446003), (35, 36, 0.504418),
            (36, 37, 1.276947), (37, 38, 2.641781), (38, 39, 4.336794), (39, 40, 6.484846),
            (40, 41, 9.559536), (41, 42, 14.467942), (42, 43, 22.818975), (43, 44, 29.55385),
            (44, 45, 34.692256), (45, 46, 35.141769), (46, 47, 40.41399), (47, 48, 52.268871),
            (48, 49, 54.535848), (49, 50, 49.527155), (50, 51, 67.9861), (51, 52, 58.661069),
            (52, 53, 41.802971), (53, 54, 57.085415), (54, 55, 71.944127), (55, 56, 57.893953),
            (56, 57, 54.34137), (57, 58, 77.736775), (58, 59, 83.279371),
        ]
        self.assertEqual([x.idx for x in var_by_res], [x[0] for x in ref])
        self.assertEqual([x.resSeq for x in var_by_res], [x[1] for x in ref])
        for i, (t, r) in enumerate(zip([x.variance for x in var_by_res], [x[2] for x in ref])):
            self.assertTrue(abs(t - r) < 0.0001, "Mismatch for: {0} {1} {2}".format(i, t, r))
    finally:
        # The work directory lives under the user's home directory, so make
        # sure it is removed even when an assertion above fails. It is not
        # created in this test, hence the existence check.
        if os.path.isdir(work_dir):
            shutil.rmtree(work_dir)
def calculate_truncations(self,
                          models=None,
                          truncation_method=None,
                          percent_truncation=None,
                          percent_fixed_intervals=None,
                          truncation_pruning=None,
                          residue_scores=None,
                          alignment_file=None,
                          homologs=False):
    """Returns a list of Truncation objects, one for each truncation level.

    This method doesn't do any truncating - it just calculates the data for each truncation level.

    Unless the SCORES method is used, the models are superposed with theseus
    to obtain per-residue variances; with SCORES the variances are derived
    from ``residue_scores`` instead. As side effects the process changes
    directory to ``self.work_dir`` and ``self.models``,
    ``self.aligned_models``, ``self.truncation_levels``,
    ``self.truncation_variances`` and ``self.truncation_nresidues`` are set.

    Args:
        models: the models to truncate; at least two are required unless
            ``residue_scores`` is supplied.
        truncation_method: one of the ``TRUNCATION_METHODS`` values.
        percent_truncation: forwarded to ``calculate_residues_by_percent`` and
            stored on each Truncation.
        percent_fixed_intervals: forwarded to ``calculate_residues_by_percent``.
        truncation_pruning: ``None`` for no pruning or ``'single'`` to prune
            via ``prune_residues``.
        residue_scores: scores used by the SCORES method.
        alignment_file: alignment forwarded to theseus and ``model_core_from_fasta``.
        homologs: if true, the aligned models are trimmed to their core and
            superposed again before variances are read.

    Returns:
        A list of Truncation objects (levels left with no residues are
        skipped), or an empty list if theseus raised a RuntimeError.

    Raises:
        AssertionError: if the required arguments, theseus executable or
            work directory are missing.
        RuntimeError: if no residue variances are available, or the
            truncation method / pruning mode is not recognised.
    """
    # models defaults to None, so guard the len() call: otherwise score-based
    # truncation without models dies with a TypeError instead of being accepted.
    has_enough_models = models is not None and len(models) > 1
    assert has_enough_models or residue_scores, "Cannot truncate as < 2 models!"
    assert truncation_method and percent_truncation, "Missing arguments: {0} : {1}".format(
        truncation_method, percent_truncation)
    assert ample_util.is_exe(self.theseus_exe), "Cannot find theseus_exe: {0}".format(self.theseus_exe)

    # Create the directories we'll be working in
    assert self.work_dir and os.path.isdir(self.work_dir), "truncate_models needs a self.work_dir"
    os.chdir(self.work_dir)

    self.models = models
    # Calculate variances between pdb and align them (we currently only require the aligned models for homologs)
    if truncation_method != TRUNCATION_METHODS.SCORES:
        run_theseus = theseus.Theseus(work_dir=self.work_dir, theseus_exe=self.theseus_exe)
        try:
            run_theseus.superpose_models(self.models, homologs=homologs, alignment_file=alignment_file)
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if homologs:
        # If using homologs, now trim down to the core. We only do this here so that we are using the aligned
        # models from theseus, which makes it easier to see what the truncation is doing.
        # NOTE(review): run_theseus only exists when the method is not SCORES - confirm that
        # homologs is never combined with the SCORES method.
        models = model_core_from_fasta(
            self.aligned_models, alignment_file=alignment_file, work_dir=os.path.join(self.work_dir, 'core_models'))
        # Unfortunately Theseus doesn't print all residues in its output format, so we can't use the variances
        # we calculated before and need to calculate the variances of the core models
        try:
            run_theseus.superpose_models(models, homologs=homologs, basename='homologs_core')
            self.models = run_theseus.aligned_models
            self.aligned_models = run_theseus.aligned_models
        except RuntimeError as e:
            logger.critical(e)
            return []

    if truncation_method == TRUNCATION_METHODS.SCORES:
        var_by_res = self._convert_residue_scores(residue_scores)
    else:
        var_by_res = run_theseus.var_by_res
    if not var_by_res:
        raise RuntimeError("Error reading residue variances!")

    logger.info('Using truncation method: %s', truncation_method)
    # Calculate which residues to keep under the different methods
    if truncation_method in [
            TRUNCATION_METHODS.PERCENT, TRUNCATION_METHODS.PERCENT_FIXED, TRUNCATION_METHODS.SCORES
    ]:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = \
            calculate_residues_by_percent(
                var_by_res, percent_truncation=percent_truncation, percent_fixed_intervals=percent_fixed_intervals)
    elif truncation_method == TRUNCATION_METHODS.FOCUSED:
        truncation_levels, truncation_variances, truncation_residues, truncation_residue_idxs = \
            calculate_residues_focussed(var_by_res)
    else:
        raise RuntimeError("Unrecognised ensembling mode: {}".format(truncation_method))

    # Somewhat of a hack to save the data so we can put it in the amoptd
    self.truncation_levels = truncation_levels
    self.truncation_variances = truncation_variances
    self.truncation_nresidues = [len(r) for r in truncation_residues]

    truncations = []
    for tlevel, tvar, tresidues, tresidue_idxs in zip(truncation_levels, truncation_variances, truncation_residues,
                                                      truncation_residue_idxs):
        # Prune singleton/doubleton etc. residues if required
        logger.debug("truncation_pruning: %s", truncation_pruning)
        if truncation_pruning == 'single':
            tresidue_idxs, pruned_residues = prune_residues(tresidue_idxs, chunk_size=1, allowed_gap=2)
            if pruned_residues:
                logger.debug("prune_residues removing: %s", pruned_residues)
        elif truncation_pruning is None:
            pass
        else:
            raise RuntimeError("Unrecognised truncation_pruning: {0}".format(truncation_pruning))

        # Skip if there are no residues
        if not tresidue_idxs:
            logger.debug("Skipping truncation level %s with variance %s as no residues", tlevel, tvar)
            continue

        truncation = Truncation()
        truncation.method = truncation_method
        truncation.percent = percent_truncation
        truncation.level = tlevel
        truncation.variances = tvar
        truncation.residues = tresidues
        truncation.residues_idxs = tresidue_idxs
        truncations.append(truncation)

    return truncations