def test_resSeqMap3(self):
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "2UUI.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "2UUI_S_00000001.pdb")

    chainA = "2UUI_A.pdb"
    pdb_edit.extract_chain(nativePdb, chainA, chainID='A')
    chainAstd = "2UUI_A_std.pdb"
    pdb_edit.standardise(chainA, chainAstd)

    resSeqMap = residue_map.residueSequenceMap(chainA, modelPdb)

    self.assertEqual(156, resSeqMap._lenMatch())

    nativeMask = [False] * 155 + [True]
    self.assertEqual(resSeqMap.refCAlphaMask, nativeMask)

    self.assertEqual(resSeqMap.ref2target(10), 16)
    self.assertEqual(resSeqMap.target2ref(155), 149)

    # Check ends match up
    m1 = resSeqMap.targetResSeq[resSeqMap.targetOffset]
    n1 = resSeqMap.target2ref(m1)
    self.assertEqual(m1, resSeqMap.ref2target(n1))
    # Last matched residue in the reference numbering
    r1 = resSeqMap.refResSeq[resSeqMap.refOffset + resSeqMap.lenMatch - 1]
    self.assertEqual(resSeqMap.ref2target(r1),
                     resSeqMap.targetResSeq[resSeqMap.targetOffset + resSeqMap.lenMatch - 1])

    os.unlink(chainA)
    os.unlink(chainAstd)
def test_resSeqMap4(self):
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

    nativePdbStd = "1K33_std.pdb"
    pdb_edit.standardise(nativePdb, nativePdbStd)

    nativeInfo = pdb_edit.get_info(nativePdbStd)
    modelInfo = pdb_edit.get_info(modelPdb)

    resSeqMap = residue_map.residueSequenceMap()
    resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')

    os.unlink(nativePdbStd)
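
# A minimal sketch of the residueSequenceMap API exercised by the two tests
# above. The PDB paths here are illustrative placeholders, not files from the
# test suite; the construction routes and attribute names follow the tests.
def _residue_map_demo():
    # Route 1: build the map directly from a native chain and a model
    resSeqMap = residue_map.residueSequenceMap("native_chainA.pdb", "model.pdb")

    # Route 2: populate an empty map from pre-parsed info objects and chain IDs
    # resSeqMap = residue_map.residueSequenceMap()
    # resSeqMap.fromInfo(pdb_edit.get_info("native_std.pdb"), 'A',
    #                    pdb_edit.get_info("model.pdb"), 'A')

    # ref2target/target2ref translate residue numbers between the native (ref)
    # and model (target) numbering, and should invert each other over the
    # matched region (the tests check the mirror-image round trip)
    first_ref = resSeqMap.refResSeq[resSeqMap.refOffset]
    assert resSeqMap.target2ref(resSeqMap.ref2target(first_ref)) == first_ref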
def generate_ensembles(self,
                       models,
                       ensembles_directory=None,
                       nproc=None,
                       percent_truncation=None,
                       percent_fixed_intervals=None,
                       side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                       truncation_method=None,
                       truncation_pruning=None,
                       truncation_scorefile=None,
                       truncation_scorefile_header=None):
    """Generate ensembles from a single structure based on residue scores."""
    if not truncation_method:
        truncation_method = self.truncation_method
    if not truncation_pruning:
        truncation_pruning = self.truncation_pruning
    if not truncation_scorefile:
        truncation_scorefile = self.truncation_scorefile

    if len(models) > 1:
        msg = "More than 1 structure provided"
        logger.critical(msg)
        raise RuntimeError(msg)

    if len(truncation_scorefile_header) < 2:
        msg = "At least two header options for scorefile are required"
        logger.critical(msg)
        raise RuntimeError(msg)

    # Standardise the structure
    std_models_dir = os.path.join(self.work_dir, "std_models")
    os.mkdir(std_models_dir)
    std_model = ample_util.filename_append(models[0], 'std', std_models_dir)
    pdb_edit.standardise(pdbin=models[0], pdbout=std_model, del_hetatm=True)
    std_models = [std_model]
    logger.info('Standardised input model: %s', std_models[0])

    # Create final ensembles directory
    if not os.path.isdir(self.ensembles_directory):
        os.mkdir(self.ensembles_directory)

    truncate_dir = os.path.join(self.work_dir, "single_truncate")
    if not os.path.isdir(truncate_dir):
        os.mkdir(truncate_dir)

    # Read all the scores into a per-residue dictionary
    residue_scores = self._read_scorefile(truncation_scorefile)
    residue_key = truncation_scorefile_header.pop(0).lower()
    # list() so the header can be iterated more than once under Python 3
    truncation_scorefile_header = list(map(str.strip, truncation_scorefile_header))
    assert all(h in residue_scores[0] for h in truncation_scorefile_header), \
        "Not all column labels are in your CSV file"

    self.ensembles = []
    for score_key in truncation_scorefile_header:
        zipped_scores = self._generate_residue_scorelist(residue_key, score_key, residue_scores)
        score_truncate_dir = os.path.join(truncate_dir, score_key)
        if not os.path.isdir(score_truncate_dir):
            os.mkdir(score_truncate_dir)

        self.truncator = truncation_util.Truncator(work_dir=score_truncate_dir)
        self.truncator.theseus_exe = self.theseus_exe
        for truncation in self.truncator.truncate_models(models=std_models,
                                                         truncation_method=truncation_method,
                                                         percent_truncation=percent_truncation,
                                                         percent_fixed_intervals=percent_fixed_intervals,
                                                         truncation_pruning=truncation_pruning,
                                                         residue_scores=zipped_scores):
            pre_ensemble = _ensembler.Ensemble()
            pre_ensemble.num_residues = truncation.num_residues
            pre_ensemble.truncation_dir = truncation.directory
            pre_ensemble.truncation_level = truncation.level
            pre_ensemble.truncation_method = truncation.method
            pre_ensemble.truncation_percent = truncation.percent
            pre_ensemble.truncation_residues = truncation.residues
            pre_ensemble.truncation_variance = truncation.variances
            pre_ensemble.truncation_score_key = score_key.lower()
            pre_ensemble.pdb = truncation.models[0]

            for ensemble in self.edit_side_chains(pre_ensemble,
                                                  side_chain_treatments,
                                                  single_structure=True):
                self.ensembles.append(ensemble)

    return self.ensembles
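
# A hedged usage sketch for the single-structure route above. The ensembler
# instance, model path, and scorefile are illustrative assumptions; only the
# keyword names come from the signature. The scorefile is assumed to be the
# CSV read by _read_scorefile(), with a residue-name column first followed by
# one column per score, matching truncation_scorefile_header.
def _single_structure_demo(ensembler):
    # truncation_method and truncation_pruning fall back to the instance defaults
    return ensembler.generate_ensembles(
        ["model_01.pdb"],                    # exactly one structure is allowed
        truncation_scorefile="scores.csv",   # hypothetical per-residue scores
        truncation_scorefile_header=['residue', 'score_a', 'score_b'])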
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # Number of atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this space group
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    amoptd['native_pdb_space_group'] = originInfo.spaceGroup() if originInfo else "P1"
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb,
                                                astr="model1",
                                                directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc.
    nativePdbStd = ample_util.filename_append(filename=nativePdb,
                                              astr="std",
                                              directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For maxcluster comparison of the shelxe model we need a single chain
    # from the native, so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb,
                                                  astr="chain1",
                                                  directory=fixpath(amoptd['work_dir']))
        pdb_edit.to_single_chain(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo
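
# A minimal sketch of the dictionary contract of analysePdb(): the function
# reads only 'native_pdb' and 'work_dir' and fills in the 'native_pdb_*'
# entries set above. The paths are hypothetical placeholders.
def _analyse_pdb_demo():
    amoptd = {
        'native_pdb': "/path/to/native.pdb",  # input structure to analyse
        'work_dir': "/path/to/work_dir",      # where intermediate PDBs are written
    }
    analysePdb(amoptd)
    return amoptd['native_pdb_space_group'], amoptd['native_pdb_num_residues']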
def generate_ensembles(self,
                       models,
                       alignment_file=None,
                       homolog_aligner=None,
                       percent_fixed_intervals=None,
                       percent_truncation=None,
                       side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                       truncation_method=None,
                       **kwargs):
    """Generate ensembles from multiple homologous structures using a structural alignment."""
    if not percent_truncation:
        percent_truncation = self.percent_truncation
    if not truncation_method:
        truncation_method = self.truncation_method

    if not len(models):
        msg = "Cannot find any models for ensembling!"
        raise RuntimeError(msg)
    if not all([os.path.isfile(m) for m in models]):
        msg = "Problem reading models given to Ensembler: {0}".format(models)
        raise RuntimeError(msg)

    logger.info('Ensembling models in directory: %s', self.work_dir)

    # Create final ensembles directory
    if not os.path.isdir(self.ensembles_directory):
        os.mkdir(self.ensembles_directory)

    # Standardise all the models
    std_models_dir = os.path.join(self.work_dir, "std_models")
    os.mkdir(std_models_dir)
    std_models = []
    for m in models:
        std_model = ample_util.filename_append(m, 'std', std_models_dir)
        pdb_edit.standardise(pdbin=m, pdbout=std_model, del_hetatm=True)
        std_models.append(std_model)

    # Get a structural alignment between the different models
    if not alignment_file:
        if homolog_aligner == 'mustang':
            logger.info("Generating alignment file with mustang_exe: %s", self.mustang_exe)
            alignment_file = align_mustang(std_models, mustang_exe=self.mustang_exe, work_dir=self.work_dir)
        elif homolog_aligner == 'gesamt':
            logger.info("Generating alignment file with gesamt_exe: %s", self.gesamt_exe)
            alignment_file = align_gesamt(std_models, gesamt_exe=self.gesamt_exe, work_dir=self.work_dir)
        else:
            msg = "Unknown homolog_aligner: {0}".format(homolog_aligner)
            raise RuntimeError(msg)
        logger.info("Generated alignment file: %s", alignment_file)
    else:
        logger.info("Using alignment file: %s", alignment_file)

    truncate_dir = os.path.join(self.work_dir, "homolog_truncate")
    if not os.path.isdir(truncate_dir):
        os.mkdir(truncate_dir)

    # Now truncate and create ensembles - as standard AMPLE, but with no subclustering
    self.ensembles = []
    self.truncator = truncation_util.Truncator(work_dir=truncate_dir)
    self.truncator.theseus_exe = self.theseus_exe
    for truncation in self.truncator.truncate_models(models=std_models,
                                                     truncation_method=truncation_method,
                                                     percent_fixed_intervals=percent_fixed_intervals,
                                                     percent_truncation=percent_truncation,
                                                     truncation_pruning=None,
                                                     homologs=True,
                                                     alignment_file=alignment_file):
        ensemble_dir = os.path.join(truncation.directory,
                                    "ensemble_{0}".format(truncation.level))
        os.mkdir(ensemble_dir)
        os.chdir(ensemble_dir)

        # Need to create an alignment file for theseus
        basename = "e{0}".format(truncation.level)
        superposed_models = self.superpose_models(truncation.models,
                                                  basename=basename,
                                                  work_dir=ensemble_dir,
                                                  homologs=True)
        if not superposed_models:
            logger.critical("Skipping ensemble %s due to error with Theseus", basename)
            continue

        # Create Ensemble object
        pre_ensemble = _ensembler.Ensemble()
        pre_ensemble.num_residues = truncation.num_residues
        pre_ensemble.truncation_dir = truncation.directory
        pre_ensemble.truncation_level = truncation.level
        pre_ensemble.truncation_method = truncation.method
        pre_ensemble.truncation_percent = truncation.percent
        pre_ensemble.truncation_residues = truncation.residues
        pre_ensemble.truncation_variance = truncation.variances
        pre_ensemble.pdb = superposed_models

        for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, homologs=True):
            self.ensembles.append(ensemble)

    return self.ensembles
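
# A hedged sketch of the homolog route above: several homologous structures and
# no precomputed alignment, so gesamt is asked to build one. The model paths are
# placeholders, and the ensembler instance is assumed to already have work_dir,
# ensembles_directory, theseus_exe and gesamt_exe configured.
def _homolog_demo(ensembler):
    models = ["homolog_1.pdb", "homolog_2.pdb", "homolog_3.pdb"]
    return ensembler.generate_ensembles(models,
                                        alignment_file=None,  # force alignment generation
                                        homolog_aligner='gesamt')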