def dope(wrkdir):
    """
    Get DOPE and DOPE-HR scores for protein-protein.

    The complex is read from ``complexAB.pdb`` inside ``wrkdir``; the PDB must
    contain only two chains. Each score is the value of the whole complex
    minus the values of chain 1 and chain 0 taken individually. Everything
    written to ``sys.stdout`` while scoring is captured in ``dope.out``
    inside ``wrkdir``.

    [1] M. Shen and A. Sali, "Statistical potential for assessment and
    prediction of protein structures", Protein Science, vol. 15, no. 11,
    pp. 2507-2524, 2006.

    :param wrkdir: directory holding ``complexAB.pdb``; ``dope.out`` is
        written there.
    :return: dict with keys ``'DOPE'``, ``'DOPE-HR'`` and ``'>TIME_DOPE'``
        (difference between two ``timer()`` readings).
    """
    import modeller
    import modeller.scripts

    time_start = timer()
    log.info("Getting DOPE and DOPE-HR scoring...")
    cpx_path = os.path.join(wrkdir, 'complexAB.pdb')
    with open(os.path.join(wrkdir, "dope.out"), "w") as fp:
        _stdout = sys.stdout
        sys.stdout = fp
        try:
            env = modeller.environ()
            env.libs.topology.read(file='$(LIB)/top_heav.lib')
            env.libs.parameters.read(file='$(LIB)/par.lib')
            mdl = modeller.scripts.complete_pdb(env, cpx_path)
            cpx = modeller.selection(mdl.chains[:])
            lig = modeller.selection(mdl.chains[0])
            rec = modeller.selection(mdl.chains[1])
            dope = cpx.assess_dope() - rec.assess_dope() - lig.assess_dope()
            dopehr = (cpx.assess_dopehr() - rec.assess_dopehr()
                      - lig.assess_dopehr())
            sys.stdout.flush()
        finally:
            # Always hand the real stdout back, even when scoring raises;
            # otherwise later output would go to a closed file.
            sys.stdout = _stdout
    desc = dict()
    desc['DOPE'] = dope
    desc['DOPE-HR'] = dopehr
    time_end = timer()
    desc['>TIME_DOPE'] = time_end - time_start
    return desc
def main(args):
    """Write normalized DOPE energy profiles for a model and its template.

    Both structures are completed with ``complete_pdb`` and assessed over
    all atoms. Each profile is written next to its input, with ``.pdb``
    replaced by ``.profile`` in the file name. The template is restricted
    to the segment running from the first residue of ``args.chains[0]`` to
    the last residue of ``args.chains[1]`` (chain IDs upper-cased).
    """
    mod.log.verbose()
    environment = mod.environ()
    environment.io.atom_files_directory = [".", args.dir, "../" + args.dir]
    environment.libs.topology.read(file="$(LIB)/top_heav.lib")  # topology
    environment.libs.parameters.read(file="$(LIB)/par.lib")  # parameters

    # Profile settings shared by the two assessments below
    profile_options = {
        "output": "ENERGY_PROFILE NO_REPORT",
        "normalize_profile": True,
        "smoothing_window": 15,
    }

    # Model: read, select all atoms, assess with DOPE
    model = complete_pdb(environment, args.model)
    model_atoms = mod.selection(model)
    model_atoms.assess_dope(
        file=args.model.replace(".pdb", ".profile"), **profile_options
    )

    # Template: same procedure, limited to the requested chain segment
    segment = (
        "FIRST:" + args.chains[0].upper(),
        "LAST:" + args.chains[1].upper(),
    )
    template = complete_pdb(environment, args.template, model_segment=segment)
    template_atoms = mod.selection(template)
    template_atoms.assess_dope(
        file=args.template.replace(".pdb", ".profile"), **profile_options
    )
def agbnp(wrkdir):
    """
    Get AGBNP solvation term for protein-protein as implemented in modeller.

    The complex is read from ``complexAB.pdb`` inside ``wrkdir``. The score
    is the value of the whole complex minus the values of chain 1 and
    chain 0 taken individually. Everything written to ``sys.stdout`` while
    scoring is captured in ``agbnp.out`` inside ``wrkdir``.

    [1] E. Gallicchio and R. M. Levy, "AGBNP: An analytic implicit solvent
    model suitable for molecular dynamics simulations and high-resolution
    modeling", Journal of Computational Chemistry, vol. 25, no. 4,
    pp. 479-499, 2004.

    :param wrkdir: directory holding ``complexAB.pdb``; ``agbnp.out`` is
        written there.
    :return: dict with keys ``'AGBNP'`` and ``'>TIME_AGBNP'`` (difference
        between two ``timer()`` readings).
    """
    import modeller
    import modeller.scripts

    time_start = timer()
    log.info("Getting AGBNP scoring...")
    cpx_path = os.path.join(wrkdir, 'complexAB.pdb')
    with open(os.path.join(wrkdir, "agbnp.out"), "w") as fp:
        _stdout = sys.stdout
        sys.stdout = fp
        try:
            env = modeller.environ()
            env.libs.topology.read(file='$(LIB)/top_heav.lib')
            env.libs.parameters.read(file='$(LIB)/par.lib')
            mdl = modeller.scripts.complete_pdb(env, cpx_path)
            cpx = modeller.selection(mdl.chains[:])
            lig = modeller.selection(mdl.chains[0])
            rec = modeller.selection(mdl.chains[1])
            # Named 'scorer' so the local does not shadow this function
            scorer = AGBNPScorer()
            score = (cpx.assess(scorer) - rec.assess(scorer)
                     - lig.assess(scorer))
            sys.stdout.flush()
        finally:
            # Always hand the real stdout back, even when scoring raises;
            # otherwise later output would go to a closed file.
            sys.stdout = _stdout
    desc = dict()
    desc['AGBNP'] = score
    time_end = timer()
    desc['>TIME_AGBNP'] = time_end - time_start
    return desc
def test_feature_atmacc_undef(self):
    """Check atom accessibility features undefined bin"""
    model = self.build_test_model()
    # Remove all coordinates so that no accessibility can be computed
    modeller.selection(model).unbuild()
    library = self.get_mdt_library()
    bins = mdt.uniform_bins(1, -20000, 40000)
    feature_classes = (mdt.features.AtomAccessibility,
                       mdt.features.FractionalAtomAccessibility)
    features = [cls(library, bins) for cls in feature_classes]
    # Every count is expected in the undefined (last) bin, even though the
    # raw feature value would fit in the first bin
    for feature in features:
        table = self.build_mdt_from_model(library, feature, model)
        self.assertEqual(table[0], 0)
        self.assertEqual(table[-1], 7)
def soap_pp(wrkdir):
    """
    Get SOAP-PP Pair score for protein-protein.

    The complex is read from ``complexAB.pdb`` inside ``wrkdir``; the PDB
    must contain only two chains. Only the SOAP-PP Pair score is computed,
    not the SOAP-PP Atom score. Everything written to ``sys.stdout`` while
    scoring is captured in ``soap_pp_pair.out`` inside ``wrkdir``.

    [1] G. Q. Dong, H. Fan, D. Schneidman-Duhovny, B. Webb, and A. Sali,
    "Optimized atomic statistical potentials: assessment of protein
    interfaces and loops", Bioinformatics, vol. 29, no. 24, pp. 3158-3166,
    2013.

    :param wrkdir: directory holding ``complexAB.pdb``;
        ``soap_pp_pair.out`` is written there.
    :return: dict with keys ``'SOAP-PP-Pair'`` and ``'>TIME_SOAP-PP-Pair'``
        (difference between two ``timer()`` readings).
    """
    import modeller
    import modeller.scripts
    import modeller.soap_pp

    time_start = timer()
    log.info("Getting SOAP-PP-Pair scoring...")
    cpx_path = os.path.join(wrkdir, 'complexAB.pdb')
    with open(os.path.join(wrkdir, "soap_pp_pair.out"), "w") as fp:
        _stdout = sys.stdout
        sys.stdout = fp
        try:
            env = modeller.environ()
            env.libs.topology.read(file='$(LIB)/top_heav.lib')
            env.libs.parameters.read(file='$(LIB)/par.lib')
            mdl = modeller.scripts.complete_pdb(env, cpx_path)
            cpx = modeller.selection(mdl.chains[:])
            soap_pp_pair = modeller.soap_pp.PairScorer()
            score = cpx.assess(soap_pp_pair)
            sys.stdout.flush()
        finally:
            # Always hand the real stdout back, even when scoring raises;
            # otherwise later output would go to a closed file.
            sys.stdout = _stdout
    desc = dict()
    desc['SOAP-PP-Pair'] = score
    time_end = timer()
    desc['>TIME_SOAP-PP-Pair'] = time_end - time_start
    return desc
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a template onto a reference template and record the RMSD.

    A PIR alignment and both template structures are written to a temporary
    directory. Modeller then superposes the template onto the C-alpha atoms
    of the reference.

    :param msmseed: object providing ``template_sequence``, ``template_id``
        and ``template_structure``; it is updated in place.
    :param ref_msmseed: object providing the same attributes for the
        reference template.
    :return: ``msmseed``, with ``rmsd_to_reference`` set on success or
        ``error_message`` set to the text of the exception on failure.
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os

    # Remember where we started: the temporary directory becomes the
    # working directory below and is deleted afterwards.
    original_dir = os.getcwd()
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence,
                             msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Best-effort by design: report the failure on the seed instead of
        # raising. str(e) is used because exceptions have no '.message'
        # attribute on Python 3.
        msmseed.error_message = str(e)
    finally:
        # Leave the temporary directory before deleting it, so the process
        # is not left with a working directory that no longer exists.
        os.chdir(original_dir)
        shutil.rmtree(temp_dir)
    return msmseed
def test_feature_distance_undefined(self):
    """Check atom-atom distance feature undefined bin"""
    self.get_environ()
    library = self.get_mdt_library()
    distance = mdt.features.AtomDistance(
        library, bins=mdt.uniform_bins(1, -1000, 2000))
    model = self.build_test_model()

    def check_counts(defined, undefined):
        # Rebuild the table from the current model state and compare the
        # defined and undefined bins with the expected counts
        table = self.build_mdt_from_model(
            library, distance, model,
            residue_span_range=(-99999, 0, 0, 99999))
        self.assertEqual(table.shape, (2,))
        self.assertEqual(table[0], defined)
        self.assertEqual(table[1], undefined)

    check_counts(21.0, 0.0)
    # Once one atom has no coordinates, every distance involving it is
    # expected to be counted in the undefined bin
    modeller.selection(model.atoms[0]).unbuild()
    check_counts(15.0, 6.0)
def test_script9(self):
    """Test step 9 (multiple fitting)"""
    sides = ('top', 'bottom')
    # Stage the inputs, which are the outputs of step 8
    for side in sides:
        density = ('precalculate_results/stage8_split_density/'
                   'groel-11.5A.%s.mrc' % side)
        shutil.copy(density, 'output')
    # The script itself must finish without error
    subprocess.check_call(['scripts/script9_symmetric_multiple_fitting.py'])

    env = modeller.environ()
    reference = modeller.model(
        env,
        file='precalculate_results/stage9_symmetric_multiple_fitting/'
             'model.top.0.pdb')
    ca_atoms = modeller.selection(reference).only_atom_types('CA')
    # At least one model in each ring should be close to the reference
    for side in sides:
        deviations = []
        for index in range(6):
            model_path = 'output/model.%s.%d.pdb' % (side, index)
            candidate = modeller.model(env, file=model_path)
            pairing = modeller.alignment(env)
            pairing.append_model(reference, align_codes='ref')
            pairing.append_model(candidate, align_codes='model')
            fit = ca_atoms.superpose(candidate, pairing)
            deviations.append(fit.rms)
            os.unlink(model_path)
        self.assertTrue(min(deviations) < 10.0)
    # Remove the remaining files produced by the script
    os.unlink('output/intermediate_asmb_sols.out')
    for side in sides:
        for pattern in ('output/multifit.%s.output',
                        'output/multifit.%s.output.symm.ref',
                        'output/multifit.%s.param'):
            os.unlink(pattern % side)
def find(self):
    """Return a Modeller selection corresponding to the allosteric site.

       The selection is computed on the first call and stored on the
       instance; later calls return the stored selection.
       @raise AllostericSiteError on error."""
    if self.__allosteric_site is not None:
        return self.__allosteric_site

    # Align PDB2 to PDB1 and superimpose antigen
    try:
        salign0(self.env, self.pdb1, self.pdb2)
    except modeller.ModellerError as err:
        raise AllostericSiteError("Could not align %s with %s: %s. "
                                  "This is usually due to a poor alignment."
                                  % (self.pdb2, self.pdb1, str(err)))
    fitted_pdb2 = get_fit_filename(self.pdb2)

    # Collect the residues of the fitted PDB2 that contact LIG1
    self.__pmfit = modeller.model(self.env, file=fitted_pdb2)
    ligand_model = modeller.model(self.env, file=self.ligand)
    contacts = get_inter_contacts(self.env, self.__pmfit, ligand_model,
                                  self.rcut)
    site_residues = [ri for ri, rj, dist in contacts]
    self.__allosteric_site = modeller.selection(site_residues)

    # The fitted structure files are no longer needed
    os.unlink(fitted_pdb2)
    os.unlink(get_fit_filename(self.pdb1))
    if len(self.__allosteric_site) == 0:
        raise AllostericSiteError("No allosteric site found")
    return self.__allosteric_site
def test_feature_angle_undefined(self):
    """Check angle feature undefined bin"""
    self.get_environ()
    library = self.get_mdt_library()
    library.angle_classes.read("data/anggrp.lib")
    angle = mdt.features.Angle(library,
                               bins=mdt.uniform_bins(1, -500, 1000))
    model = self.build_test_model()

    def check_counts(defined, undefined):
        # Rebuild the table from the current model state and compare the
        # defined and undefined bins with the expected counts
        table = self.build_mdt_from_model(
            library, angle, model,
            residue_span_range=(-99999, 0, 0, 99999))
        self.assertEqual(table.shape, (2,))
        self.assertEqual(table[0], defined)
        self.assertEqual(table[1], undefined)

    check_counts(5.0, 0.0)
    # Once one atom has no coordinates, every angle involving it is
    # expected to be counted in the undefined bin
    modeller.selection(model.atoms[0]).unbuild()
    check_counts(3.0, 2.0)
def test_feature_dihedral_undefined(self):
    """Check dihedral feature undefined bin"""
    self.get_environ()
    library = self.get_mdt_library()
    library.dihedral_classes.read("data/impgrp.lib")
    dihedral = mdt.features.Dihedral(library,
                                     bins=mdt.uniform_bins(1, -500, 1000))
    model = self.build_test_model()

    def check_counts(defined, undefined):
        # Rebuild the table from the current model state and compare the
        # defined and undefined bins with the expected counts
        table = self.build_mdt_from_model(
            library, dihedral, model,
            residue_span_range=(-99999, 0, 0, 99999))
        self.assertEqual(table.shape, (2,))
        self.assertEqual(table[0], defined)
        self.assertEqual(table[1], undefined)

    check_counts(1.0, 0.0)
    # Once one atom has no coordinates, every dihedral involving it is
    # expected to be counted in the undefined bin
    modeller.selection(model.atoms[0]).unbuild()
    check_counts(0.0, 1.0)
def test_integrative_modeling(self):
    """Test the entire integrative modeling run"""
    import modeller

    # Build the clustering program, run the sampling, then the analysis
    subprocess.check_call(['gfortran', 'cluster.f', 'u3best.f', '-o',
                           'cluster.x'], cwd='integrative_modeling/bin')
    subprocess.check_call(['./run_modeling.py'], cwd='integrative_modeling')
    subprocess.check_call(['bin/get_frames.sh'], cwd='integrative_modeling')

    # Make sure that at least two of the three "known good" clusters
    # are reproduced
    clusters = [path for path
                in glob.glob('integrative_modeling/clustering/clus.*.pdb')
                if '-' not in path]
    exp_clusters = glob.glob('model_refinement/cluster*/model.pdb')

    env = modeller.environ()
    found_hits = [0] * len(clusters)
    expected_hits = [0] * len(exp_clusters)
    # RMSD of every produced cluster (rows) against every expected
    # cluster (columns)
    rms = []
    for found_index, found_path in enumerate(clusters):
        row = []
        for expected_index, expected_path in enumerate(exp_clusters):
            found_model = modeller.model(env, file=found_path)
            found_atoms = modeller.selection(found_model)
            pairing = modeller.alignment(env)
            expected_model = modeller.model(env, file=expected_path)
            pairing.append_model(found_model, align_codes='clus')
            pairing.append_model(expected_model, align_codes='exp_clus')
            # Only the global (non-cutoff) RMSD matters here, so a large
            # cutoff is used to stop refine_local from increasing the
            # number of equivalent positions at the cost of a worse RMSD
            fit = found_atoms.superpose(expected_model, pairing,
                                        rms_cutoff=999.)
            if fit.rms < 15.0:
                found_hits[found_index] += 1
                expected_hits[expected_index] += 1
            row.append(fit.rms)
        rms.append(row)

    # Produced clusters that are close to at least one expected cluster
    ncluster_match = sum(1 for hits in found_hits if hits != 0)
    # Expected clusters that are close to at least one produced cluster
    nexp_cluster_match = sum(1 for hits in expected_hits if hits != 0)
    # At least 2 of the 3 expected clusters must be close to one of the
    # produced clusters (but not all to the *same* cluster)
    self.assertTrue(ncluster_match >= 2 and nexp_cluster_match >= 2,
                    "Could not find any match between the %d clusters "
                    "found in this test and 2 of the 3 'known good' "
                    "clusters (match defined as all-atom RMSD less than "
                    "15.0A). RMSD matrix: %s" % (len(clusters), str(rms)))
def test_feature_z_coordinate_undefined(self):
    """Check atom Z-coordinate feature undefined bin"""
    model = self.build_test_model()
    library = self.get_mdt_library()
    bins = mdt.uniform_bins(1, -20000, 40000)
    z_coordinate = mdt.features.AtomZCoordinate(library, bins)

    def check_counts(defined, undefined):
        # Rebuild the table from the current model state and compare the
        # defined and undefined bins with the expected counts
        table = self.build_mdt_from_model(library, z_coordinate, model)
        self.assertEqual(table.shape, (2,))
        self.assertEqual(table[0], defined)
        self.assertEqual(table[1], undefined)

    check_counts(7.0, 0.0)
    # Make one z-coordinate undefined: the undefined bin should then hold
    # one count even though the raw value would fall within the first bin
    modeller.selection(model.atoms[0]).unbuild()
    check_counts(6.0, 1.0)
def test_feature_hbond_undef(self):
    """Check hydrogen bond features undefined bin"""
    model = self.build_test_model()
    # Remove all coordinates so that no hydrogen bond can be evaluated
    modeller.selection(model).unbuild()
    library = self.get_mdt_library()
    library.hbond_classes.read("data/atmcls-hbda.lib")
    bins = mdt.uniform_bins(1, -20000, 40000)
    feature_classes = (
        mdt.features.HydrogenBondDonor,
        mdt.features.HydrogenBondAcceptor,
        mdt.features.HydrogenBondCharge,
    )
    features = [cls(library, bins) for cls in feature_classes]
    # Every count is expected in the undefined (last) bin, even though the
    # raw feature value would fit in the first bin
    for feature in features:
        table = self.build_mdt_from_model(library, feature, model)
        self.assertEqual(table[0], 0)
        self.assertEqual(table[-1], 7)
def special_restraints(self, aln):
    """Add C-alpha symmetry restraints between chains of each homomer set.

    For every entry of ``self.homomer_sets`` with at least two chains, each
    unordered pair of chains whose C-alpha selections have the same length
    gets a symmetry restraint with weight 1.0. ``aln`` is not used.
    """
    for homochains in self.homomer_sets:
        if len(homochains) < 2:
            continue
        ca_selections = [
            modeller.selection(self.chains[chain]).only_atom_types('CA')
            for chain in homochains
        ]
        for index, first in enumerate(ca_selections):
            # Only pair with later chains so each pair is visited once
            for second in ca_selections[index + 1:]:
                if len(first) == len(second):
                    self.restraints.symmetry.append(
                        modeller.symmetry(first, second, 1.0))
def __init__(self, filename=None, *args, **kwargs):
    """Initialise the parent class and index the residues of a PDB file.

    ``filename`` is required despite its default: after the parent class
    has been initialised with the remaining arguments, a missing filename
    raises ``ValueError``.
    """
    super(MyLoop, self).__init__(*args, **kwargs)
    if filename is None:
        raise ValueError("Need to specify input PDB filename")

    self.pdb = _IO.PDB.PDB(filename)
    residues = self.pdb.totalResidueList()
    self.total_residue_list = residues
    transformed = self._transform_id(residues)
    self.transformed_total_residue_list = transformed
    # Keep the transformed IDs at the positions of missing residues
    missing = []
    for position, residue in enumerate(self.total_residue_list):
        if type(residue) == _IO.PDB.MissingResidue:
            missing.append(self.transformed_total_residue_list[position])
    self.transformed_missing_residue_list = missing
    self.selection = _modeller.selection()
def dynamicMDL(mol, temp=300, maxit=1000, store=True):
    """Run Modeller molecular dynamics on all atoms of ``mol.mdl``.

    Stereochemical restraints are generated for the whole model, its energy
    is evaluated once, and molecular dynamics is then run with
    ``mol.pmvaction`` as the optimizer action.

    :param mol: object exposing ``mdl`` (the Modeller model) and
        ``pmvaction`` (the action passed to the optimizer).
    :param temp: temperature passed to the optimizer.
    :param maxit: maximum number of iterations; converted with ``int()``.
    :param store: value assigned to ``mol.pmvaction.store``.
    :return: always ``True``.
    """
    mdl = mol.mdl
    # Select all atoms:
    atmsel = modeller.selection(mdl)
    # Generate the restraints:
    mdl.restraints.make(atmsel, restraint_type='stereo',
                        spline_on_site=False)
    # The returned value is not needed; the call is kept as in the original
    atmsel.energy()
    # Single-argument print(...) gives the same output on Python 2 and 3
    print("before optmimise")
    md = modeller.optimizers.molecular_dynamics(output='REPORT')
    mol.pmvaction.last = 10000
    mol.pmvaction.store = store
    print("optimise")
    md.optimize(atmsel, temperature=temp, max_iterations=int(maxit),
                actions=mol.pmvaction)
    del md
    return True
def minimizeMDL(mol, maxit=1000, store=True):
    """Run Modeller conjugate-gradients minimization on ``mol.mdl``.

    Stereochemical restraints are generated for the whole model, its energy
    is evaluated once, and conjugate gradients is then run with
    ``mol.pmvaction`` as the optimizer action.

    :param mol: object exposing ``mdl`` (the Modeller model) and
        ``pmvaction`` (the action passed to the optimizer).
    :param maxit: maximum number of iterations passed to the optimizer.
    :param store: value assigned to ``mol.pmvaction.store``.
    :return: always ``True``.
    """
    mdl = mol.mdl
    # Select all atoms:
    atmsel = modeller.selection(mdl)
    # Generate the restraints:
    mdl.restraints.make(atmsel, restraint_type='stereo',
                        spline_on_site=False)
    # The returned value is not needed; the call is kept as in the original
    atmsel.energy()
    # Single-argument print(...) gives the same output on Python 2 and 3
    print("before optmimise")
    # Create the optimizer object
    cg = modeller.optimizers.conjugate_gradients(output='REPORT')
    mol.pmvaction.last = 10000
    print("optimise")
    mol.pmvaction.store = store
    cg.optimize(atmsel, max_iterations=maxit, actions=mol.pmvaction)
    del cg
    return True
def minimizeMDL(mol, maxit=1000, store=True):
    """Minimize all atoms of ``mol.mdl`` with conjugate gradients.

    Builds stereochemical restraints over the whole model, evaluates the
    energy once, then optimizes for at most ``maxit`` iterations using
    ``mol.pmvaction`` as the optimizer action. ``store`` is assigned to
    ``mol.pmvaction.store``. Always returns ``True``.
    """
    model = mol.mdl
    all_atoms = modeller.selection(model)

    # Stereochemical restraints over every atom
    model.restraints.make(all_atoms,
                          restraint_type='stereo',
                          spline_on_site=False)
    all_atoms.energy()
    print("before optmimise")

    minimizer = modeller.optimizers.conjugate_gradients(output='REPORT')
    mol.pmvaction.last = 10000
    print("optimise")
    mol.pmvaction.store = store
    minimizer.optimize(all_atoms,
                       max_iterations=maxit,
                       actions=mol.pmvaction)
    return True
def optimize(pdb, pdb_path):
    """Refine a PDB structure with CG, MD and CG again, and report energies.

    ``code`` is the part of ``pdb`` before the first ``'.'``. The function
    writes ``<code>.ini`` (completed model), ``<code>.rsr`` (restraints),
    ``<code>.D00000001`` (optimization trace) and the intermediate MD
    structures to the current directory, and the final model to
    ``optimized.pdb`` inside ``pdb_path``.

    :param pdb: PDB file name or code handed to ``complete_pdb``.
    :param pdb_path: directory that receives ``optimized.pdb``.
    :return: tuple ``(energy before, energy after)`` as returned by the
        all-atom selection's ``energy()``.
    """
    print(1, pdb_path)
    # Environ data
    env = environ(0)
    env.io.atom_files_directory = ['../atom_files']
    env.edat.dynamic_sphere = True
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    code = pdb.split('.')[0]
    mdl = complete_pdb(env, pdb)
    mdl.write(file=code + '.ini')
    # Select all atoms:
    atmsel = selection(mdl)
    # Generate the restraints:
    mdl.restraints.make(atmsel, restraint_type='stereo',
                        spline_on_site=False)
    mdl.restraints.write(file=code + '.rsr')
    mpdf_prior = atmsel.energy()
    # Create optimizer objects and set defaults for all further
    # optimizations
    cg = conjugate_gradients(output='REPORT')
    md = molecular_dynamics(output='REPORT')
    # Trace file with basic stats on each optimization; the 'with' block
    # closes it even when an optimization step raises
    with open(code + '.D00000001', 'w') as trcfil:
        # Run CG on the all-atom selection; write stats every 5 steps
        cg.optimize(atmsel, max_iterations=20,
                    actions=actions.trace(5, trcfil))
        # Run MD; write out a PDB structure ('<code>.D9999xxxx.pdb') every
        # 10 steps during the run, and write stats every 10 steps
        md.optimize(atmsel, temperature=300, max_iterations=50,
                    actions=[actions.write_structure(
                                 10, code + '.D9999%04d.pdb'),
                             actions.trace(10, trcfil)])
        # Finish off with some more CG, and write stats every 5 steps
        cg.optimize(atmsel, max_iterations=20,
                    actions=[actions.trace(5, trcfil)])
    mpdf_after = atmsel.energy()
    mdl.write(file=os.path.join(pdb_path, 'optimized.pdb'))
    return (mpdf_prior, mpdf_after)
def modellerOptimizeInit(self):
    """Prepare the Modeller selection, restraints and optimizer.

    Reads the Modeller model from ``self.pmvModel.mdl``, stores the
    all-atom selection in ``self.atmsel`` and its energy in ``self.mpdf``.
    The optimizer is stored in ``self.cg``: conjugate gradients when
    ``self.rtType`` is ``"mini"``, molecular dynamics when it is ``"md"``.
    For any other value ``self.cg`` is left untouched.
    """
    import modeller

    mol = self.pmvModel
    mdl = mol.mdl
    # Select all atoms:
    self.atmsel = modeller.selection(mdl)
    # Generate the restraints:
    mdl.restraints.make(self.atmsel, restraint_type='stereo',
                        spline_on_site=False)
    self.mpdf = self.atmsel.energy()
    # Single-argument print(...) gives the same output on Python 2 and 3
    print("before optmimise")
    # Create the optimizer object matching the requested run type
    if self.rtType == "mini":
        self.cg = modeller.optimizers.conjugate_gradients(output='REPORT')
    elif self.rtType == "md":
        self.cg = modeller.optimizers.molecular_dynamics(output='REPORT')
    self.last = 10000
    print("optimise")
def test_truncated_gaussian(self):
    """Test TruncatedGaussian math form"""
    import modeller
    from allosmod.modeller.forms import TruncatedGaussian

    model = self.make_model()
    first_atom_x = modeller.features.x_coordinate(model.atoms[0])
    restraint = TruncatedGaussian(group=modeller.physical.xy_distance,
                                  feature=first_atom_x, dele_max=10,
                                  slope=4.0, scl_delx=0.7, weights=(1, 1),
                                  means=(14, 28), stdevs=(1, 2))
    model.restraints.add(restraint)
    all_atoms = modeller.selection(model)

    def objective_at(step):
        # Move the first atom along x, then evaluate the objective function
        model.atoms[0].x = 2.0 * step
        return all_atoms.objfunc()

    energies = [objective_at(step) for step in range(30)]
    expected_e = [10.133, 10.133, 10.133, 10.133, 10.172, 5.263, 1.722,
                  0.542, 1.722, 5.260, 5.672, 3.607, 2.131, 1.246, 0.951,
                  1.246, 2.131, 3.607, 5.672, 8.314, 10.135, 10.133,
                  10.133, 10.133, 10.133, 10.133, 10.133, 10.133, 10.133,
                  10.133]
    for actual, expected in zip(energies, expected_e):
        self.assertAlmostEqual(actual, expected, places=1)
def main():
    """Assess the first chain of the PDB file given on the command line.

    Exactly one command-line argument is expected: the path of an existing
    PDB file. On a usage error a message and ``usage`` are printed and the
    process exits with status 1. Otherwise the first chain of the completed
    model is assessed with DOPE.

    :return: 0 after a successful assessment.
    """
    # Single-argument print(...) gives the same output on Python 2 and 3
    if len(sys.argv) != 2:
        print('ERROR, incorrect number of inputs!!!')
        print(usage)
        # Non-zero status so that callers can detect the failure
        sys.exit(1)
    if not osp.isfile(sys.argv[1]):
        print('The input file ' + sys.argv[1] + ' does not exist!!!')
        print(usage)
        sys.exit(1)
    env = environ()
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    mdl = complete_pdb(env, sys.argv[1])
    atmsel = selection(mdl.chains[0])
    # The returned score is not used here
    atmsel.assess_dope()
    return 0
def dynamicMDL(mol, temp=300, maxit=1000, store=True):
    """Run molecular dynamics on all atoms of ``mol.mdl``.

    Builds stereochemical restraints over the whole model, evaluates the
    energy once, then runs molecular dynamics at temperature ``temp`` for
    at most ``int(maxit)`` iterations using ``mol.pmvaction`` as the
    optimizer action. ``store`` is assigned to ``mol.pmvaction.store``.
    Always returns ``True``.
    """
    model = mol.mdl
    all_atoms = modeller.selection(model)

    # Stereochemical restraints over every atom
    model.restraints.make(all_atoms,
                          restraint_type='stereo',
                          spline_on_site=False)
    all_atoms.energy()
    print("before optmimise")

    dynamics = modeller.optimizers.molecular_dynamics(output='REPORT')
    mol.pmvaction.last = 10000
    mol.pmvaction.store = store
    print("optimise")
    iteration_limit = int(maxit)
    dynamics.optimize(all_atoms,
                      temperature=temp,
                      max_iterations=iteration_limit,
                      actions=mol.pmvaction)
    return True
def soap_score():
    """Score Modeller output models with SOAP-Protein-OD.

    Every file in the current directory matching ``pm.pdb*.pdb`` is scored,
    except those whose name contains ``pm.pdb.B10010002.pdb``. One line
    ``<file name><TAB><score>`` per scored model is written to
    ``SnapList.txt``. When the assessment raises ``modeller.ModellerError``
    a hint about the SOAP library is printed and the file is skipped; any
    other exception propagates to the caller.
    """
    import modeller
    from modeller.scripts import complete_pdb
    from modeller import soap_protein_od

    env = modeller.environ()
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    # Set up SOAP-Protein-OD scoring (we create the scorer once and keep it
    # around, since reading in the potential from disk can take a long time).
    sp = soap_protein_od.Scorer()
    # The 'with' block closes the output file even when a model fails
    with open('SnapList.txt', 'w') as out:
        files = [
            i for i in glob.glob('pm.pdb*.pdb')
            if 'pm.pdb.B10010002.pdb' not in i
        ]
        for fil in files:
            # Read a model previously generated by Modeller's automodel class
            mdl = complete_pdb(env, fil)
            # Select all atoms
            atmsel = modeller.selection(mdl)
            # Assess with the above Scorer
            try:
                score = atmsel.assess(sp)
            except modeller.ModellerError:
                print("The SOAP-Protein-OD library file is not included "
                      "with MODELLER.")
                print("Please get it from https://salilab.org/SOAP/.")
            else:
                out.write(fil + '\t' + str(score) + '\n')
def modellerOptimizeInit(self):
    """Set up selection, restraints and optimizer for a Modeller run.

    Stores the all-atom selection of ``self.pmvModel.mdl`` in
    ``self.atmsel`` and its energy in ``self.mpdf``. ``self.cg`` receives a
    conjugate-gradients optimizer for run type ``"mini"`` or a molecular
    dynamics optimizer for ``"md"``; other run types leave it untouched.
    """
    import modeller

    model_name = self.pmvModel.name
    molecule = self.pmvModel
    model = molecule.mdl

    # Select every atom and restrain the stereochemistry
    self.atmsel = modeller.selection(model)
    model.restraints.make(self.atmsel,
                          restraint_type='stereo',
                          spline_on_site=False)
    self.mpdf = self.atmsel.energy()
    print("before optmimise")

    # Pick the optimizer that matches the requested run type
    run_type = self.rtType
    if run_type == "mini":
        self.cg = modeller.optimizers.conjugate_gradients(output='REPORT')
    elif run_type == "md":
        self.cg = modeller.optimizers.molecular_dynamics(output='REPORT')
    self.last = 10000
    print("optimise")
def _compute_dope(self, str_file_path, profile_file_path, env=None):
    """
    Uses MODELLER to compute the DOPE of a polypeptidic chain, and outputs
    the results in 'profile_file_path'.

    Only standard residues are assessed. The profile is normalized and
    smoothed with a window of 15.

    :param str_file_path: path of the structure file; converted with
        ``str()`` before being handed to ``complete_pdb``.
    :param profile_file_path: path of the energy profile file to write.
    :param env: an already initialized MODELLER environment. When 'None',
        a new one is obtained from ``self._initialize_env()``, so passing
        an existing environment avoids initializing MODELLER again.
    :return: the DOPE score returned by ``assess_dope``.
    """
    # Identity test: '== None' could be intercepted by a custom __eq__
    if env is None:
        env = self._initialize_env()
    modstr = complete_pdb(env, str(str_file_path))
    # Assess with DOPE, restricted to standard residues (alternatives:
    # only_het_residues, only_water_residues).
    s = modeller.selection(modstr).only_std_residues()
    # Gets the DOPE score.
    score = s.assess_dope(output='ENERGY_PROFILE NO_REPORT',
                          file=str(profile_file_path),
                          normalize_profile=True,
                          smoothing_window=15)
    return score
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a template onto a reference template and record the RMSD.

    A PIR alignment and both template structures are written to a temporary
    directory. Modeller then superposes the template onto the C-alpha atoms
    of the reference.

    :param msmseed: object providing ``template_sequence``, ``template_id``
        and ``template_structure``; it is updated in place.
    :param ref_msmseed: object providing the same attributes for the
        reference template.
    :return: ``msmseed``, with ``rmsd_to_reference`` set on success or
        ``error_message`` set to the text of the exception on failure.
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os

    # Remember where we started: the temporary directory becomes the
    # working directory below and is deleted afterwards.
    original_dir = os.getcwd()
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence,
                             msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Best-effort by design: report the failure on the seed instead of
        # raising. str(e) is used because exceptions have no '.message'
        # attribute on Python 3.
        msmseed.error_message = str(e)
    finally:
        # Leave the temporary directory before deleting it, so the process
        # is not left with a working directory that no longer exists.
        os.chdir(original_dir)
        shutil.rmtree(temp_dir)
    return msmseed
def soap_score():
    """Score Modeller output models with SOAP-Protein-OD.

    Every file in the current directory matching ``pm.pdb*.pdb`` is scored,
    except those whose name contains ``pm.pdb.B10010002.pdb``. One line
    ``<file name><TAB><score>`` per scored model is written to
    ``SnapList.txt``. When the assessment raises ``modeller.ModellerError``
    a hint about the SOAP library is printed and the file is skipped; any
    other exception propagates to the caller.
    """
    import modeller
    from modeller.scripts import complete_pdb
    from modeller import soap_protein_od

    env = modeller.environ()
    env.libs.topology.read(file='$(LIB)/top_heav.lib')
    env.libs.parameters.read(file='$(LIB)/par.lib')
    # Set up SOAP-Protein-OD scoring (we create the scorer once and keep it
    # around, since reading in the potential from disk can take a long time).
    sp = soap_protein_od.Scorer()
    # The 'with' block closes the output file even when a model fails
    with open('SnapList.txt', 'w') as out:
        files = [i for i in glob.glob('pm.pdb*.pdb')
                 if 'pm.pdb.B10010002.pdb' not in i]
        for fil in files:
            # Read a model previously generated by Modeller's automodel class
            mdl = complete_pdb(env, fil)
            # Select all atoms
            atmsel = modeller.selection(mdl)
            # Assess with the above Scorer
            try:
                score = atmsel.assess(sp)
            except modeller.ModellerError:
                print("The SOAP-Protein-OD library file is not included "
                      "with MODELLER.")
                print("Please get it from https://salilab.org/SOAP/.")
            else:
                out.write(fil+'\t'+str(score)+'\n')
def select_atoms(self):
    """Return a selection of the atoms listed in ``missing_atom_indices``.

    ``missing_atom_indices`` is not defined in this method; it is taken
    from the enclosing scope.
    """
    selected = modeller.selection()
    atoms_to_add = (self.atoms[index] for index in missing_atom_indices)
    for atom in atoms_to_add:
        selected.add(atom)
    return selected
# Example script: score a Modeller-built model with an IMP restraint added
# to the Modeller energy function.
# NOTE(review): 'modeller', 'IMP.atom' and 'IMP.kernel' are used below but
# are not imported in the lines visible here - confirm they are imported
# earlier in the file.
import IMP.core
import IMP.modeller

# Set up Modeller and build a model from the GGCC primary sequence
e = modeller.environ()
e.edat.dynamic_sphere = False
e.libs.topology.read('${LIB}/top_heav.lib')
e.libs.parameters.read('${LIB}/par.lib')
modmodel = modeller.model(e)
modmodel.build_sequence('GGCC')

# Set up IMP and load the Modeller model in as a new Hierarchy
m = IMP.kernel.Model()
protein = IMP.modeller.ModelLoader(modmodel).load_atoms(m)

# Create a simple IMP distance restraint between the first and last atoms
atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
r = IMP.core.DistanceRestraint(IMP.core.Harmonic(10.0, 1.0),
                               atoms[0].get_particle(),
                               atoms[-1].get_particle())
m.add_restraint(r)

# Use the IMPRestraints class to add all of the IMP restraints to the
# Modeller scoring function
t = modmodel.env.edat.energy_terms
t.append(IMP.modeller.IMPRestraints(atoms))

# Calculate the Modeller energy (score) for the whole protein
sel = modeller.selection(modmodel)
sel.energy()
def test_deriv(self):
    """Test calculated derivatives for a distorted model's map"""
    if modeller is None:
        self.skipTest("modeller module unavailable")
    modeller.log.level = (0, 0, 0, 0, 1)
    self.env = modeller.environ()
    self.env.edat.dynamic_sphere = False
    self.env.libs.topology.read(file='$(LIB)/top_heav.lib')
    self.env.libs.parameters.read(file='$(LIB)/par.lib')
    # init IMP model (the environment)
    self.imp_model = IMP.kernel.Model()
    self.particles = []
    # create a set of three particles in IMP
    for i in range(3):
        self.particles.append(IMP.kernel.Particle(self.imp_model))
    # add IMP restraints into the Modeller scoring function
    t = self.env.edat.energy_terms
    t.append(IMP.modeller.IMPRestraints(self.particles))
    # Load the same model into Modeller
    self.modeller_model = copy_to_modeller(self.env, self.particles)
    # add the particle attributes: x, y, z, radius, weight, protein
    rad = 1.0
    wei = 1.0
    wei_key = IMP.FloatKey("weight")
    prot_key = IMP.IntKey("protein")
    id_key = IMP.IntKey("id")
    for i, p_data in enumerate([[9.0, 9.0, 9.0, rad, wei, 1],
                                [12.0, 3.0, 3.0, rad, wei, 1],
                                [3.0, 12.0, 12.0, rad, wei, 1]]):
        p = self.particles[i]
        center = IMP.algebra.Vector3D(*p_data[0:3])
        sphere = IMP.algebra.Sphere3D(center, p_data[3])
        IMP.core.XYZR.setup_particle(p, sphere)
        p.add_attribute(wei_key, p_data[4])
        p.add_attribute(prot_key, p_data[5])
        p.add_attribute(id_key, i)
    self.atmsel = modeller.selection(self.modeller_model)
    # Single-argument print(...) gives the same output on Python 2 and 3
    print("initialization done ...")
    resolution = 3.
    voxel_size = 1.
    model_map = IMP.em.SampledDensityMap(self.particles, resolution,
                                         voxel_size, wei_key)
    erw = IMP.em.EMReaderWriter()
    xorigin = model_map.get_header().get_xorigin()
    yorigin = model_map.get_header().get_yorigin()
    zorigin = model_map.get_header().get_zorigin()
    print("x= " + str(xorigin) + " y=" + str(yorigin) + " z="
          + str(zorigin))
    mapfile = IMP.base.create_temporary_file_name('xxx.em')
    try:
        IMP.em.write_map(model_map, mapfile, erw)
        # EM restraint
        em_map = IMP.em.read_map(mapfile, erw)
        em_map.get_header_writable().set_xorigin(xorigin)
        em_map.get_header_writable().set_yorigin(yorigin)
        em_map.get_header_writable().set_zorigin(zorigin)
        em_map.get_header_writable().compute_xyz_top()
        em_map.get_header_writable().set_resolution(resolution)
        print("rms_calc %s" % em_map.get_rms_calculated())
        em_map.calcRMS()
        print("rms_calc %s" % em_map.get_rms_calculated())
        ind_emrsr = []
        ind_emrsr.append(IMP.em.FitRestraint(self.particles, em_map,
                                             [0., 0.], wei_key, 1.0))
        self.imp_model.add_restraint(ind_emrsr[0])
        print("EM-score score: " + str(self.atmsel.energy()))
        self.atmsel.randomize_xyz(1.0)
        nviol = self.atmsel.debug_function(
            debug_function_cutoff=(.010, 0.010, 0.01),
            detailed_debugging=True)
        self.assertLess(nviol, 1,
                        "at least one partial derivative is wrong!")
        print(" derivs done ...")
    finally:
        # Remove the temporary map even when an assertion above fails
        if os.path.exists(mapfile):
            os.unlink(mapfile)
def test_deriv(self):
    """Test calculated derivatives for a distorted model's map"""
    if modeller is None:
        self.skipTest("modeller module unavailable")
    modeller.log.level = (0, 0, 0, 0, 1)
    self.env = modeller.environ()
    self.env.edat.dynamic_sphere = False
    self.env.libs.topology.read(file='$(LIB)/top_heav.lib')
    self.env.libs.parameters.read(file='$(LIB)/par.lib')
    # init IMP model (the environment)
    self.imp_model = IMP.Model()
    # create a set of three particles in IMP
    self.particles = []
    for i in range(3):
        self.particles.append(IMP.Particle(self.imp_model))
    # Load the same model into Modeller
    self.modeller_model = copy_to_modeller(self.env, self.particles)
    # add the particle attributes: x, y, z, radius, weight, protein
    rad = 1.0
    wei = 1.0
    wei_key = IMP.FloatKey("weight")
    prot_key = IMP.IntKey("protein")
    id_key = IMP.IntKey("id")
    for i, p_data in enumerate([[9.0, 9.0, 9.0, rad, wei, 1],
                                [12.0, 3.0, 3.0, rad, wei, 1],
                                [3.0, 12.0, 12.0, rad, wei, 1]]):
        p = self.particles[i]
        center = IMP.algebra.Vector3D(*p_data[0:3])
        sphere = IMP.algebra.Sphere3D(center, p_data[3])
        IMP.core.XYZR.setup_particle(p, sphere)
        p.add_attribute(wei_key, p_data[4])
        p.add_attribute(prot_key, p_data[5])
        p.add_attribute(id_key, i)
    self.atmsel = modeller.selection(self.modeller_model)
    print("initialization done ...")
    resolution = 3.
    voxel_size = 1.
    model_map = IMP.em.SampledDensityMap(self.particles, resolution,
                                         voxel_size, wei_key)
    erw = IMP.em.EMReaderWriter()
    xorigin = model_map.get_header().get_xorigin()
    yorigin = model_map.get_header().get_yorigin()
    zorigin = model_map.get_header().get_zorigin()
    print(("x= " + str(xorigin) + " y=" + str(yorigin) + " z="
           + str(zorigin)))
    mapfile = IMP.create_temporary_file_name('xxx.em')
    try:
        IMP.em.write_map(model_map, mapfile, erw)
        # EM restraint
        em_map = IMP.em.read_map(mapfile, erw)
        em_map.get_header_writable().set_xorigin(xorigin)
        em_map.get_header_writable().set_yorigin(yorigin)
        em_map.get_header_writable().set_zorigin(zorigin)
        em_map.get_header_writable().compute_xyz_top()
        em_map.get_header_writable().set_resolution(resolution)
        print("rms_calc", em_map.get_rms_calculated())
        em_map.calcRMS()
        print("rms_calc", em_map.get_rms_calculated())
        ind_emrsr = []
        ind_emrsr.append(
            IMP.em.FitRestraint(self.particles, em_map, [0., 0.],
                                wei_key, 1.0))
        sf = IMP.core.RestraintsScoringFunction(ind_emrsr)
        # add IMP restraints into the Modeller scoring function
        t = self.modeller_model.env.edat.energy_terms
        t.append(IMP.modeller.IMPRestraints(self.particles, sf))
        print(("EM-score score: " + str(self.atmsel.energy())))
        self.atmsel.randomize_xyz(1.0)
        nviol = self.atmsel.debug_function(
            debug_function_cutoff=(.010, 0.010, 0.01),
            detailed_debugging=True)
        self.assertLess(nviol, 1,
                        "at least one partial derivative is wrong!")
        print(" derivs done ...")
    finally:
        # Remove the temporary map even when an assertion above fails
        if os.path.exists(mapfile):
            os.unlink(mapfile)
def select_loop_atoms(self):
    """Return a Modeller selection over ``self.residue_range('395', '401')``."""
    import modeller

    loop_residues = self.residue_range('395', '401')
    return modeller.selection(loop_residues)
def select_atoms(self):
    """Return a Modeller selection over ``self.residue_range('395', '401')``."""
    import modeller

    chosen_residues = self.residue_range('395', '401')
    return modeller.selection(chosen_residues)


def customised_function(self):
    """Placeholder hook; intentionally does nothing."""
    pass
def select_loop_atoms(self):
    """Return a Modeller selection over ``self.residue_range("795", "817")``."""
    import modeller

    loop_residues = self.residue_range("795", "817")
    return modeller.selection(loop_residues)
def _execute_insertion(self, output_folder):
    """
    Applies the stored mutations to each PDB structure and writes one mutant
    PDB file per entry of ``self._pdb_file_mutants``.

    ``io.hetatm`` is switched off on the Modeller environment; per the
    original note, ligands will be removed, since it is hard to guarantee
    that all structures have ligands.

    :param output_folder: replaces the directory part of the source PDB path
        when the output file name is assembled.
    :return: None. The path of every written structure is appended to
        ``self._stored_mutant_structures`` (see the NOTE at the end about how
        that path is assembled).
    """
    # Fixed random seed for the Modeller environment.
    protein_environment = modeller.environ(rand_seed=-49837)
    protein_environment.libs.topology.read(file='$(LIB)/top_heav.lib')
    protein_environment.libs.parameters.read(file='$(LIB)/par.lib')
    protein_environment.io.hetatm = False
    # Structure as it will be read:
    # {'1,133l': [['A', 'CYS', '40'], '/Users/gcc/Downloads/crap_tastic_pdbs/133l.pdb'],
    # '1,134l': [['A', 'VAL', '32'], '/Users/gcc/Downloads/crap_tastic_pdbs/134l.pdb']}
    # NOTE(review): the example above lists [chain, residue type, position],
    # but the code below reads index +1 as the position (passed to int()) and
    # index +2 as the residue type -- confirm which ordering is current.
    # TODO improve merge mutations originating from the same pdb file in method _available_files_mutation_filtering.
    for mutant_pdb_file_dict in self._pdb_file_mutants:
        # Get file name to which the mutation it matched (last element of the entry).
        protein_data_bank_file = self._pdb_file_mutants[mutant_pdb_file_dict][-1]
        # Information regarding the mutation itself : chain, position and new residue.
        protein_mutation_information = self._pdb_file_mutants[mutant_pdb_file_dict][0]
        # Split the path at the last "/" into [directory, file name].
        # NOTE(review): a path without "/" yields a one-element list, in which
        # case the assignment to index 0 below overwrites the file name.
        mutant_protein_data_bank_path_and_file = protein_data_bank_file.rsplit("/", 1)
        # Get iteration number of the pdb file, in case different mutations are made within the same pdb.
        mutant_number = mutant_pdb_file_dict.split(",")[0]
        # Put the name of the original protein (text before the first ".") together with the mutant
        # iteration number and the generated mutant suffix for the filename.
        mutant_protein_data_bank_path_and_file[-1] = mutant_protein_data_bank_path_and_file[-1].split(".")[
            0] + "_" + mutant_number + self._generate_mutant_file_name(
            protein_mutation_information)
        mutant_protein_data_bank_path_and_file[0] = output_folder
        # Execute a script which adds missing atoms to the PDB structure, returns a model.
        complemented_pdb = modeller.scripts.complete_pdb(protein_environment, protein_data_bank_file)
        # Instantiate an alignment object and add the model to it.
        pdb_alignment_array = modeller.alignment(protein_environment)
        pdb_alignment_array.append_model(complemented_pdb, atom_files=protein_data_bank_file,
                                         align_codes=protein_data_bank_file)
        # Apply each mutation to the current protein databank file.
        # The mutation list is flat, so it is walked in steps of three.
        # NOTE(review): only the select + mutate steps are treated as the loop
        # body here -- confirm this matches the intended loop extent.
        for protein_chain_index in range(0, len(protein_mutation_information), 3):
            # # [CHAIN , RESIDUE_POSITION , RESIDUE_TYPE]
            # # [protein_chain_index, protein_chain_index +1, protein_chain_index +2]
            # NOTE(review): the position is converted with int() before
            # indexing ``residues``; presumably this is a positional index
            # rather than a PDB residue number -- verify against Modeller.
            protein_selection = modeller.selection(
                complemented_pdb.chains[protein_mutation_information[protein_chain_index]].residues[
                    int(protein_mutation_information[protein_chain_index + 1])])
            protein_selection.mutate(residue_type=protein_mutation_information[protein_chain_index + 2])
        # Add the mutated model to the alignment.
        pdb_alignment_array.append_model(complemented_pdb, align_codes=protein_data_bank_file)
        # Remove the topology from the model.
        complemented_pdb.clear_topology()
        # Copy over the topology from the mutated model that resides within the alignment.
        complemented_pdb.generate_topology(pdb_alignment_array[-1])
        # Transfer the coordinates from the template native structure to the mutant.
        complemented_pdb.transfer_xyz(pdb_alignment_array)
        # In case of missing coordinates initialize_xyz=False generates coordinates.
        # Build method defines how the atoms are placed, with INTERNAL_COORDINATES the topology library is used.
        complemented_pdb.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
        # Re-read the original file and copy its residue numbering onto the mutant.
        revised_model = modeller.model(env=protein_environment, file=protein_data_bank_file)
        complemented_pdb.res_num_from(revised_model, pdb_alignment_array)
        # Write the new pdb file.
        # Collect the mutated_structures in a list.
        # NOTE(review): the stored path is built with "".join while the file
        # is written with "/".join, so the two strings differ by the "/"
        # separator -- confirm callers pass an output_folder that makes the
        # stored path resolve to the written file.
        self._stored_mutant_structures.append("".join(mutant_protein_data_bank_path_and_file) + ".pdb")
        # print("".join(mutant_protein_data_bank_path_and_file) + ".pdb")
        complemented_pdb.write(file="/".join(mutant_protein_data_bank_path_and_file) + ".pdb")
IMP.setup_from_argv(sys.argv, "IMP restraints in Modeller")

# Modeller side: create an environment with the heavy-atom libraries and
# build a model from the GGCC primary sequence.
env = modeller.environ()
env.edat.dynamic_sphere = False
env.libs.topology.read('${LIB}/top_heav.lib')
env.libs.parameters.read('${LIB}/par.lib')
modmodel = modeller.model(env)
modmodel.build_sequence('GGCC')

# IMP side: load the Modeller model into a fresh IMP model as a Hierarchy.
imp_model = IMP.Model()
loader = IMP.modeller.ModelLoader(modmodel)
protein = loader.load_atoms(imp_model)

# A simple IMP distance restraint tying the first atom to the last one.
atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
harmonic = IMP.core.Harmonic(10.0, 1.0)
first_particle = atoms[0].get_particle()
last_particle = atoms[-1].get_particle()
restraint = IMP.core.DistanceRestraint(imp_model, harmonic,
                                       first_particle, last_particle)
scoring_function = IMP.core.RestraintsScoringFunction([restraint])

# Wrap the IMP scoring function in IMPRestraints and register it as an
# additional term of the Modeller scoring function.
energy_terms = modmodel.env.edat.energy_terms
energy_terms.append(IMP.modeller.IMPRestraints(atoms, scoring_function))

# Evaluate the Modeller energy (score) over the whole protein.
whole_protein = modeller.selection(modmodel)
whole_protein.energy()
def _build_models(structfname, basedir, nmodels, refstructure, verbose, seq_rep_list):
    """
    Builds replicate structural models of a list of protein sequences.

    structfname is the template alignment file and basedir the output root;
    both are resolved against the current working directory.
    nmodels is the model budget: it is divided by the total replicates of a
    sequence id (at least one model per replicate) to size each Modeller run.
    refstructure, when truthy, is the key used to look up the reference entry
    in the template alignment; otherwise the first entry is the reference.
    verbose switches Modeller logging between verbose and none.
    seq_rep_list is a list of (sequence,replicates) pairs, giving each
    sequence object to be modeled (with ``identifier`` and ``sequence``
    attributes) and the number of replicates needed for that sequence object

    SIDE EFFECT: models are placed in basedir/sequence_id directory as
    rep<N>.pdb, superposed onto the reference structure's CA atoms.

    The working directory is changed to a temporary directory while Modeller
    runs and is always restored afterwards, including when Modeller raises.
    Returns None.
    """
    # set up path links, assuming current working directory
    workingdir = os.getcwd()
    structfname = os.path.normpath(os.path.join(workingdir, structfname))
    basedir = os.path.normpath(os.path.join(workingdir, basedir))

    # calculate total number of reps for each sequence id
    reps_per_id = {}
    for seq, reps in seq_rep_list:
        reps_per_id[seq.identifier] = reps_per_id.get(seq.identifier, 0) + reps

    for seq, reps in seq_rep_list:
        # calculate some information on total reps for this id and how many
        # models to build for this particular sequence
        total_reps_needed = reps_per_id[seq.identifier]
        models_per_rep = max(1, round(nmodels / total_reps_needed))
        mynmodels = models_per_rep * reps

        # check this sequence's existing structures; bail out if done
        mindex = 1
        outdir = basedir + os.path.sep + seq.identifier
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        else:
            existing_reps = [
                int(os.path.basename(x).split('rep')[1].split('.pdb')[0])
                for x in glob.glob(outdir + os.path.sep + 'rep*.pdb')
            ]
            if existing_reps:
                last_rep = max(existing_reps)
                if last_rep >= total_reps_needed:
                    continue
                # resume numbering after the highest replicate on disk
                mindex = last_rep + 1

        # set up temporary directory for modeller execution; relative paths
        # used below (e.g. the temporary alignment) land in it
        with tempfile.TemporaryDirectory(prefix=dnameprefix) as tempdir:
            os.chdir(tempdir)
            try:
                # set up modeller environment
                if verbose:
                    modeller.log.verbose()
                else:
                    modeller.log.none()
                env = modeller.environ()
                env.io.atom_files_directory = [workingdir]

                # set up complete alignment
                aln = modeller.alignment(env)
                aln.append(file=structfname, remove_gaps=False)
                knowns = [s.code for s in aln]
                aln.append_sequence(seq.sequence)
                aln[-1].code = seq.identifier

                # write alignment - modeller doesn't like alignment in memory
                full_aln_fname = 'structaligntemp.ali'
                aln.write(full_aln_fname, alignment_format='PIR')

                # set up model assessments
                ASSESS_METHODS = [modeller.automodel.assess.DOPE,
                                  modeller.automodel.assess.DOPEHR]
                ASSESS_NAMES = ["DOPE score", "DOPE-HR score"]
                a = modeller.automodel.dope_loopmodel(
                    env, alnfile=full_aln_fname, knowns=knowns,
                    sequence=seq.identifier, assess_methods=ASSESS_METHODS)
                a.starting_model = 1          # index of the first model
                a.ending_model = mynmodels    # index of the last model

                # adjust optimization parameters
                a.library_schedule = modeller.automodel.autosched.slow
                a.md_level = modeller.automodel.refine.slow
                a.make()                      # do homology modeling

                # evaluate structural models: rank the successful ones by
                # the mean of their assessment scores (ascending)
                ok_models = [x for x in a.outputs if x["failure"] is None]
                score_results = []
                for data in ok_models:
                    myscrs = [data[score_name] for score_name in ASSESS_NAMES]
                    ave_score = sum(myscrs) / len(myscrs)
                    score_results.append((ave_score, data["name"], myscrs))
                score_results.sort()
                best_models = score_results[:reps]

                # map to reference structure
                refseq = aln[0]
                if refstructure:
                    refseq = aln[refstructure]
                refcode = refseq.code
                refpdbf = refseq.atom_file
                refrange = refseq.range
                refmdl = modeller.model(env, file=refpdbf,
                                        model_segment=refrange)
                refpos = modeller.selection(refmdl).only_atom_types('CA')

                # get best models
                for _score, infname, _scores in best_models:
                    outfname = outdir + os.path.sep + 'rep{}.pdb'.format(mindex)
                    # build alignment
                    myaln = modeller.alignment(env)
                    myaln.append(file=structfname, align_codes=refcode,
                                 remove_gaps=False)
                    myaln.append_sequence(seq.sequence)
                    myaln[-1].code = seq.identifier
                    myaln[-1].atom_file = infname
                    # read pdb file
                    mymodel = modeller.model(env, file=infname)
                    # translate to reference coordinates
                    refpos.superpose(mymodel, myaln)
                    # write translated pdb file
                    mymodel.write(file=outfname)
                    mindex += 1
            finally:
                # Always leave the scratch directory before it is deleted,
                # so a Modeller failure cannot strand the process in a
                # removed working directory.
                os.chdir(workingdir)
    return