def test_script9(self):
    """Test step 9 (multiple fitting)"""
    rings = ('top', 'bottom')
    # Stage the step 8 outputs that this step consumes
    for ring in rings:
        density = ('precalculate_results/stage8_split_density/'
                   'groel-11.5A.%s.mrc' % ring)
        shutil.copy(density, 'output')
    # check_call raises if the script exits with a nonzero status
    subprocess.check_call(
        ['scripts/script9_symmetric_multiple_fitting.py'])
    e = modeller.environ()
    ref = modeller.model(
        e, file='precalculate_results/stage9_symmetric_multiple_fitting/'
                'model.top.0.pdb')
    sel = modeller.selection(ref).only_atom_types('CA')
    # At least one model in each ring should be close to the reference
    for ring in rings:
        rms = []
        for num in range(6):
            fname = 'output/model.%s.%d.pdb' % (ring, num)
            candidate = modeller.model(e, file=fname)
            aln = modeller.alignment(e)
            aln.append_model(ref, align_codes='ref')
            aln.append_model(candidate, align_codes='model')
            fit = sel.superpose(candidate, aln)
            rms.append(fit.rms)
            os.unlink(fname)
        best = min(rms)
        self.assertTrue(best < 10.0)
    # Remove the remaining script outputs
    os.unlink('output/intermediate_asmb_sols.out')
    for ring in rings:
        os.unlink('output/multifit.%s.output' % ring)
        os.unlink('output/multifit.%s.output.symm.ref' % ring)
        os.unlink('output/multifit.%s.param' % ring)
def find(self):
    """Return a Modeller selection corresponding to the allosteric site.

       The selection is computed on the first call and stored on the
       instance; later calls return the stored selection.
       @raise AllostericSiteError on error."""
    if self.__allosteric_site is not None:
        return self.__allosteric_site
    # align PDB2 to PDB1 and superimpose antigen
    try:
        salign0(self.env, self.pdb1, self.pdb2)
    except modeller.ModellerError as err:
        msg = ("Could not align %s with %s: %s. "
               "This is usually due to a poor alignment."
               % (self.pdb2, self.pdb1, str(err)))
        raise AllostericSiteError(msg)
    pmfit = get_fit_filename(self.pdb2)
    # determine residues in PDB2 that contact LIG1
    self.__pmfit = modeller.model(self.env, file=pmfit)
    lig1 = modeller.model(self.env, file=self.ligand)
    contacts = get_inter_contacts(self.env, self.__pmfit, lig1, self.rcut)
    self.__allosteric_site = modeller.selection(
        [ri for ri, rj, dist in contacts])
    # The fitted files are only needed to build the selection
    os.unlink(pmfit)
    os.unlink(get_fit_filename(self.pdb1))
    if len(self.__allosteric_site) == 0:
        raise AllostericSiteError("No allosteric site found")
    return self.__allosteric_site
def test_script6(self):
    """Test step 6 (model building and assessment)"""
    # Get inputs (outputs from steps 3 and 5)
    shutil.copy('precalculate_results/stage3_density_segmentation/'
                'groel_subunit_11.mrc', 'output')
    shutil.copy('precalculate_results/stage5_template_alignment/'
                'groel-1iokA.ali', 'output')
    # Make sure the script runs without errors (check_call raises on a
    # nonzero exit status, so its return value is not needed)
    subprocess.check_call(
        ['scripts/script6_model_building_and_assessment.py'])
    # Check output models
    e = modeller.environ()
    for i in range(1, 11):
        base = 'output/P0A6F5.B9999%04d' % i
        # (file name, expected number of residues)
        expected = ((base + '.pdb', 548),
                    (base + '.truncated.pdb', 524),
                    (base + '.truncated.fitted.pdb', 524))
        for pdb, nres in expected:
            m = modeller.model(e, file=pdb)
            self.assertEqual(len(m.residues), nres)
        # Only clean up once all three files have been checked
        for pdb, _ in expected:
            os.unlink(pdb)
    scores = 'output/model_building.scores.output'
    # Close the scores file before it is unlinked below
    with open(scores) as fh:
        wc = len(fh.readlines())
    # Should be one line for each of 10 models, plus a header
    self.assertEqual(wc, 11)
    os.unlink(scores)
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a seed's template structure on a reference template.

    A PIR alignment and both template structures are written to a scratch
    directory, the CA atoms are superposed with MODELLER, and the resulting
    RMS is stored in ``msmseed.rmsd_to_reference``. If any step raises an
    ``Exception``, its text is stored in ``msmseed.error_message`` instead
    of being propagated.

    Parameters
    ----------
    msmseed : object providing ``template_sequence``, ``template_id`` and
        ``template_structure``; it is updated in place
    ref_msmseed : object providing the same attributes for the reference

    Returns
    -------
    msmseed : the same object that was passed in
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os
    temp_dir = tempfile.mkdtemp()
    # Remember where we started so that the process is not left sitting in
    # the scratch directory after it has been deleted.
    try:
        start_dir = os.getcwd()
    except OSError:
        # The current directory no longer exists; nothing to go back to
        start_dir = None
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence,
                             msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id+'.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3; fall back
        # to the string form so that recording the error cannot itself fail
        msmseed.error_message = getattr(e, 'message', str(e))
    finally:
        if start_dir is not None:
            os.chdir(start_dir)
        shutil.rmtree(temp_dir)
    return msmseed
def file_to_model(pdbfile, chain):
    """Read pdbfile into a Modeller model.

    If chain is not None, only the segment from the first to the last
    residue of that chain is read. ``env`` is not a parameter; it is
    looked up in the surrounding scope.
    """
    if chain is not None:
        segment = ('FIRST:%s' % chain, 'LAST:%s' % chain)
        return modeller.model(env, file=pdbfile, model_segment=segment)
    return modeller.model(env, file=pdbfile)
def test_integrative_modeling(self):
    """Test the entire integrative modeling run"""
    import modeller
    # Build the clustering executable
    subprocess.check_call(['gfortran', 'cluster.f', 'u3best.f',
                           '-o', 'cluster.x'],
                          cwd='integrative_modeling/bin')
    # Sampling, followed by analysis
    subprocess.check_call(['./run_modeling.py'], cwd='integrative_modeling')
    subprocess.check_call(['bin/get_frames.sh'], cwd='integrative_modeling')

    # Make sure that at least two of the three "known good" clusters
    # are reproduced. Cluster files with '-' in their path are skipped.
    clusters = [x for x
                in glob.glob('integrative_modeling/clustering/clus.*.pdb')
                if '-' not in x]
    exp_clusters = glob.glob('model_refinement/cluster*/model.pdb')

    env = modeller.environ()
    cluster_match = [0] * len(clusters)
    exp_cluster_match = [0] * len(exp_clusters)
    # rms[i][j] is the RMSD between cluster i and expected cluster j
    rms = []
    for ncluster, cluster in enumerate(clusters):
        row = []
        for nexp_cluster, exp_cluster in enumerate(exp_clusters):
            mc = modeller.model(env, file=cluster)
            s = modeller.selection(mc)
            a = modeller.alignment(env)
            me = modeller.model(env, file=exp_cluster)
            a.append_model(mc, align_codes='clus')
            a.append_model(me, align_codes='exp_clus')
            # We only care about the global (non-cutoff) RMSD, so use a
            # large cutoff so that refine_local doesn't increase the number
            # of equivalent positions at the expense of worsening the RMSD
            r = s.superpose(me, a, rms_cutoff=999.)
            if r.rms < 15.0:
                cluster_match[ncluster] += 1
                exp_cluster_match[nexp_cluster] += 1
            row.append(r.rms)
        rms.append(row)
    # Number of clusters which are close to an expected cluster
    ncluster_match = sum(1 for hits in cluster_match if hits != 0)
    # Number of expected clusters which are close to a cluster
    nexp_cluster_match = sum(1 for hits in exp_cluster_match if hits != 0)
    # At least 2 of the 3 expected clusters must be close to one of the
    # clusters we produced (but not all the *same* cluster)
    enough_matches = ncluster_match >= 2 and nexp_cluster_match >= 2
    self.assertTrue(enough_matches,
                    "Could not find any match between the %d clusters "
                    "found in this test and 2 of the 3 'known good' "
                    "clusters (match defined as all-atom RMSD less than "
                    "15.0A). RMSD matrix: %s" % (len(clusters), str(rms)))
def perform_sequence_alignment():
    """Align experimental.pdb with rosetta.pdb and write align.ali.

    Both structures are read from the current directory, aligned with
    align2d, and the alignment is written in PIR format.
    """
    env = modeller.environ()
    experimental = modeller.model(env, file='experimental.pdb')
    rosetta = modeller.model(env, file='rosetta.pdb')
    alignment = modeller.alignment(env)
    # Only the experimental entry records an atom file
    alignment.append_model(experimental, align_codes='experimental',
                           atom_files='experimental.pdb')
    alignment.append_model(rosetta, align_codes='rosetta')
    alignment.align2d()
    alignment.write(file='align.ali', alignment_format='PIR')
def refined_vs_notrefined(models_dir, dope_profile):
    """Plot DOPE energy profiles of refined models against unrefined ones.

    Every entry of models_dir is read as a model and assessed with DOPE.
    Entries whose name ends in '.B' are treated as refined, all others as
    not refined. If both groups have the same size, they are paired in
    listing order and one comparison plot per pair is saved under the
    dope_profile prefix. Returns a message naming dope_profile.
    """
    env = environ()
    env.io.atom_files_directory = [models_dir]
    mdl_list = []
    aln = modeller.alignment(env)
    code_list = []
    mdl_nr_list = []
    aln_nr = modeller.alignment(env)
    code_list_nr = []
    for entry in os.listdir(models_dir):
        refined = entry.endswith('.B')
        mdl = modeller.model(env)
        mdl.read(file=entry)
        code = str(entry)
        if refined:
            models, codes, group_aln = mdl_list, code_list, aln
            aln_name = 'build_profile_ref.ali'
        else:
            print(code)
            models, codes, group_aln = mdl_nr_list, code_list_nr, aln_nr
            aln_name = 'build_profile_notref.ali'
        codes.append(code)
        atoms = selection(mdl)
        atoms.assess_dope(output='ENERGY_PROFILE NO_REPORT',
                          file=models_dir + code + '.profile',
                          normalize_profile=True, smoothing_window=15)
        models.append(mdl)
        group_aln.append_model(mdl, align_codes=code, atom_files=code)
        # The group's alignment file is rewritten after every new entry
        group_aln.write(file=dope_profile + aln_name,
                        alignment_format='PIR')
    if len(mdl_nr_list) == len(mdl_list):
        for code_nr, code_ref in zip(code_list_nr, code_list):
            model1 = get_profile(models_dir + code_nr + ".profile",
                                 aln_nr[str(code_nr)])
            model2 = get_profile(models_dir + code_ref + ".profile",
                                 aln[str(code_ref)])
            pylab.figure(1, figsize=(30, 18))
            pylab.xlabel('Alignment position', fontsize=20)
            pylab.ylabel('DOPE per-residue score', fontsize=20)
            pylab.plot(model1, color='red', linewidth=2, label='Model')
            pylab.plot(model2, color='green', linewidth=2,
                       label='Optimized model')
            pylab.legend(fontsize=20)
            pylab.savefig(dope_profile + code_nr + '.dope_profile.jpg',
                          dpi=100)
            pylab.close()
    return ("Comparison energy plot between refined and not refined model has been created here:\n%s\n" % (dope_profile))
def main():
    """Print one formatted line per residue contact between two files.

    The two file names and the cutoff come from parse_args(); the
    contacts come from get_inter_contacts().
    """
    import modeller
    file1, file2, rcut = parse_args()
    e = modeller.environ()
    e.io.hetatm = True
    mdl1 = modeller.model(e, file=file1)
    mdl2 = modeller.model(e, file=file2)
    row_format = " %6s %2s %6s %2s %3s %3s%3d%11.3f %1d %1d"
    for ri, rj, dist in get_inter_contacts(e, mdl1, mdl2, rcut):
        fields = (ri.num, ri.chain.name, rj.num, rj.chain.name,
                  ri.pdb_name, rj.pdb_name, get_contact_type(ri, rj),
                  dist, 6, 6)
        print(row_format % fields)
def test_feature_sidechain_biso(self):
    """Check average sidechain Biso feature"""
    env = self.get_environ()
    mlib = self.get_mdt_library()
    # An invalid protein argument must be rejected
    self.assertRaises(ValueError, mdt.features.SidechainBiso, mlib,
                      bins=mdt.uniform_bins(5, 0, 10), protein=3)
    sidechain_biso = mdt.features.SidechainBiso(
        mlib, bins=mdt.uniform_bins(5, 0, 10))
    mdl = modeller.model(env)
    mdl.build_sequence("A")
    aln = modeller.alignment(env)
    aln.append_model(mdl, align_codes="test")
    structure = aln[0]
    # Mainchain atom Biso should be ignored:
    for mainchain in ("N:1", "C:1", "O:1", "OXT:1", "CA:1"):
        structure.atoms[mainchain].biso = 1000
    cases = (
        (22, 2),   # Map regular values to bins
        (32, 3),
        (0, -1),   # Zero Biso should be "undefined"
        (1, 3),    # Biso < 2 is multiplied by 4pi^2
    )
    for biso, expected_bin in cases:
        structure.atoms["CB:1"].biso = biso
        table = mdt.Table(mlib, features=sidechain_biso)
        table.add_alignment(aln)
        self.assertEqual(table.shape, (6,))
        self.assertEqual(table.sum(), 1)
        self.assertEqual(table[expected_bin], 1)
def main(args):
    """Align a template structure with a target sequence using align2d.

    Reads the chain range args.chains[0]..args.chains[1] of args.template,
    appends the sequence from args.target (looked up in args.dir), runs
    align2d, and writes the result in PIR (.ali) and PAP (.pap) formats
    into args.dir before checking the alignment.
    """
    mod.log.verbose()
    env = mod.environ()
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]
    aln = mod.alignment(env)

    segment = (
        "FIRST:" + args.chains[0].upper(),
        "LAST:" + args.chains[1].upper(),
    )
    mdl = mod.model(env, file=args.template, model_segment=segment)
    template_code = args.template.replace(".pdb", "")
    aln.append_model(mdl, align_codes=template_code,
                     atom_files=args.template)

    sequence_code = args.target.replace(".ali", "")
    sequence_file = os.path.join(args.dir, args.target)
    aln.append(file=sequence_file, align_codes=sequence_code)

    # perform alignment
    aln.align2d()

    align_file = os.path.join(args.dir, sequence_code + "-" + template_code)
    # for Modeller
    aln.write(file=align_file + ".ali", alignment_format="PIR")
    # easier to read
    aln.write(file=align_file + ".pap", alignment_format="PAP")

    # check files
    aln.check()
def test_glyc(self):
    """Test glycosylation benchmark"""
    os.chdir(os.path.join(TOPDIR, 'benchmark', 'input_glyc'))
    # Cleanup anything left over from a previous run
    shutil.rmtree('pred_dECALCrAS1000', ignore_errors=True)

    subprocess.check_call(['allosmod', 'setup'])
    # Setup should generate ligand and script:
    for generated in ['lig.pdb', 'qsub.sh']:
        self.assertTrue(os.path.exists(generated))

    # Run the protocol
    subprocess.check_call(['/bin/sh', '--login', './qsub.sh'])

    # Should generate more files:
    os.chdir('pred_dECALCrAS1000/2AAS.pdb_0')
    outputs = ['align2.ali', 'allosmod.py', 'converted.rsr',
               'model_glyc.log', 'model_glyc.py', 'pm.pdb.B99990001.pdb',
               'pm.pdb.D00000001', 'pm.pdb.V99990001', 'run.log']
    for generated in outputs:
        self.assertTrue(os.path.exists(generated))

    # Generated model should have sugars added to second chain
    e = modeller.environ()
    e.io.hetatm = True
    m = modeller.model(e, file='pm.pdb.B99990001.pdb')
    protein, sugars = 0, 1
    self.assertEqual(len(m.chains), 2)
    self.assertEqual(len(m.chains[protein].residues), 124)
    self.assertEqual(len(m.chains[sugars].residues), 16)
    # The same eight-residue pattern is expected twice
    glycan = ['NAG', 'NAG', 'BMA', 'MAN', 'MAN', 'MAN', 'MAN', 'MAN']
    self.assertEqual([r.name for r in m.chains[sugars].residues],
                     glycan + glycan)
def setup_atoms(self, env):
    """Read the model and set the classification flags on every atom.

    Populates self.m, self.atoms, self.contacts, self.breaks and
    self.beta_structure, then sets on each atom wrapper:

    - isNUC / torestr for atoms in nucleic acid residues (which are also
      marked as in contact with every residue of the model)
    - isSC, and for backbone or nucleic acid atoms also isCA / isCB
    - isAS, taken in order from the file self.atomlist_asrs

    :param env: Modeller environment used to read self.pdb_file.
    """
    self.m = modeller.model(env, file=self.pdb_file)
    self.atoms = [Atom(a) for a in self.m.atoms]
    self.contacts = get_contacts(self.contacts_pdbs, self.rcut)
    if self.break_file:
        # Close the file once parsed (it was previously left open).
        # NOTE(review): assumes get_breaks() reads the whole file before
        # returning rather than returning a lazy iterator - confirm.
        with open(self.break_file) as fh:
            self.breaks = get_breaks(fh)
    else:
        self.breaks = {}
    self.beta_structure = get_beta(self.pdb_file)

    NUCLEIC_ACIDS = frozenset(['ADE', 'A', 'DA', 'THY', 'T', 'DT',
                               'URA', 'U', 'DU', 'GUA', 'G', 'DG',
                               'CYT', 'C', 'DC'])
    BACKBONE_ATOMS = frozenset(['CA', 'CB', 'O', 'N', 'C', 'OT', 'NA', 'NB',
                                'NC', 'ND', 'C1A', 'C2A', 'C3A', 'C4A',
                                'C1B', 'C2B', 'C3B', 'C4B', 'C1C', 'C2C',
                                'C3C', 'C4C', 'C1D', 'C2D', 'C3D', 'C4D'])
    num_residues = len(self.m.residues)
    for a in self.atoms:
        r = a.a.residue
        is_nucleic = r.pdb_name in NUCLEIC_ACIDS
        if is_nucleic:
            a.isNUC = True
            a.torestr = get_nuc_restrained(a.a.name, r.pdb_name)
            # Nucleic acid residues are in contact with every residue
            for rj in range(1, num_residues + 1):
                self.contacts[(r.index, rj)] = True
        if a.a.name in BACKBONE_ATOMS or is_nucleic:
            a.isSC = False
            a.isCA = a.a.name == 'CA'
            a.isCB = a.a.name == 'CB'
        else:
            a.isSC = a.a.name != 'H'
    # The file stays open for the whole loop in case the parser reads
    # lazily, and is closed afterwards.
    with open(self.atomlist_asrs) as fh:
        for a, asrs in zip(self.atoms, parse_atomlist_asrs(fh)):
            a.isAS = asrs
def DOPE_profiles_maker(temp_dir, outputs):
    """Save a DOPE per-residue profile plot (.jpg) for each model file.

    Every entry of temp_dir whose name starts with "mod" is read with
    Modeller, assessed with DOPE, and plotted. The profile and the plot
    are written under the outputs prefix. Returns a message naming the
    plot written for the last model processed.
    """
    env = environ()
    env.io.atom_files_directory = [temp_dir]
    flist = [entry for entry in os.listdir(temp_dir)
             if entry.startswith("mod")]
    aln = modeller.alignment(env)
    for fname in flist:
        mdl = modeller.model(env)
        code = str(fname)
        mdl.read(file=code, model_segment=('FIRST:@', 'END:'))
        aln.append_model(mdl, align_codes=code, atom_files=code)
        file_dope = outputs + code + '.profile'
        atoms = selection(mdl)
        atoms.assess_dope(output='ENERGY_PROFILE NO_REPORT', file=file_dope,
                          normalize_profile=True, smoothing_window=15)
        profile = get_profile(file_dope, aln[str(fname)])
        # The last four characters of the file name are dropped to build
        # the image name, and the first three as well for the plot label
        path_img = outputs + fname[:-4] + '.dope_profile.jpg'
        pylab.figure(1, figsize=(20, 12))
        pylab.xlabel('Alignment position', fontsize=20)
        pylab.ylabel('DOPE per-residue score', fontsize=20)
        pylab.plot(profile, color='green', linewidth=3, label=fname[3:-4])
        pylab.savefig(path_img, dpi=100)
        pylab.close()
    return("DOPE profile plot for model created here:\n %s\n" % (path_img))
def get_sas(pdb,probe):
    """Return per-atom accessibilities computed by Modeller.

    The model is read from pdb, accessibilities are calculated with the
    given probe radius, and the model is written next to the input with
    its extension replaced by '.sas'. That file is then parsed: for each
    ATOM line, columns 60:66 are stored as a float under the key
    (atom, res, resid, chain), where a blank chain ID is replaced by 'A'.
    """
    import modeller
    # Read the PDB file
    env = modeller.environ()
    mdl = modeller.model(env)
    mdl.read(file=pdb)

    # Calculate atomic accessibilities (in Biso) with appropriate probe_radius
    myedat = modeller.energy_data()
    myedat.radii_factor = 1.6
    mdl.write_data(edat=myedat, output='PSA ATOMIC_SOL',
                   psa_integration_step=0.05, probe_radius=probe)
    sas_path = pdb.rsplit('.', 1)[0] + '.sas'
    mdl.write(file=sas_path)

    # read SAS
    Sas = {}
    with open(sas_path) as data:
        for raw in data:
            d = raw.strip()
            if d[:4] != 'ATOM':
                continue
            atom = d[12:16]
            res = d[17:20]
            resid = int(d[22:26])
            cid = 'A' if d[21] == ' ' else d[21]
            Sas[(atom, res, resid, cid)] = float(d[60:66])
    return Sas
def save_modeller_output_files(target, model_dir, a, env, model_pdbfilepath, model_pdbfilepath_uncompressed, write_modeller_restraints_file=False):
    """Save the first model produced by a MODELLER run, plus metadata.

    Writes the model uncompressed and gzip-compressed, writes its
    sequence identity to 'sequence-identity.txt' in model_dir, and
    optionally stores a gzipped copy of the restraints file.

    :param target: object whose ``id`` names the '<id>.rsr' restraints
        file in the current directory.
    :param model_dir: directory receiving 'sequence-identity.txt' and
        'restraints.rsr.gz'.
    :param a: object whose ``outputs[0]['name']`` is the path of the model
        to save.
    :param env: Modeller environment used to read the model.
    :param model_pdbfilepath: path of the gzip-compressed PDB to write.
    :param model_pdbfilepath_uncompressed: path of the plain PDB to write.
    :param write_modeller_restraints_file: if True, also write
        'restraints.rsr.gz'.
    """
    import shutil

    # save PDB file
    # Note that the uncompressed pdb file needs to be kept until after the clustering step has completed
    tmp_model_pdbfilepath = a.outputs[0]['name']
    target_model = modeller.model(env, file=tmp_model_pdbfilepath)
    target_model.write(file=model_pdbfilepath_uncompressed)
    # The gzip stream is binary, so the source must be read as bytes too;
    # writing text-mode str to it raises TypeError on Python 3.
    with open(model_pdbfilepath_uncompressed, 'rb') as model_pdbfile:
        with gzip.open(model_pdbfilepath, 'wb') as model_pdbfilegz:
            shutil.copyfileobj(model_pdbfile, model_pdbfilegz)

    # Write sequence identity.
    seqid_filepath = os.path.abspath(
        os.path.join(model_dir, 'sequence-identity.txt'))
    with open(seqid_filepath, 'w') as seqid_file:
        seqid_file.write('%.1f\n' % target_model.seq_id)

    # Copy restraints.
    if write_modeller_restraints_file:
        restraint_filepath = os.path.abspath(
            os.path.join(model_dir, 'restraints.rsr.gz'))
        with open('%s.rsr' % target.id, 'rb') as rsrfile:
            with gzip.open(restraint_filepath, 'wb') as rsrgzfile:
                shutil.copyfileobj(rsrfile, rsrgzfile)
def get_sas(pdb, probe):
    """Return per-atom accessibilities computed by Modeller.

    Reads the model from pdb, calculates accessibilities with the given
    probe radius, and writes the model to a '.sas' file beside the input.
    The returned dict maps (atom, res, resid, chain) to the float found in
    columns 60:66 of each ATOM line of that file; a blank chain ID is
    replaced by "A".
    """
    import modeller

    # Read the PDB file
    env = modeller.environ()
    mdl = modeller.model(env)
    mdl.read(file=pdb)

    # Calculate atomic accessibilities (in Biso) with appropriate probe_radius
    myedat = modeller.energy_data()
    myedat.radii_factor = 1.6
    mdl.write_data(edat=myedat, output="PSA ATOMIC_SOL",
                   psa_integration_step=0.05, probe_radius=probe)
    stem = pdb.rsplit(".", 1)[0]
    mdl.write(file=stem + ".sas")

    # read SAS
    with open("%s.sas" % (stem,)) as data:
        records = [line.strip() for line in data.readlines()]

    Sas = {}
    for record in records:
        if record[:4] == "ATOM":
            chain = record[21]
            if chain == " ":
                chain = "A"
            key = (record[12:16], record[17:20], int(record[22:26]), chain)
            Sas[key] = float(record[60:66])
    return Sas
def _align_structures(structures, verbose):
    """Aligns structures using iterative structural alignment.

    :param structures: mapping of alignment code to structure file name.
    :param verbose: if true, enable Modeller's verbose logging; otherwise
        Modeller logging is turned off.
    :return: an Alignment object built from the PIR file Modeller writes.
    """
    # set up modeller environment
    if verbose:
        modeller.log.verbose()
    else:
        modeller.log.none()
    env = modeller.environ()
    aln = modeller.alignment(env)
    # read structures into modeller environment
    for (code, structure) in structures.items():
        mdl = modeller.model(env, file=structure)
        aln.append_model(mdl, align_codes=code, atom_files=structure)
    # align structures using iterative structural alignment
    modeller.salign.iterative_structural_align(aln)
    # convert modeller alignment to Alignment object
    # (fnameprefix is not defined in this function; it is looked up in the
    # surrounding scope)
    mod_aln_f = tempfile.NamedTemporaryFile(mode='w', prefix=fnameprefix,
                                            suffix='.ali', delete=False)
    mod_aln_fname = mod_aln_f.name
    mod_aln_f.close()
    try:
        aln.write(mod_aln_fname, alignment_format='PIR')
        alnobj = Alignment(mod_aln_fname)
    finally:
        # Remove the scratch file even if writing or parsing fails
        os.remove(mod_aln_fname)
    return alnobj
def test_disulfide(self):
    """Test handling of disulfide bonds"""
    mlib = self.get_all_libraries()
    bsep = mdt.features.AtomBondSeparation(
        mlib, bins=mdt.uniform_bins(20, 0, 1.0))
    bsep_ss = mdt.features.AtomBondSeparation(
        mlib, bins=mdt.uniform_bins(20, 0, 1.0), disulfide=True)
    env = self.get_environ()
    mdl = modeller.model(env)
    mdl.build_sequence('CC')
    span = (-999, 0, 0, 999)
    # When SG-SG distance is small enough, an extra bond
    # (separation feature = 1) should be detected, but only with
    # disulfide=True
    for (dist, num) in [(2.6, 11.0), (2.4, 12.0)]:
        sg1 = mdl.residues[0].atoms['SG']
        sg2 = mdl.residues[1].atoms['SG']
        # Put the first SG at the origin and the second dist along z
        sg1.x = sg1.y = sg1.z = 0.
        sg2.x = sg2.y = 0.
        sg2.z = dist
        a = modeller.alignment(env)
        a.append_model(mdl, atom_files='test', align_codes='test')

        plain = mdt.Table(mlib, features=bsep)
        plain.add_alignment(a, residue_span_range=span)
        self.assertEqual(plain[1], 11.0)

        with_ss = mdt.Table(mlib, features=bsep_ss)
        with_ss.add_alignment(a, residue_span_range=span)
        self.assertEqual(with_ss[1], num)
def complete_pdb(env, filename, special_patches=None, transfer_res_num=False, model_segment=None, patch_default=True):
    """Reads the given PDB file, reorders the atoms to match the current
       topology library, and adds any missing atoms.

       You should read topology and parameters into 'env' before calling
       this routine.

       :param env: Modeller environment.
       :type env: :class:`environ`
       :param filename: the PDB file to read.
       :param special_patches: if set, it is expected to be a routine which
              takes one parameter (the model) and applies any patches (e.g.
              disulfide bridges).
       :param transfer_res_num: if True, the residue numbering from the
              original PDB is retained (by default, residues are renumbered
              from 1).
       :param model_segment: if not None, it is passed as the
              ``model_segment`` argument whenever the file is read.
       :param patch_default: if True, default terminal patches are applied.
       :return: the completed model.
       :rtype: :class:`model`"""
    # Optional keyword arguments shared by every read of the file
    read_opts = {}
    if model_segment is not None:
        read_opts['model_segment'] = model_segment
    mdl = model(env, file=filename, model_format='PDB_OR_MMCIF', **read_opts)
    # Save original chain IDs, since generate_topology resets them
    original_chain_ids = [chain.name for chain in mdl.chains]
    aln = alignment(env)
    aln.append_model(mdl, atom_files=filename, align_codes='struc')
    aln.append_model(mdl, atom_files=filename+'.ini',
                     align_codes='struc-ini')
    mdl.clear_topology()
    mdl.generate_topology(aln[-1], patch_default=patch_default)
    if special_patches:
        special_patches(mdl)
    # Save original seq_id, as transfer_xyz sets it
    saved_seq_id = mdl.seq_id
    mdl.transfer_xyz(aln)
    mdl.seq_id = saved_seq_id
    # Restore original chain IDs
    for chain, name in zip(mdl.chains, original_chain_ids):
        chain.name = name
    mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
    if not transfer_res_num:
        return mdl
    numbering_source = model(env, file=filename, **read_opts)
    mdl.res_num_from(numbering_source, aln)
    return mdl
def make_model(msmseed):
    """
    Use MODELLER from the Sali lab to create a model between the target and
    template specified in the input

    The work is done in a scratch directory that is removed afterwards. If
    any step raises an ``Exception``, ``msmseed.error_state`` is set to -1
    and the seed is returned without a model.

    Parameters
    ----------
    msmseed : MSMSeed
        object containing the alignment between target and template and
        template structure

    Returns
    -------
    msmseed : MSMSeed
        object containing the homology model built from the input alignment
        and template structure
    """
    import tempfile
    import os
    import modeller
    import modeller.automodel
    import shutil
    import simtk.openmm.app as app
    # if the target and template are the same, modeller dies.
    if msmseed.template_id == msmseed.target_id:
        msmseed.target_model = msmseed.template_structure
        return msmseed
    # first, we need to make a temp directory where we can put the files
    # MODELLER needs
    temp_dir = tempfile.mkdtemp()
    # Remember where we started so that the process is not left sitting in
    # the scratch directory after it has been deleted.
    try:
        start_dir = os.getcwd()
    except OSError:
        # The current directory no longer exists; nothing to go back to
        start_dir = None
    try:
        os.chdir(temp_dir)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(msmseed.alignment)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        a = modeller.automodel.allhmodel(
            env,
            # file with template codes and target sequence
            alnfile='aln_tmp.pir',
            # PDB codes of the template
            knowns=msmseed.template_id,
            # code of the target
            sequence=msmseed.target_id)
        a.make()
        tmp_model_pdbfilename = a.outputs[0]['name']
        target_model = modeller.model(env, file=tmp_model_pdbfilename)
        msmseed.sequence_similarity = target_model.seq_id
        msmseed.target_model = app.PDBFile(tmp_model_pdbfilename)
        with open('%s.rsr' % msmseed.target_id, 'r') as rsr_file:
            msmseed.target_restraints = rsr_file.readlines()
    except Exception:
        # Failures are recorded on the seed. KeyboardInterrupt and
        # SystemExit are deliberately no longer swallowed here.
        msmseed.error_state = -1
    finally:
        if start_dir is not None:
            os.chdir(start_dir)
        shutil.rmtree(temp_dir)
    return msmseed
def test_detect_invalid_residue_types_ok(self):
    """Test _detect_invalid_residue_types() with OK sequence"""
    with utils.temporary_directory() as workdir:
        pdb_path = os.path.join(workdir, 'test.pdb')
        with open(pdb_path, 'w') as out:
            out.write(pdb_line + '\n')
        env = modeller.environ()
        mdl = modeller.model(env, file=pdb_path)
        # The test passes as long as this call does not raise
        cleaning._detect_invalid_residue_types(mdl)
def make_model(self):
    """Return a Modeller model read from a one-atom PDB file.

    The file 'test.pdb' is written in the current directory, read, and
    then removed again.
    """
    import modeller
    pdb_name = 'test.pdb'
    env = modeller.environ()
    env.edat.dynamic_sphere = False
    with open(pdb_name, 'w') as out:
        out.write("ATOM 2 CA ALA 1 27.449 14.935 5.140 1.00 29.87 C\n")
    mdl = modeller.model(env, file=pdb_name)
    os.unlink(pdb_name)
    return mdl
def copy_to_modeller(env, particles):
    """Return a Modeller model with one placeholder atom per particle.

    A temporary PDB file holding len(particles) ATOM records, all at the
    origin, is written to the current directory, read into a model, and
    removed. Only the number of particles is used.

    :param env: Modeller environment used to read the file.
    :param particles: sized collection; one atom is created per element.
    :return: the Modeller model read from the temporary file.
    """
    pdb_name = "temp_particles.pdb"
    record = ("ATOM %5d N ALA 0 0.000 0.000 0.000 "
              "1.00 0.00 C \n")
    try:
        # The context manager closes the file even if a write fails
        with open(pdb_name, "w") as fh:
            fh.writelines(record % i for i in range(len(particles)))
        mdl = modeller.model(env, file='temp_particles.pdb')
    finally:
        # Do not leave the scratch file behind if writing or reading fails
        if os.path.exists(pdb_name):
            os.unlink(pdb_name)
    return mdl
def spline(pdb_file, in_restraints, out_restraints):
    """Read restraints for a model, convert them, and write them out.

    The model is read from pdb_file, restraints are read from
    in_restraints, passed through convert_restraints(), and written to
    out_restraints.
    """
    import modeller
    # Needed to keep our custom form alive for restraints.read()
    from allosmod.modeller.forms import TruncatedGaussian
    env = modeller.environ()
    mdl = modeller.model(env, file=pdb_file)
    restraints = mdl.restraints
    restraints.read(file=in_restraints)
    convert_restraints(mdl.restraints)
    mdl.restraints.write(file=out_restraints)
def mk_strct_al_modeller(strct_data1, strct_data2):
    """Structurally align two chains with SALIGN and map their positions.

    Each argument is a mapping with an 'id' key (appended to 'pdb' to form
    the structure code) and a 'chain_id' key. The two chains are aligned
    in three SALIGN passes while stdout is silenced. If the final quality
    score exceeds 70, every alignment column where neither sequence has a
    gap contributes one entry to the result.

    :return: dict mapping positions of the first chain to positions of the
        second, as computed by get_real_position_al(); empty if the
        quality threshold is not met or Modeller fails.
    """
    code1 = 'pdb' + strct_data1['id']
    code2 = 'pdb' + strct_data2['id']
    chain1 = strct_data1['chain_id']
    chain2 = strct_data2['chain_id']
    result = {}
    # Only the name is needed: Modeller writes the FASTA file itself
    tmp_file = tempfile.NamedTemporaryFile(suffix=".fasta", delete=False)
    tmp_file.close()
    _stdout = sys.stdout
    sys.stdout = open(os.devnull, 'w')
    try:
        env = m.environ()
        aln = m.alignment(env)
        env.io.atom_files_directory = ['.', PDB_DIR]
        try:
            for (code, chain) in ((code1, chain1), (code2, chain2)):
                mdl = m.model(env, file=code,
                              model_segment=('FIRST:'+chain, 'LAST:'+chain))
                aln.append_model(mdl, atom_files=code,
                                 align_codes=code+chain)
            # write_fit from the table is unused: write_fit=False is
            # passed on every pass, as in the original code.
            for (weights, write_fit, whole) in (
                    ((1., 0., 0., 0., 1., 0.), False, True),
                    ((1., 0.5, 1., 1., 1., 0.), False, True),
                    ((1., 1., 1., 1., 1., 0.), True, False)):
                r = aln.salign(rms_cutoff=3.5, normalize_pp_scores=False,
                               rr_file='$(LIB)/as1.sim.mat', overhang=30,
                               gap_penalties_1d=(-450, -50),
                               gap_penalties_3d=(0, 3), gap_gap_score=0,
                               gap_residue_score=0,
                               # If 'progresive', the tree is not computed
                               # and all structures will be aligned
                               # sequentially to the first
                               alignment_type='tree',
                               # For a multiple sequence alignment only
                               # the first feature needs to be non-zero
                               feature_weights=weights,
                               improve_alignment=True, fit=True,
                               write_fit=False, write_whole_pdb=whole,
                               output='ALIGNMENT QUALITY')
            if r.qscorepct > 70:
                aln.write(file=tmp_file.name, alignment_format='FASTA')
                with open(tmp_file.name) as a:
                    alignment = unwrap(a.read().splitlines())
                seq1, seq2 = alignment[1], alignment[3]
                for i in range(len(seq1)):
                    # Test column i itself. The previous test compared the
                    # whole sequence strings with '-', so it was always
                    # true and gap columns were mapped as well.
                    if seq1[i] != '-' and seq2[i] != '-':
                        pos1 = get_real_position_al(seq1, i)
                        pos2 = get_real_position_al(seq2, i)
                        result[pos1] = pos2
        except Exception:
            # stdout is silenced at this point, so report on stderr
            sys.stderr.write('Modeller failed\n')
    finally:
        # Always restore stdout and remove the scratch FASTA file
        sys.stdout.close()
        sys.stdout = _stdout
        if os.path.exists(tmp_file.name):
            os.remove(tmp_file.name)
    return result
def build_mdt_from_sequence(self, mlib, features, seq, **keys):
    """Build a simple test MDT for a given sequence"""
    env = self.get_environ()
    # Build a model for the sequence and wrap it in an alignment
    mdl = modeller.model(env)
    mdl.build_sequence(seq)
    aln = modeller.alignment(env)
    aln.append_model(mdl, atom_files='test', align_codes='test')
    # Extra keyword arguments go straight to add_alignment
    table = mdt.Table(mlib, features=features)
    table.add_alignment(aln, **keys)
    return table
def get_test_mdt(self, mlib, features):
    """Return a reshaped MDT built from the one-residue sequence 'C'.

    The table is reshaped with offset 0 and shape -1 for every feature.
    """
    env = self.get_environ()
    mdl = modeller.model(env)
    mdl.build_sequence('C')
    aln = modeller.alignment(env)
    aln.append_model(mdl, atom_files='test', align_codes='test')
    table = mdt.Table(mlib, features=features)
    table.add_alignment(aln)
    nfeat = len(features)
    offsets = [0] * nfeat
    shape = [-1] * nfeat
    return table.reshape(features, offsets, shape)
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a seed's template structure on a reference template.

    A PIR alignment and both template structures are written to a scratch
    directory, the CA atoms are superposed with MODELLER, and the resulting
    RMS is stored in ``msmseed.rmsd_to_reference``. If any step raises an
    ``Exception``, its text is stored in ``msmseed.error_message`` instead
    of being propagated.

    Parameters
    ----------
    msmseed : object providing ``template_sequence``, ``template_id`` and
        ``template_structure``; it is updated in place
    ref_msmseed : object providing the same attributes for the reference

    Returns
    -------
    msmseed : the same object that was passed in
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os

    temp_dir = tempfile.mkdtemp()
    # Remember where we started so that the process is not left sitting in
    # the scratch directory after it has been deleted.
    try:
        start_dir = os.getcwd()
    except OSError:
        # The current directory no longer exists; nothing to go back to
        start_dir = None
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence,
                             msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Exceptions have no ``message`` attribute on Python 3; fall back
        # to the string form so that recording the error cannot itself fail
        msmseed.error_message = getattr(e, 'message', str(e))
    finally:
        if start_dir is not None:
            os.chdir(start_dir)
        shutil.rmtree(temp_dir)
    return msmseed
def align(target_name: str, target_sequence: str, template_name: str, template_chain: str) -> None:
    """Align a target sequence to one chain of a template structure.

    Creates a file called f'alignment_{target_name}_and_{template_name}.pir'
    in the current directory. Assumes a file already exists called
    f'{template_name}.pdb'. The Modeller module ``m`` and environment
    ``env`` are not parameters; they are looked up in the surrounding scope.

    Args:
        target_name: alignment code for the target sequence.
        target_sequence: target residues, used as the body of a PIR entry.
        template_name: template code, also passed to Modeller as the
            structure file to read.
        template_chain: ID of the template chain to read (from its first
            to its last residue).
    """
    # The target is supplied to Modeller as an in-memory PIR entry
    target_pir = StringIO(
        f'>P1;{target_name}\nsequence:{target_name}::::::::\n{target_sequence}*')
    alignment_instance = m.alignment(env)
    model_instance = m.model(env)
    model_instance.read(file=template_name,
                        model_segment=(f'FIRST:{template_chain}',
                                       f'LAST:{template_chain}'))
    alignment_instance.append_model(model_instance,
                                    align_codes=template_name,
                                    atom_files=template_name)
    alignment_instance.append(file=target_pir, align_codes=target_name)
    alignment_instance.align2d()
    alignment_instance.write(
        file=f'alignment_{target_name}_and_{template_name}.pir')
def salign0(env, ff1, ff2):
    """Align two structures with three SALIGN passes.

    Both files are read from 'FIRST:@' to 'END:'. The fit atoms are
    determined from the first structure. Each pass uses its own feature
    weights, write_fit and write_whole_pdb settings. Returns the Modeller
    alignment.
    """
    import modeller
    aln = modeller.alignment(env)
    segment = ('FIRST:@', 'END:')

    first = modeller.model(env, file=ff1, model_segment=segment)
    fit_atoms = determine_fit_atoms(first)
    aln.append_model(first, atom_files=ff1, align_codes=ff1)

    second = modeller.model(env, file=ff2, model_segment=segment)
    aln.append_model(second, atom_files=ff2, align_codes=ff2)

    # (feature weights, write_fit, write_whole_pdb) for each pass
    passes = (
        ((1., 0., 0., 0., 1., 0.), False, True),
        ((1., 0.5, 1., 1., 1., 0.), False, True),
        ((1., 1., 1., 1., 1., 0.), True, False),
    )
    for weights, write_fit, whole in passes:
        aln.salign(rms_cutoff=3.5, normalize_pp_scores=False,
                   rr_file='$(LIB)/as1.sim.mat', overhang=30,
                   gap_penalties_1d=(-450, -50), gap_penalties_3d=(0, 3),
                   gap_gap_score=0, gap_residue_score=0,
                   fit_atoms=fit_atoms, alignment_type='tree',
                   feature_weights=weights, improve_alignment=True,
                   fit=True, write_fit=write_fit, write_whole_pdb=whole,
                   output='ALIGNMENT QUALITY')
    return aln
def _structureX_seq_from_modeller(self):
    """
    return a str containing the first two non-empty lines of the
    sequence file corresponding to structureX, followed by a newline and
    a '*'

    a file named [self._id]_structureX.seq is also written
    """
    env = modeller.environ()
    model = modeller.model(env, file=self._id)
    aln = modeller.alignment(env)
    aln.append_model(model, align_codes=self._id)
    out_file = self._id + "_structureX.seq"
    aln.write(file=out_file)
    # Read the file back through a context manager so that the handle is
    # closed (it was previously left open)
    with open(out_file, "r") as fh:
        contents = fh.read()
    lines = [c for c in contents.split("\n") if c]
    return "\n".join(lines[:2]) + "\n*"
def test_read_alnstructure(self):
    """Check reading a Modeller alignment structure"""
    env = self.get_environ()
    mdl = modeller.model(env)
    mdl.build_sequence('C')
    aln = modeller.alignment(env)
    aln.append_model(mdl, align_codes='test', atom_files='test')

    # Load the alignment structure into a fresh IMP model
    imp_model = IMP.Model()
    loader = IMP.modeller.ModelLoader(aln[0])
    hierarchy = loader.load_atoms(imp_model)
    all_atoms = IMP.atom.get_by_type(hierarchy, IMP.atom.ATOM_TYPE)
    self.assertEqual(7, len(all_atoms))
    # Alignment structures don't have charges or CHARMM types
    first_atom = all_atoms[0]
    self.assertEqual(IMP.atom.Charged.get_is_setup(first_atom), False)
    self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(first_atom), False)
def test_feature_triplet_residue(self):
    """Check triplet features with residue qualifier"""
    env = self.get_environ()
    mlib = self.get_mdt_library()
    mlib.tuple_classes.read("test/data/trpcls-residue.lib")
    table = mdt.Table(mlib, features=mdt.features.TupleType(mlib))
    mdl = modeller.model(env)
    mdl.build_sequence("AAACAAACSAA")
    aln = modeller.alignment(env)
    aln.append_model(mdl, align_codes="test")
    table.add_alignment(aln)
    expected = [6.0, 2.0, 1.0, 1.0, 0.0, 0.0]
    self.assertEqual([count for count in table], expected)
def test_read_alnstructure(self):
    """Check reading a Modeller alignment structure"""
    env = self.get_environ()
    mdl = modeller.model(env)
    mdl.build_sequence('C')
    aln = modeller.alignment(env)
    aln.append_model(mdl, align_codes='test', atom_files='test')

    # Load the alignment structure into a fresh IMP kernel model
    imp_model = IMP.kernel.Model()
    loader = IMP.modeller.ModelLoader(aln[0])
    hierarchy = loader.load_atoms(imp_model)
    all_atoms = IMP.atom.get_by_type(hierarchy, IMP.atom.ATOM_TYPE)
    self.assertEqual(7, len(all_atoms))
    # Alignment structures don't have charges or CHARMM types
    first_atom = all_atoms[0]
    self.assertEqual(IMP.atom.Charged.get_is_setup(first_atom), False)
    self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(first_atom), False)
def test_script3(self):
    """Test step 3 (density segmentation)"""
    # Make sure the script runs without errors (check_call raises on a
    # nonzero exit status, so its return value is not needed)
    subprocess.check_call(['scripts/script3_density_segmentation.py'])
    # Should have produced a PDB with coordinates of all 14 subunit centers
    e = modeller.environ()
    m = modeller.model(e, file='output/groel_segments_center.pdb')
    self.assertEqual(len(m.atoms), 14)
    self.assertEqual(len(m.residues), 14)
    # load_configuration file should load all 14 subunits, and set level.
    # Close the file before it is unlinked below.
    with open('output/load_configuration.cmd') as fh:
        wc = len(fh.readlines())
    self.assertEqual(wc, 15)
    os.unlink('output/load_configuration.cmd')
    os.unlink('output/groel_segments_center.pdb')
    for i in range(14):
        os.unlink('output/groel_subunit_%d.mrc' % i)
def test_detect_invalid_residue_types_bad(self):
    """Test _detect_invalid_residue_types() with bad sequence"""
    with utils.temporary_directory() as tmpdir:
        fname = os.path.join(tmpdir, 'test.pdb')
        # PDB ATOM records are fixed-column, so each record must be on its
        # own line with the standard column layout.
        # NOTE(review): the column spacing below was reconstructed from the
        # field values; confirm against the intended fixture.
        with open(fname, 'w') as fh:
            fh.write("""
ATOM      1  N   CYS A   1      18.511  -1.416  15.632  1.00  6.84           C
ATOM      2  C   CYS A   1      18.511  -1.416  15.632  1.00  6.84           C
ATOM      3  N   HIE A   2      18.511  -1.416  15.632  1.00  6.84           C
ATOM      4  C   HIE A   2      18.511  -1.416  15.632  1.00  6.84           C
ATOM      5  N   HSD B   3      18.511  -1.416  15.632  1.00  6.84           C
ATOM      6  C   HSD B   3      18.511  -1.416  15.632  1.00  6.84           C
""")
        e = modeller.environ()
        m = modeller.model(e, file=fname)
        # The model read from this file is expected to be rejected
        self.assertRaises(cleaning.InvalidResiduesError,
                          cleaning._detect_invalid_residue_types, m)
def test_dihedral_diff_periodic(self):
    """Make sure that dihedral difference features are periodic"""

    def set_omega(mdl, angle):
        """Place CA:1, C:1, N:2 and CA:2 so that omega equals `angle`
           (in degrees)."""
        radians = math.pi * angle / 180.0
        atoms = mdl.atoms
        n2 = atoms["N:2"]
        n2.x = n2.y = n2.z = 0.0
        c = atoms["C:1"]
        c.x = -2.0
        c.y = c.z = 0.0
        ca = atoms["CA:1"]
        ca.x, ca.y, ca.z = -2.0, 2.0, 0.0
        ca2 = atoms["CA:2"]
        ca2.x = 0.0
        ca2.y = 2.0 * math.cos(radians)
        ca2.z = 2.0 * math.sin(radians)

    env = self.get_environ()
    mlib = self.get_mdt_library()
    # Make bins start at slightly less than -180, to allow for floating
    # point rounding
    bins = mdt.uniform_bins(36, -180.01, 10)
    omegadiff = mdt.features.OmegaDihedralDifference(mlib, bins)

    # Note that difference must be shortest around the circle, so
    # 100.0 - (-100.0) is not 200 degrees but -160 degrees
    cases = (
        (80.0, 80.0, 0.0),
        (80.0, -80.0, -160.0),
        (-80.0, 80.0, 160.0),
        (-100.0, 100.0, -160.0),
        (100.0, -100.0, 160.0),
    )
    for dih1, dih2, expected in cases:
        table = mdt.Table(mlib, features=omegadiff)
        aln = modeller.alignment(env)
        for omega in (dih1, dih2):
            mdl = modeller.model(env)
            mdl.build_sequence("CC")
            set_omega(mdl, omega)
            aln.append_model(mdl, atom_files="test", align_codes="test")
        table.add_alignment(aln, sympairs=True)
        # 2 data points, 1 for each residue
        self.assertInTolerance(table.sample_size, 2.0, 1e-5)
        # Last residue has no omega, so is always undefined
        self.assertInTolerance(table[-1], 1.0, 1e-5)
        expected_bin = int((expected + 180.0) / 10.0)
        self.assertInTolerance(table[expected_bin], 1.0, 1e-5)
def find(self):
    """Count restrained and charged atom pairs for every residue.

    Reads the model from ``self.pdb_file`` and the restraints from
    ``self.rsr_file``. For each atom pair yielded by
    ``get_restrained_atoms`` both residues get their total incremented,
    and their charge count as well when ``charged_ca_pair`` accepts the
    pair.

    Results are stored on the instance: ``self._m`` (the model),
    ``self._total`` (per-residue pair counts) and ``self._contacts``
    (list of ``(residue_number, charged_count)`` with 1-based numbers).
    """
    m = self._m = modeller.model(self.env, file=self.pdb_file)
    nres = len(m.residues)
    charge = [0] * nres
    total = [0] * nres
    # Keep the loop inside the `with` so the file stays open while the
    # restraints are being consumed, and is closed afterwards
    with open(self.rsr_file) as fh:
        for a1, a2 in get_restrained_atoms(m, fh):
            # residue.index is 1-based; the lists are 0-based
            r1 = a1.residue.index - 1
            r2 = a2.residue.index - 1
            total[r1] += 1
            total[r2] += 1
            if charged_ca_pair(a1, a2):
                charge[r1] += 1
                charge[r2] += 1
    self._total = total
    # NOTE(review): the charged count is only reported for residues with
    # more than 143 restrained pairs; confirm this threshold is intended.
    self._contacts = [(n + 1, c if t > 143 else 0)
                      for n, (c, t) in enumerate(zip(charge, total))]
def get_q_ca(target, templates, rcut):
    """Compute Q scores of each template against the target and write them.

    Coordinates and distances are taken from ``target``; every entry of
    ``templates`` is then scored with ``get_qi_ca`` using cutoff ``rcut``.
    The first value returned for each template is collected into
    ``qscore1to<N>.dat`` and the second into ``qs_cut1to<N>.dat``, where
    ``<N>`` is the number of target coordinates. Both files are written to
    the current working directory.
    """
    import modeller
    modeller.log.none()
    e = modeller.environ()
    e.io.hetatm = False
    m = modeller.model(e)
    coord = get_coordinates(m, target)
    dist = get_distances(coord)
    ncoord = len(coord)

    q_tot = []
    q_cut = []
    for template in templates:
        t, c = get_qi_ca(m, ncoord, dist, template, rcut)
        q_tot.append(t)
        q_cut.append(c)

    # Use context managers so the output is flushed and closed on return
    with open('qscore1to%d.dat' % ncoord, 'w') as fh:
        write_q_scores(q_tot, fh)
    with open('qs_cut1to%d.dat' % ncoord, 'w') as fh:
        write_q_scores(q_cut, fh)
def test_simple(self):
    """Run get_pm_initialstruct end to end and check the built model"""
    from allosmod.get_pm_initialstruct import get_pm_initialstruct
    with utils.temporary_directory() as tmpdir:
        self.setup_inputs(tmpdir)
        command = ['allosmod', 'get_pm_initialstruct',
                   '--target', 'foo', '--keep-alignment',
                   'test.aln', 'templates', '.', '1', 'slow']
        check_output(command, cwd=tmpdir)

        # All outputs are expected in the pred_1fdx subdirectory
        outdir = os.path.join(tmpdir, 'pred_1fdx')
        env = modeller.environ()
        mdl = modeller.model(
            env, file=os.path.join(outdir, 'foo.B99990001.pdb'))
        codes = [res.code for res in mdl.residues]
        self.assertEqual(codes, ['A', 'W'])
        self.assertEqual(mdl.chains[0].name, 'A')

        generated = ('1fdx', 'foo.B99990001.pdb', 'foo.ini', 'foo.sch',
                     'test.aln', 'foo.D00000001', 'foo.rsr',
                     'foo.V99990001')
        for name in generated:
            os.unlink(os.path.join(outdir, name))
def test_simple(self):
    """Run make_mod_inputs end to end and check the files it writes"""
    with allosmod.util.temporary_directory() as tempdir:
        self.setup_inputs(dir=tempdir)
        command = ['allosmod', 'make_mod_inputs', '--', '1fdx',
                   'templates', '-3333', '3', '3', '3', '4']
        check_output(command, cwd=tempdir)

        env = modeller.environ()
        for ini_name in ('random.ini', '1fdx.ini'):
            mdl = modeller.model(env, file=os.path.join(tempdir, ini_name))
            codes = [res.code for res in mdl.residues]
            self.assertEqual(codes, ['A', 'Y'])
            # Should have converted CA-only in all-atom model
            self.assertEqual(len(mdl.atoms), 18)

        with open(os.path.join(tempdir, '1fdx.rsr')) as fh:
            restraint_lines = fh.readlines()
        self.assertEqual(len(restraint_lines), 78)

        leftovers = ('templates', 'avgpdb.pdb', '5fd1', 'align.ali',
                     'random.ini', '1fdx.ini', '1fdx.rsr')
        for name in leftovers:
            os.unlink(os.path.join(tempdir, name))
def test_modeller_restraints(self):
    """Check that Modeller restraints can be scored from IMP"""
    env = modeller.environ()
    env.edat.dynamic_sphere = False
    env.libs.topology.read("${LIB}/top_heav.lib")
    env.libs.parameters.read("${LIB}/par.lib")
    modmodel = modeller.model(env)
    modmodel.build_sequence("GGCC")

    # Gaussian distance restraint between the first and last atoms
    end_to_end = modeller.features.distance(modmodel.atoms[0],
                                            modmodel.atoms[-1])
    restraint = modeller.forms.gaussian(
        feature=end_to_end, mean=10.0, stdev=1.0,
        group=modeller.physical.xy_distance)
    modmodel.restraints.add(restraint)

    # Load the same structure into IMP and wrap the Modeller restraints
    imp_model = IMP.kernel.Model()
    loader = IMP.modeller.ModelLoader(modmodel)
    protein = loader.load_atoms(imp_model)
    atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
    wrapped = IMP.modeller.ModellerRestraints(imp_model, modmodel, atoms)
    imp_model.add_restraint(wrapped)

    assertSimilarModellerIMPScores(self, modmodel, protein)
    self.assertAlmostEqual(imp_model.evaluate(False), 5.7837, delta=1e-3)
def test_imp_restraints(self):
    """Check that IMP restraints can be used as Modeller energy terms"""
    env = modeller.environ()
    env.edat.dynamic_sphere = False
    env.libs.topology.read('${LIB}/top_heav.lib')
    env.libs.parameters.read('${LIB}/par.lib')
    modmodel = modeller.model(env)
    modmodel.build_sequence('GGCC')

    # Mirror the Modeller model in IMP
    imp_model = IMP.Model()
    protein = IMP.modeller.ModelLoader(modmodel).load_atoms(imp_model)
    atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)

    # Harmonic distance restraint between the first and last atoms
    harmonic = IMP.core.Harmonic(10.0, 1.0)
    restraint = IMP.core.DistanceRestraint(imp_model, harmonic,
                                           atoms[0], atoms[-1])
    sf = IMP.core.RestraintsScoringFunction([restraint])

    # Register the IMP scoring function with Modeller's energy terms
    energy_terms = modmodel.env.edat.energy_terms
    energy_terms.append(IMP.modeller.IMPRestraints(atoms, sf))

    assertSimilarModellerIMPScores(self, sf, modmodel, protein)
    self.assertAlmostEqual(sf.evaluate(False), 9.80, delta=1e-2)
def test_get_coordinates_sc(self):
    """Test get_coordinates_sc() on a four-residue input file"""
    env = modeller.environ()
    mdl = modeller.model(env)
    pdb_path = os.path.join(TOPDIR, 'test', 'input', 'test_coord.pdb')
    coord = cryptosite.analysis.get_coordinates_sc(mdl, pdb_path)
    self.assertEqual(len(coord), 4)

    # First residue is a GLY with no CA -> no coordinates
    self.assertEqual(coord[0], None)

    # Second residue is a GLY with CA -> coordinates are those of
    # the GLY (0,0,0)
    for axis, expected in zip('xyz', (0., 0., 0.)):
        self.assertAlmostEqual(getattr(coord[1], axis), expected, places=1)

    # Third residue is a MET with no sidechain -> no coordinates
    self.assertEqual(coord[2], None)

    # Fourth residue is a MET with a sidechain -> mean coordinates returned
    for axis, expected in zip('xyz', (5., 10., 15.)):
        self.assertAlmostEqual(getattr(coord[3], axis), expected, places=1)
def get_contacts(pdb_file, rcut):
    """Yield ``(residue_i, residue_j, distance)`` for contacting residues.

    Residues are read from ``pdb_file`` (HETATM records included) and
    reduced with ``get_average_coordinate``. Only pairs at least three
    positions apart in the residue list are considered, and pairs where
    both residues are HETATMs are skipped. A pair is yielded when
    ``get_contact_dist`` returns a distance for it, given ``rcut`` squared.
    """
    import modeller
    modeller.log.none()
    env = modeller.environ()
    env.io.hetatm = True
    mdl = modeller.model(env, file=pdb_file)

    cutoff_sq = rcut * rcut
    centers = [get_average_coordinate(res) for res in mdl.residues]
    count = len(centers)
    for i in range(count - 3):
        first = centers[i]
        for second in centers[i + 3:]:
            ri, rj = first.r, second.r
            # do not print het-het contacts
            if not (ri.hetatm and rj.hetatm):
                dist = get_contact_dist(first, second, cutoff_sq)
                if dist is not None:
                    yield ri, rj, dist
def get_qioft(landscape, rcut=11.):
    """Calculate Qi for all models in a landscape.

    For each subdirectory of ``landscape`` the reference file name is read
    from the first line of its ``list`` file (prefixed with ``pm_``), and
    every ``pm.pdb.B[1-8]*.pdb`` model in that subdirectory is passed to
    ``get_qi``. Output goes to ``qioft_<reference>_<rcut>sc.dat`` in the
    same subdirectory.
    """
    import modeller
    modeller.log.none()
    env = modeller.environ()
    env.io.hetatm = False
    mdl = modeller.model(env)

    for dirname in _get_subdirectories(landscape):
        with open(os.path.join(dirname, 'list')) as fh:
            reference = 'pm_' + fh.readline().strip()
        reference_path = os.path.join(dirname, reference)
        pattern = os.path.join(dirname, 'pm.pdb.B[1-8]*.pdb')
        models = sorted(glob.glob(pattern))

        coord = get_coordinates_sc(mdl, reference_path)
        dist = get_distances(coord, rcut)

        out_name = 'qioft_%s_%dsc.dat' % (reference, int(rcut))
        with open(os.path.join(dirname, out_name), 'w') as out:
            for model in models:
                get_qi(mdl, len(coord), dist, model, out)
                out.write('\n')
def test_modeller_restraints(self):
    """Check that Modeller restraints can be scored from IMP"""
    env = modeller.environ()
    env.edat.dynamic_sphere = False
    for library, path in ((env.libs.topology, '${LIB}/top_heav.lib'),
                          (env.libs.parameters, '${LIB}/par.lib')):
        library.read(path)
    modmodel = modeller.model(env)
    modmodel.build_sequence('GGCC')

    # Restrain the distance between the first and last atoms
    first_atom, last_atom = modmodel.atoms[0], modmodel.atoms[-1]
    feat = modeller.features.distance(first_atom, last_atom)
    gaussian = modeller.forms.gaussian(
        feature=feat, mean=10.0, stdev=1.0,
        group=modeller.physical.xy_distance)
    modmodel.restraints.add(gaussian)

    # Score the same restraints through IMP
    imp_model = IMP.kernel.Model()
    protein = IMP.modeller.ModelLoader(modmodel).load_atoms(imp_model)
    atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
    imp_model.add_restraint(
        IMP.modeller.ModellerRestraints(imp_model, modmodel, atoms))

    assertSimilarModellerIMPScores(self, modmodel, protein)
    self.assertAlmostEqual(imp_model.evaluate(False), 5.7837, delta=1e-3)
def _create_aligment(self, env, base_models):
    """Align the target sequence with the given template chain files.

    Builds a Modeller alignment holding the target sequence
    (``self.seqrecord``) followed by one structure per entry of
    ``base_models``, runs ``malign()`` on it and writes the result into
    ``self.model_directory()`` as ``<id>.ali`` (PIR) and ``<id>.pap``
    (PAP). An identity table is also written to ``<id>_family.mat``,
    relative to the current working directory.

    :param env: Modeller environment; its ``io.atom_files_directory`` is
        replaced so that structures are looked up in ``self.out_folder``.
    :param base_models: paths of the template PDB chain files.
    :return: list of the template alignment codes, in input order.
    """
    _log.debug("creating alignments for %s with %s pdbs" %
               (self.seqrecord.id, len(base_models)))
    env.io.atom_files_directory = [self.out_folder + '/']

    aln = alignment(env)
    aln.append_sequence(str(self.seqrecord.seq))
    aln[0].code = str(self.seqrecord.id)

    aligned_models = []
    for i, pdb_chain_file_path in enumerate(base_models, 1):
        # TODO: replace this ad-hoc file name parsing
        file_name = pdb_chain_file_path.split("/")[-1]
        code = file_name.replace(".ent", "").replace("pdb", "")
        m = model(env, file=code)
        aln.append_model(m, align_codes=code)
        aln[i].code = code
        aligned_models.append(code)

    aln.malign()
    aln.id_table(matrix_file=self.seqrecord.id + '_family.mat')

    # Build both output paths with os.path.join so they land in the same
    # directory whether or not model_directory() ends with a separator
    out_base = os.path.join(self.model_directory(), self.seqrecord.id)
    ali_path = out_base + '.ali'
    aln.write(file=ali_path, alignment_format='PIR')
    # Report the path that was actually checked
    assert os.path.exists(ali_path), "NOOOOOOOOOOOO!!!!: " + ali_path
    aln.write(file=out_base + '.pap', alignment_format='PAP')
    return aligned_models
def align_res_nums(apo_pdb_file, apo_pdb_id, apo_chain_id, holo_pdb_file,
                   holo_pdb_id, holo_chain_id):
    """Map residue numbers of an apo chain onto those of a holo chain.

    The two chains are aligned with Modeller's ``salign()``; the alignment
    is written to a temporary PIR file in the current working directory,
    parsed, and removed again.

    Returns a dictionary whose keys are the apo residue numbers (as
    returned by ``get_numbers_from_pdb``) and whose values are the aligned
    holo residue numbers, or ``"NA"`` where the holo sequence has a gap.
    Holo residues aligned to an apo gap are not included.
    """

    def read_sequence(lines, start):
        """Join the PIR sequence lines from `start` up to the closing '*'.

        Returns the sequence without the '*' and the index of its last
        line."""
        pieces = []
        index = start
        while True:
            text = lines[index].strip()
            pieces.append(text)
            # endswith() is safe on blank lines, unlike indexing text[-1]
            if text.endswith("*"):
                break
            index += 1
        return "".join(pieces)[:-1], index

    env = modeller.environ()
    aln = modeller.alignment(env)
    apo_model = modeller.model(
        env, file=apo_pdb_file,
        model_segment=("FIRST:%s" % (apo_chain_id),
                       "LAST:%s" % (apo_chain_id)))
    aln.append_model(apo_model, atom_files=apo_pdb_id,
                     align_codes="%s%s" % (apo_pdb_id, apo_chain_id))
    holo_model = modeller.model(
        env, file=holo_pdb_file,
        model_segment=("FIRST:%s" % (holo_chain_id),
                       "LAST:%s" % (holo_chain_id)))
    aln.append_model(holo_model, atom_files=holo_pdb_id,
                     align_codes="%s%s" % (holo_pdb_id, holo_chain_id))
    aln.salign()

    alignment_filename = "%s%s_%s%s_salign_output.ali" % (
        apo_pdb_id, apo_chain_id, holo_pdb_id, holo_chain_id)
    aln.write(file=alignment_filename, alignment_format="PIR")
    try:
        with open(alignment_filename, "r") as alignment_opened:
            alignment_lines = alignment_opened.readlines()
    finally:
        # Remove the scratch file even if reading it failed
        os.remove(alignment_filename)

    # Ignore the header lines. The format requires a 2-line header; there
    # may be a blank line before this.
    start = 2 if alignment_lines[0][0] == ">" else 3
    apo_sequence_aligned, last = read_sequence(alignment_lines, start)
    # Same header layout for the second record
    start = last + (3 if alignment_lines[last + 1][0] == ">" else 4)
    holo_sequence_aligned, _ = read_sequence(alignment_lines, start)

    apo_pdb_res_numbers = get_numbers_from_pdb(apo_pdb_file, apo_chain_id)
    holo_pdb_res_numbers = get_numbers_from_pdb(holo_pdb_file,
                                                holo_chain_id)

    dict_key_apo_val_holo = {}
    # Counters of how many residues of each sequence have been consumed
    apo_residues_passed = 0
    holo_residues_passed = 0
    for apo_res, holo_res in zip(apo_sequence_aligned,
                                 holo_sequence_aligned):
        holo_present = holo_res != "-"
        if apo_res != "-":
            apo_number = apo_pdb_res_numbers[apo_residues_passed]
            if holo_present:
                dict_key_apo_val_holo[apo_number] = \
                    holo_pdb_res_numbers[holo_residues_passed]
            else:
                dict_key_apo_val_holo[apo_number] = "NA"
            apo_residues_passed += 1
        if holo_present:
            holo_residues_passed += 1

    print(dict_key_apo_val_holo)
    print(apo_sequence_aligned)
    print(holo_sequence_aligned)
    return dict_key_apo_val_holo
# This demonstrates using IMP.kernel.Restraints as additional energy terms in the
# Modeller scoring function, so that IMP scoring terms can be incorporated into
# existing comparative modeling pipelines.
#

import modeller
import IMP
import IMP.atom
import IMP.core
import IMP.modeller

# Set up Modeller and build a model from the GGCC primary sequence
e = modeller.environ()
e.edat.dynamic_sphere = False
e.libs.topology.read('${LIB}/top_heav.lib')
e.libs.parameters.read('${LIB}/par.lib')
modmodel = modeller.model(e)
modmodel.build_sequence('GGCC')

# Set up IMP and load the Modeller model in as a new Hierarchy
m = IMP.kernel.Model()
protein = IMP.modeller.ModelLoader(modmodel).load_atoms(m)

# Create a simple IMP distance restraint between the first and last atoms
# (IMP.atom is imported explicitly above rather than relying on another
# module to have imported it)
atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
r = IMP.core.DistanceRestraint(IMP.core.Harmonic(10.0, 1.0),
                               atoms[0].get_particle(),
                               atoms[-1].get_particle())
m.add_restraint(r)

# Use the IMPRestraints class to add all of the IMP restraints to the
# Modeller scoring function
def runmodeller(target, database_path='default', models_path='default',
                templates_path='default', working='default',
                mod_per_temp=20, excludes=None, max_seq_id=0.95,
                min_seq_id=0.25, max_eval=0.01, num_iter=1, gaps=False):
    '''Search for templates and build comparative models with Modeller.

    File requirement:
      A .ali sequence file and a pdb_95.pir database file

    Parameters:
      target:         a .ali file path specifying the sequence of the protein
      database_path:  the folder containing pdb_95.pir. Default is ./database
      models_path:    the folder to save the models to. Default is ./models
      templates_path: the folder to save the templates to.
                      Default is ./templates
      working:        the working dir. Default is ./working
      mod_per_temp:   number of models to be generated by modeller for each
                      template. Default is 20
      excludes:       list of strings specifying PDB files to be excluded
                      from the templates. Default is no exclusions
      max_seq_id:     maximum sequence identity for a template to be
                      considered. Ranges from 0 to 1. Default is 0.95
      min_seq_id:     minimum sequence identity for a template to be
                      considered. Ranges from 0 to 1. Default is 0.25
      max_eval:       passed to the profile build as max_aln_evalue
      num_iter:       passed to the profile build as n_prof_iterations
      gaps:           passed to the profile build as gaps_in_target

    The models, templates and working folders are created when missing.
    The process changes into the working folder while running and changes
    back before returning, including when an error is raised.

    Raises ValueError if target does not end in '.ali'.

    This function returns a list of paths of the generated models
    '''
    # urlopen lives in a different module on Python 2 and Python 3
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    def _resolve_dir(path, default):
        """Return the absolute folder for `path`, creating it if missing."""
        directory = os.path.abspath(default if path == 'default' else path)
        if not os.path.exists(directory):
            os.mkdir(directory)
        return directory

    if excludes is None:
        excludes = []

    # set log to verbose
    modeller.log.verbose()
    env = modeller.environ()
    entering_dir = os.getcwd()

    # set paths
    if database_path == 'default':
        database_dir = os.path.abspath('./database')
    else:
        database_dir = os.path.abspath(database_path)
    target_file = os.path.basename(target)
    target_dir = os.path.dirname(os.path.abspath(target))
    if not target_file.endswith('.ali'):
        # Fail early: every output name below is derived from target_name
        raise ValueError("target must be a .ali file, got %r" % (target,))
    target_name = target_file[:-4]
    working_dir = _resolve_dir(working, './working')
    models_dir = _resolve_dir(models_path, './models')
    template_dir = _resolve_dir(templates_path, './templates')

    # cd to working. The finally clause cds back at the end
    os.chdir(working_dir)
    try:
        # -- Prepare the input files
        # -- Read in the sequence database
        sdb = modeller.sequence_db(env)
        sdb.read(seq_database_file=database_dir + '/pdb_95.pir',
                 seq_database_format='PIR', chains_list='ALL',
                 minmax_db_seq_len=(30, 4000), clean_sequences=True)
        # -- Write the sequence database in binary form
        sdb.write(seq_database_file=database_dir + '/pdb_95.bin',
                  seq_database_format='BINARY', chains_list='ALL')
        # -- Now, read in the binary database
        sdb.read(seq_database_file=database_dir + '/pdb_95.bin',
                 seq_database_format='BINARY', chains_list='ALL')

        # -- Read in the target sequence/alignment
        target_aln = modeller.alignment(env)
        target_aln.append(file=target_dir + '/' + target_file,
                          alignment_format='PIR', align_codes='ALL')
        # -- Convert the input sequence/alignment into profile format
        target_prf = target_aln.to_profile()
        # -- Scan sequence database to pick up homologous sequences
        target_prf.build(sdb, matrix_offset=-450,
                         rr_file='${LIB}/blosum62.sim.mat',
                         gap_penalties_1d=(-500, -50),
                         n_prof_iterations=num_iter, check_profile=True,
                         max_aln_evalue=max_eval, gaps_in_target=gaps)
        # -- Write out the profile in text format
        profile_path = working_dir + '/' + target_name + '_profile.prf'
        target_prf.write(file=profile_path, profile_format='TEXT')
        # -- Convert the profile back to alignment format
        target_aln = target_prf.to_alignment()
        # -- Write out the alignment file
        target_aln.write(
            file=working_dir + '/' + target_name + '_profile.ali',
            alignment_format='PIR')

        # CLEAN UP
        del sdb, target_aln, target_prf, env

        # Read the name of the templates from the text profile
        templates = []
        with open(profile_path, 'r') as txt_input:
            for eachline in txt_input:
                if eachline.lstrip(' ').startswith('#'):
                    continue
                entries = eachline.split()
                if len(entries) != 13:
                    continue
                if entries[2] != 'X':
                    continue
                templates.append(template(entries[1], float(entries[10])))

        # Select templates: drop excluded codes and out-of-range identities
        templates = [
            t for t in templates
            if not ((t.code in excludes)
                    or (t.seqid > max_seq_id * 100)
                    or (t.seqid < min_seq_id * 100))]

        # Download templates pdb
        # NOTE(review): confirm this download URL is still served.
        for eachtemplate in templates:
            pdbname = eachtemplate.code.upper()
            url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbname
            response = urlopen(url)
            try:
                content = response.read()
            finally:
                response.close()
            # Binary mode: the downloaded data is written unchanged
            with open(template_dir + '/' + pdbname.lower() + '.pdb',
                      'wb') as pdb_download:
                pdb_download.write(content)

        # model alignment
        alnlist = []
        for tmpl in templates:
            env = modeller.environ()
            aln = modeller.alignment(env)
            mdl = modeller.model(
                env, file=template_dir + '/' + tmpl.code,
                model_segment=('FIRST:' + tmpl.chain,
                               'LAST:' + tmpl.chain))
            aln.append_model(mdl, align_codes=tmpl.name,
                             atom_files=tmpl.filename)
            aln.append(file=target_dir + '/' + target_file,
                       align_codes=target_name)
            aln.align2d()
            aln_base = working_dir + '/' + target_name + '-' + tmpl.name
            aln.write(file=aln_base + '.ali', alignment_format='pir')
            aln.write(file=aln_base + '.pap', alignment_format='pap')
            alnlist.append(aln_base + '.ali')

        # Make models
        filelist = []
        for alnfile, tmpl in zip(alnlist, templates):
            env = modeller.environ()
            env.io.atom_files_directory = [target_dir, working_dir,
                                           template_dir]
            a = modeller.automodel.automodel(
                env, alnfile=alnfile, knowns=tmpl.name,
                sequence=target_name,
                assess_methods=(modeller.automodel.assess.DOPE,
                                modeller.automodel.assess.GA341))
            a.starting_model = 1
            a.ending_model = mod_per_temp
            a.make()
            # Move each generated model into the models folder, tagging
            # the file name with the template code
            for j in range(1, mod_per_temp + 1):
                suffix = '.B9999' + str(j).zfill(4) + '.pdb'
                scrname = target_name + suffix
                tgtname = (models_dir + '/' + target_name + '_'
                           + tmpl.code + suffix)
                os.rename(scrname, tgtname)
                filelist.append(tgtname)
        return filelist
    finally:
        os.chdir(entering_dir)
def align_res_nums(key_pdb_file, key_chain_id, value_pdb_file, value_chain_id):
    """Determine which residues in one PDB file correspond to which in another PDB file.

    Parameters
    ----------
    key_pdb_file : string
        The location of the pdb file whose residue numbers will be keys in the
        returned dictionary.
    key_chain_id : string
        The chain of key_pdb_file that will be aligned.
    value_pdb_file : string
        The location of the pdb file whose residue numbers will be values in the
        returned dictionary.
    value_chain_id : string
        The chain of value_pdb_file that will be aligned.

    Returns
    -------
    dict_residue_nums : dictionary{string : string}
        The keys and values are string-typed residue numbers (from key_pdb_file
        and value_pdb_file). Any residues that are missing from value_pdb_file
        will be assigned the value "NA". If any residues in key_pdb_file are
        classified as HETATMs, then they will only be included in
        dict_residue_nums if they are MSE, MEX, or ABU. This matches MODELLER's
        behavior.
    """
    # Local import: only needed to build the path inside the temp directory.
    import os

    def read_sequence(lines, start):
        """Join the PIR sequence lines from `start` up to the closing '*'.

        Returns the sequence without the '*' and the index of its last line.
        """
        pieces = []
        index = start
        while True:
            text = lines[index].strip()
            pieces.append(text)
            # endswith() is safe on blank lines, unlike indexing text[-1].
            if text.endswith("*"):
                break
            index += 1
        return "".join(pieces)[:-1], index

    # A temporary directory to store the output of Modeller's alignment.
    temp_dir_path = tempfile.mkdtemp()
    try:
        env = modeller.environ()
        aln = modeller.alignment(env)
        key_model = modeller.model(
            env, file=key_pdb_file,
            model_segment=("FIRST:%s" % (key_chain_id),
                           "LAST:%s" % (key_chain_id)))
        aln.append_model(key_model, atom_files=key_pdb_file,
                         align_codes="key%s" % (key_chain_id))
        value_model = modeller.model(
            env, file=value_pdb_file,
            model_segment=("FIRST:%s" % (value_chain_id),
                           "LAST:%s" % (value_chain_id)))
        aln.append_model(value_model, atom_files=value_pdb_file,
                         align_codes="value%s" % (value_chain_id))
        aln.salign()

        # Join with a path separator so the file is created INSIDE the
        # temporary directory and is removed together with it.
        salign_out_loc = os.path.join(
            temp_dir_path,
            "key%s_value%s_salign_output.ali" % (key_chain_id,
                                                 value_chain_id))
        aln.write(file=salign_out_loc, alignment_format="PIR")
        with open(salign_out_loc, "r") as alignment_opened:
            alignment_lines = alignment_opened.readlines()
    finally:
        # Clean up even when Modeller or the file read raised.
        shutil.rmtree(temp_dir_path)

    # Ignore the header lines. The format requires a 2-line header; there may
    # be a blank line before this.
    start = 2 if alignment_lines[0][0] == ">" else 3
    key_sequence_aligned, last = read_sequence(alignment_lines, start)
    # Same header layout for the second record.
    start = last + (3 if alignment_lines[last + 1][0] == ">" else 4)
    value_sequence_aligned, _ = read_sequence(alignment_lines, start)

    key_pdb_res_numbers = get_numbers_from_pdb(key_pdb_file, key_chain_id)
    value_pdb_res_numbers = get_numbers_from_pdb(value_pdb_file,
                                                 value_chain_id)

    dict_residue_nums = {}
    # Counters of how many residues of each sequence have been consumed.
    key_residues_passed = 0
    value_residues_passed = 0
    for key_res, value_res in zip(key_sequence_aligned,
                                  value_sequence_aligned):
        value_present = value_res != "-"
        if key_res != "-":
            current_key_resnum = key_pdb_res_numbers[key_residues_passed]
            if value_present:
                # Both sequences have a residue here: map key -> value.
                dict_residue_nums[current_key_resnum] = \
                    value_pdb_res_numbers[value_residues_passed]
            else:
                # The value sequence has a gap at this position.
                dict_residue_nums[current_key_resnum] = "NA"
            key_residues_passed += 1
        # A value residue aligned to a key gap adds no dictionary entry, but
        # still advances the value counter.
        if value_present:
            value_residues_passed += 1
    return dict_residue_nums
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1, *, _self=cmd):
    '''
DESCRIPTION

    Remodel the given selection using modeller. This is useful for example
    to build incomplete sidechains. More complicated modelling tasks are
    not the intention of this simple interface.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: use modeller's allhmodel class instead of automodel
    {default: 0}
    '''
    import modeller
    from modeller.automodel import automodel, allhmodel
    import tempfile, shutil, os

    _assert_package_import()
    from .editing import update_identifiers

    nmodels, hetatm, quiet = int(nmodels), int(hetatm), int(quiet)
    builder_cls = allhmodel if int(hydro) else automodel

    # Everything modeller reads or writes goes into a scratch directory
    workdir = tempfile.mkdtemp()
    template_path = os.path.join(workdir, 'template.pdb')
    aln_path = os.path.join(workdir, 'aln.pir')

    previous_dir = os.getcwd()
    os.chdir(workdir)

    if not quiet:
        print(' Notice: PWD=%s' % (workdir))

    try:
        modeller.log.none()
        env = modeller.environ()
        env.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        _self.alter('(%s) and polymer' % (selection), 'type="ATOM"')

        _self.save(template_path, selection)
        template = modeller.model(env, file=template_path)

        aln = modeller.alignment(env)
        aln.append_model(template, align_codes='foo',
                         atom_files=template_path)

        # get sequence from non-present atoms
        if not sequence:
            if _self.count_atoms('(%s) & !present' % (selection)):
                sequence = get_seq(selection)

        if sequence:
            aln.append_sequence(sequence)
            aln[-1].code = 'bar'
            aln.malign()
        aln.write(aln_path)

        structure_codes = [entry.code for entry in aln
                           if entry.prottyp.startswith('structure')]
        builder = builder_cls(env, alnfile=aln_path,
                              sequence=aln[-1].code,
                              knowns=structure_codes)
        builder.max_ca_ca_distance = 30.0

        if nmodels > 1:
            builder.ending_model = nmodels

            from multiprocessing import cpu_count
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                job = parallel.job(parallel.local_slave()
                                   for _ in range(ncpu))
                builder.use_parallel_job(job)

        builder.make()

        # Load every generated model into the new object
        for result in builder.outputs:
            _self.load(result['name'], name, quiet=quiet)
    finally:
        os.chdir(previous_dir)
        shutil.rmtree(workdir)

    _self.align(name, selection, cycles=0)
    if not sequence:
        update_identifiers(name, selection, _self=_self)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
off1 = int(sys.argv[3])
off2 = int(sys.argv[4])

# Invert the mapping so the offsets given on the command line can be
# looked up and replaced by the corresponding keys of pdb_to_uniprot
pdb_to_uniprot = modelutils.read_pdb_to_uniprot(pdbfile, chain)
uniprot_to_pdb = dict([(v, k) for (k, v) in pdb_to_uniprot.iteritems()])
off1 = uniprot_to_pdb[off1]
off2 = uniprot_to_pdb[off2]

# Suppress verbose version notice
with open("/dev/null", "w") as fnull:
    oldout = sys.stdout
    sys.stdout = fnull
    try:
        env = modeller.environ()
        mdl = modeller.model(env, file=pdbfile,
                             model_segment=('FIRST:' + chain,
                                            'LAST:' + chain))
    finally:
        # Always restore stdout, even if Modeller raises; otherwise later
        # output would go to the already-closed null file
        sys.stdout = oldout


def find_res(off):
    """Return the single residue of `mdl` whose number equals `off`.

    Raises Exception if there is no such residue or more than one."""
    match = [res for res in mdl.residues if int(res.num) == off]
    if len(match) != 1:
        raise Exception("Found %d residues with PDB offset %d" %
                        (len(match), off))
    return match[0]


res1 = find_res(off1)
help='mobile pdb structure file to transfer sequence on', type=str) parser.add_argument( '-r', '--ref', help='reference pdb structure file with sequence to transfer', type=str) args = parser.parse_args() env = modeller.environ() lib = '/usr/lib/modeller9.23/modlib' env.libs.topology.read(file=f'{lib}/top_heav.lib') env.libs.parameters.read(file=f'{lib}/par.lib') aln = modeller.alignment(env) target = modeller.model(env, file=args.target) target_name = os.path.basename(args.target).split('.')[0] aln.append_model(target, align_codes=target_name) ref = modeller.model(env, file=args.ref) ref_name = os.path.basename(args.ref).split('.')[0] aln.append_model(ref, align_codes=ref_name) aln.align() # aln.align3d() alnfile = f'{target_name}_{ref_name}.seq' aln.write(file=alnfile) mdl = modeller.model(env) mdl.generate_topology(aln[ref_name]) # Assign the average of the equivalent template coordinates to MODEL:
def modelMissingAtoms(self, pdbFilename, outputFilename, chain=' ', debug = False, allHydrogen = False):
    """Model missing atoms/residues in a specified PDB file using MODELLER.

    REQUIRED ARGUMENTS
      pdbFilename - the filename of the PDB file to model missing atoms and residues for
      outputFilename - the filename for the desired final model

    OPTIONAL ARGUMENTS
      chain - the one-character chain ID of the chain to model (default ' ')
      debug - flag to print extra debug output and leave temporary directory (default False)
      allHydrogen - if True, build the model with MODELLER's allhmodel class
        instead of automodel (default False)

    RAISES
      ParameterException if pdbFilename does not exist.

    NOTES

    The specified chain from pdbFilename is processed through MODELLER to build missing
    atoms and residues specified in the SEQRES entry of the PDB file but not present in
    the PDB file. This procedure is loosely based on the protocol appearing at

    http://salilab.org/modeller/wiki/Missing_residues

    The complete sequence is read from the SEQRES fields, and the DBREF field used to
    determine the span of residues described in the SEQRES fields.

    A heavy-atom topology is constructed in MODELLER for the complete sequence,
    coordinates present in the PDB file transferred, and the remaining heavy-atom
    coordinates built from ideal geometry.

    Finally, a single standard simulated-annealing-based modeling step is performed using
    the standard automodel protocol but allowing only the atoms and residues that were
    undefined in the PDB file to move.

    The work is done inside a temporary directory, which is made the current
    working directory for the duration of the call. The previous working
    directory is always restored, and the temporary directory is removed unless
    debug is set.
    """
    import os
    import os.path
    import shutil
    import tempfile

    # Ensure specified PDB file exists.
    if not os.path.exists(pdbFilename):
        raise ParameterException("Specified PDB file %s not found." % pdbFilename)

    # Append full path to pdbFilename and outputFilename
    pdbFilename = os.path.abspath(pdbFilename)
    outputFilename = os.path.abspath(outputFilename)

    # Create a temporary directory for running MODELLER.
    tmpdir = tempfile.mkdtemp()
    if debug: print("tmpdir = %s" % tmpdir)
    olddir = os.getcwd()

    try:
        # Get the complete sequence without chain breaks from the SEQRES/DBREF fields of the source PDB file.
        first_residue_id, complete_sequence = self.getCompleteSequence(pdbFilename, chain)
        nresidues = len(complete_sequence)
        last_residue_id = first_residue_id + nresidues - 1

        # Get the sequence of residues that are at least partially present in the PDB file as a dictionary.
        # present_sequence_dict[residue_id] is the one-letter-code of the residue residue_id, if there are any ATOM records for this residue.
        present_sequence_dict = self.getPresentSequence(pdbFilename, chain)

        # Generate alignment of the template sequence (residues for which any coordinates are defined) against the target (complete sequence from SEQRES/DBREF)
        # TODO: Check integrity against complete_sequence.
        present_sequence = ""
        for residue_id in range(first_residue_id, first_residue_id + nresidues):
            if residue_id in present_sequence_dict:
                present_sequence += present_sequence_dict[residue_id]
            else:
                present_sequence += '-'

        # Change working directory to temporary directory.
        os.chdir(tmpdir)

        # Generate alignment file for MODELLER.
        alignment_filename = os.path.join(tmpdir, 'model.ali')
        header_format = "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s"
        alignment_lines = [
            ">P1;%s" % "template",
            header_format % ("structure", pdbFilename, min(present_sequence_dict.keys()), chain, max(present_sequence_dict.keys()), chain, " ", " ", " ", " "),
            "%s*" % present_sequence,
            "",
            ">P1;%s" % "target",
            header_format % ("sequence", "target", first_residue_id, chain, last_residue_id, chain, " ", " ", " ", " "),
            "%s*" % complete_sequence,
        ]
        with open(alignment_filename, 'w') as alignment_file:
            alignment_file.write("\n".join(alignment_lines) + "\n")

        if debug:
            print("alignment file:")
            with open(alignment_filename) as alignment_file:
                print(alignment_file.read().rstrip("\n"))

        # Call MODELLER to generate topology, transfer coordinates, and build from internal coordinates.
        import modeller
        import modeller.automodel

        # Create a new environment.
        env = modeller.environ()

        # Specify the topology and parameters to use.
        # TODO: Is this necessary, or can we rely on the defaults?
        env.libs.topology.read(file='$(LIB)/top_heav.lib')
        env.libs.parameters.read(file='$(LIB)/par.lib')

        # Read in alignment.
        aln = modeller.alignment(env)
        print(alignment_filename)
        aln.append(file=alignment_filename, align_codes='all')

        # Create a model.
        model = modeller.model(env)

        # Generate the topology from the target sequence.
        model.generate_topology(aln['target'])

        # Transfer defined coordinates from template.
        model.transfer_xyz(aln)

        # Determine which atoms are undefined because they are missing in the template, and create a selection from them.
        # An x coordinate of -999 is what this check treats as "undefined".
        missing_atom_indices = []
        for atom_index in range(len(model.atoms)):
            atom = model.atoms[atom_index]
            if atom.x == -999:
                missing_atom_indices.append(atom_index)

        # DEBUG: Write model coordinates to a PDB file.
        if debug: model.write(file=os.path.join(tmpdir,'transferred.pdb'))

        # Build the remaining undefined atomic coordinates from ideal internal coordinates stored in residue topology files.
        model.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

        # DEBUG: Write model coordinates to a PDB file.
        if debug: model.write(file=os.path.join(tmpdir,'built.pdb'))

        # Override the 'select_atoms' routine in the chosen modeling class to select only the atoms with undefined atomic coordinates in template PDB.
        if allHydrogen:
            base_class = modeller.automodel.allhmodel
        else:
            base_class = modeller.automodel.automodel

        class mymodel(base_class):
            def select_atoms(self):
                missing_atoms = modeller.selection()
                for atom_index in missing_atom_indices:
                    missing_atoms.add(self.atoms[atom_index])
                return missing_atoms

        # Ensure selected atoms feel all nonbonded interactions.
        env.edat.nonbonded_sel_atoms = 1

        # Set up automodel.
        a = mymodel(env, alnfile=alignment_filename, knowns='template', sequence='target')

        # Set parameters for automodel.
        # Build only one model.
        # TODO: Have more models built by default (perhaps 50?)
        a.starting_model = 1
        a.ending_model = 1

        # Generate model(s).
        a.make()

        # TODO: Rescore models and select the best one.
        # For now, we only use the first model.
        final_model_summary = a.outputs[0]

        # Copy resulting model to desired output PDB filename.
        # This must happen before leaving tmpdir: the model name is used
        # as given, while tmpdir is still the working directory.
        shutil.copy(final_model_summary['name'], outputFilename)
    finally:
        # Restore working directory.
        os.chdir(olddir)

        # Clean up temporary directory.
        if (not debug):
            shutil.rmtree(tmpdir)

    return
def _build_models(structfname, basedir, nmodels, refstructure, verbose, seq_rep_list):
    """
    Builds replicate structural models of a list of protein sequences.

    structfname is the alignment file appended to every MODELLER alignment;
    its entries are used as the known templates.
    basedir is the directory under which one sub-directory per sequence
    identifier is created.
    nmodels is divided by the total replicates requested for an identifier to
    get the number of models built per replicate (at least 1).
    refstructure is the key of the alignment entry to superpose onto; when it
    is falsy the first alignment entry is used.
    verbose switches MODELLER logging between verbose and none.
    seq_rep_list is a list of (sequence,replicates) pairs, giving each
    sequence object to be modeled and the number of replicates needed for
    that sequence object.  Each sequence object must provide `identifier`
    and `sequence` attributes.

    Sequences whose output directory already holds a `rep<N>.pdb` with N at
    least the total replicates needed are skipped; otherwise numbering
    continues after the highest existing N.  Files matching `rep*.pdb` whose
    middle part is not a plain number are ignored.

    SIDE EFFECT: models are placed in basedir/sequence_id directory.  The
    working directory is changed while MODELLER runs and is restored
    afterwards, also when modeling raises.
    """
    # Resolve paths against the starting directory, because the working
    # directory is switched to a scratch directory below.
    workingdir = os.getcwd()
    structfname = os.path.normpath(os.path.join(workingdir, structfname))
    basedir = os.path.normpath(os.path.join(workingdir, basedir))

    # calculate total number of reps for each sequence id
    reps_per_id = {}
    for seq, reps in seq_rep_list:
        reps_per_id[seq.identifier] = reps_per_id.get(seq.identifier, 0) + reps

    for seq, reps in seq_rep_list:
        # calculate some information on total reps for this id and how many
        # models to build for this particular sequence
        total_reps_needed = reps_per_id[seq.identifier]
        models_per_rep = max(1, round(nmodels / total_reps_needed))
        mynmodels = models_per_rep * reps

        # check this sequence's existing structures; bail out if done
        mindex = 1
        outdir = basedir + os.path.sep + seq.identifier
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        else:
            existing_reps = []
            for path in glob.glob(outdir + os.path.sep + 'rep*.pdb'):
                # Text between the 'rep' prefix and the '.pdb' suffix; skip
                # names such as 'report.pdb' that are not replicate outputs.
                index_text = os.path.basename(path)[len('rep'):-len('.pdb')]
                if index_text.isdigit():
                    existing_reps.append(int(index_text))
            if existing_reps:
                last_rep = max(existing_reps)
                if last_rep >= total_reps_needed:
                    continue
                mindex = last_rep + 1

        # set up temporary directory for modeller execution
        # NOTE(review): dnameprefix is not defined in this function; it is
        # presumably a module-level name -- confirm.
        with tempfile.TemporaryDirectory(prefix=dnameprefix) as tempdir:
            os.chdir(tempdir)
            try:
                # set up modeller environment
                if verbose:
                    modeller.log.verbose()
                else:
                    modeller.log.none()
                env = modeller.environ()
                env.io.atom_files_directory = [workingdir]

                # set up complete alignment
                aln = modeller.alignment(env)
                aln.append(file=structfname, remove_gaps=False)
                knowns = [s.code for s in aln]
                aln.append_sequence(seq.sequence)
                aln[-1].code = seq.identifier

                # write alignment - modeller doesn't like alignment in memory
                full_aln_fname = 'structaligntemp.ali'
                aln.write(full_aln_fname, alignment_format='PIR')

                # set up model assessments
                assess_methods = [modeller.automodel.assess.DOPE,
                                  modeller.automodel.assess.DOPEHR]
                assess_names = ["DOPE score", "DOPE-HR score"]
                a = modeller.automodel.dope_loopmodel(
                    env, alnfile=full_aln_fname, knowns=knowns,
                    sequence=seq.identifier, assess_methods=assess_methods)
                a.starting_model = 1           # index of the first model
                a.ending_model = mynmodels     # index of the last model

                # adjust optimization parameters
                a.library_schedule = modeller.automodel.autosched.slow
                a.md_level = modeller.automodel.refine.slow
                a.make()                       # do homology modeling

                # evaluate structural models: rank the successful ones by
                # the mean of their assessment scores (lowest first)
                score_results = []
                for data in a.outputs:
                    if data["failure"] is not None:
                        continue
                    myscrs = [data[score_name] for score_name in assess_names]
                    ave_score = sum(myscrs) / len(myscrs)
                    score_results.append((ave_score, data["name"], myscrs))
                score_results.sort()
                best_models = score_results[:reps]

                # map to reference structure
                refseq = aln[refstructure] if refstructure else aln[0]
                refcode = refseq.code
                refmdl = modeller.model(env, file=refseq.atom_file,
                                        model_segment=refseq.range)
                refpos = modeller.selection(refmdl).only_atom_types('CA')

                # get best models
                for _score, infname, _scores in best_models:
                    outfname = os.path.join(outdir, 'rep{}.pdb'.format(mindex))

                    # build alignment
                    myaln = modeller.alignment(env)
                    myaln.append(file=structfname, align_codes=refcode,
                                 remove_gaps=False)
                    myaln.append_sequence(seq.sequence)
                    myaln[-1].code = seq.identifier
                    myaln[-1].atom_file = infname

                    # read pdb file
                    mymodel = modeller.model(env, file=infname)

                    # translate to reference coordinates
                    refpos.superpose(mymodel, myaln)

                    # write translated pdb file
                    mymodel.write(file=outfname)
                    mindex += 1
            finally:
                # Leave the scratch directory before it is deleted, even if
                # modeling failed part-way through.
                os.chdir(workingdir)
    return