Ejemplo n.º 1
0
 def test_script9(self):
     """Run the step 9 script (multiple fitting) and check its models"""
     rings = ('top', 'bottom')
     # Stage the step 8 outputs that this step reads
     for ring in rings:
         density = ('precalculate_results/stage8_split_density/'
                    'groel-11.5A.%s.mrc' % ring)
         shutil.copy(density, 'output')
     # check_call raises if the script exits with a non-zero status
     subprocess.check_call(
         ['scripts/script9_symmetric_multiple_fitting.py'])
     env = modeller.environ()
     reference = modeller.model(
         env,
         file='precalculate_results/stage9_symmetric_multiple_fitting/'
              'model.top.0.pdb')
     ca_atoms = modeller.selection(reference).only_atom_types('CA')
     # Each ring needs at least one model whose CA RMSD to the reference
     # is below 10.0
     for ring in rings:
         rmsds = []
         for index in range(6):
             model_path = 'output/model.%s.%d.pdb' % (ring, index)
             candidate = modeller.model(env, file=model_path)
             aln = modeller.alignment(env)
             aln.append_model(reference, align_codes='ref')
             aln.append_model(candidate, align_codes='model')
             rmsds.append(ca_atoms.superpose(candidate, aln).rms)
             os.unlink(model_path)
         self.assertTrue(min(rmsds) < 10.0)
     # Remove the remaining files produced by the script
     os.unlink('output/intermediate_asmb_sols.out')
     for ring in rings:
         for pattern in ('output/multifit.%s.output',
                         'output/multifit.%s.output.symm.ref',
                         'output/multifit.%s.param'):
             os.unlink(pattern % ring)
Ejemplo n.º 2
0
    def find(self):
        """Return a Modeller selection corresponding to the allosteric site.

           The selection is computed on the first successful call and then
           cached. An empty selection is never cached, so a failed lookup
           raises on every call instead of silently returning an empty
           selection on later calls.

           @raise AllostericSiteError on error."""
        if self.__allosteric_site is not None:
            return self.__allosteric_site

        # align PDB2 to PDB1 and superimpose antigen
        try:
            salign0(self.env, self.pdb1, self.pdb2)
        except modeller.ModellerError as err:
            raise AllostericSiteError("Could not align %s with %s: %s. "
                           "This is usually due to a poor alignment."
                           % (self.pdb2, self.pdb1, str(err)))
        pmfit = get_fit_filename(self.pdb2)

        # determine residues in PDB2 that contact LIG1
        self.__pmfit = modeller.model(self.env, file=pmfit)
        lig1 = modeller.model(self.env, file=self.ligand)
        site = modeller.selection(
            [ri for ri, rj, dist
             in get_inter_contacts(self.env, self.__pmfit, lig1, self.rcut)])
        # Remove the fit files for both structures (presumably written by
        # salign0 -- confirm) now that they have been read
        os.unlink(pmfit)
        os.unlink(get_fit_filename(self.pdb1))
        if len(site) == 0:
            raise AllostericSiteError("No allosteric site found")
        # Only cache a usable (non-empty) result
        self.__allosteric_site = site
        return site
Ejemplo n.º 3
0
 def test_script6(self):
     """Test step 6 (model building and assessment)"""
     # Get inputs (outputs from steps 3 and 5)
     shutil.copy('precalculate_results/stage3_density_segmentation/'
                 'groel_subunit_11.mrc', 'output')
     shutil.copy('precalculate_results/stage5_template_alignment/'
                 'groel-1iokA.ali', 'output')
     # Make sure the script runs without errors (check_call raises on a
     # non-zero exit status)
     subprocess.check_call(
         ['scripts/script6_model_building_and_assessment.py'])
     # Check output models
     e = modeller.environ()
     for i in range(1, 11):
         base = 'output/P0A6F5.B9999%04d' % i
         # (file name, expected number of residues)
         expected = ((base + '.pdb', 548),
                     (base + '.truncated.pdb', 524),
                     (base + '.truncated.fitted.pdb', 524))
         for fname, nres in expected:
             m = modeller.model(e, file=fname)
             self.assertEqual(len(m.residues), nres)
         # Only clean up once all three files have passed their checks
         for fname, _ in expected:
             os.unlink(fname)
     scores = 'output/model_building.scores.output'
     # Close the scores file explicitly instead of leaking the handle
     with open(scores) as fh:
         wc = len(fh.readlines())
     # Should be one line for each of 10 models, plus a header
     self.assertEqual(wc, 11)
     os.unlink(scores)
Ejemplo n.º 4
0
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a template onto a reference template and record the RMSD.

    Both template structures and a PIR alignment (built by
    ``_PIR_alignment``) are written into a scratch directory. MODELLER then
    superposes the two structures using the CA atoms of the reference.

    Parameters
    ----------
    msmseed : object
        ``template_sequence``, ``template_id`` and ``template_structure``
        are read from it. On success ``rmsd_to_reference`` is set; if any
        exception is raised, ``error_message`` is set to its text instead.
    ref_msmseed : object
        The reference; the same three attributes are read from it.

    Returns
    -------
    The ``msmseed`` argument that was passed in.
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os
    # Remember where we started so the process is not left inside the
    # scratch directory after it has been deleted.
    try:
        original_dir = os.getcwd()
    except OSError:
        # The current directory may itself no longer exist
        original_dir = None
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence, ref_msmseed.template_id, msmseed.template_sequence, msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology, template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env, file='aln_tmp.pir', align_codes=(ref_msmseed.template_id, msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Exceptions have no .message attribute on Python 3; str() works
        # on both Python 2 and 3.
        msmseed.error_message = str(e)
    finally:
        try:
            if original_dir is not None:
                os.chdir(original_dir)
        finally:
            shutil.rmtree(temp_dir)
    return msmseed
Ejemplo n.º 5
0
def file_to_model(pdbfile, chain):
    """Read ``pdbfile`` into a Modeller model using the module-level ``env``.

    When ``chain`` is not None the read is limited to the segment
    'FIRST:<chain>' .. 'LAST:<chain>'; otherwise no segment is given.
    """
    read_args = {'file': pdbfile}
    if chain is not None:
        read_args['model_segment'] = ('FIRST:%s' % chain, 'LAST:%s' % chain)
    return modeller.model(env, **read_args)
Ejemplo n.º 6
0
    def test_integrative_modeling(self):
        """Run sampling and analysis end to end, then compare the clusters
           produced with the stored 'known good' clusters"""
        import modeller
        workdir = 'integrative_modeling'
        # Build the clustering executable
        subprocess.check_call(['gfortran', 'cluster.f', 'u3best.f',
                               '-o', 'cluster.x'],
                              cwd='integrative_modeling/bin')

        # Sampling first, then the analysis script
        for script in ('./run_modeling.py', 'bin/get_frames.sh'):
            subprocess.check_call([script], cwd=workdir)

        # Cluster files whose path contains a '-' are left out
        found = [path for path
                 in glob.glob('integrative_modeling/clustering/clus.*.pdb')
                 if '-' not in path]
        expected = glob.glob('model_refinement/cluster*/model.pdb')

        env = modeller.environ()
        found_hits = [0] * len(found)
        expected_hits = [0] * len(expected)
        # rms_matrix[i][j] is the RMSD between found[i] and expected[j]
        rms_matrix = []
        for i, found_path in enumerate(found):
            row = []
            for j, expected_path in enumerate(expected):
                found_mdl = modeller.model(env, file=found_path)
                all_atoms = modeller.selection(found_mdl)
                aln = modeller.alignment(env)
                expected_mdl = modeller.model(env, file=expected_path)
                aln.append_model(found_mdl, align_codes='clus')
                aln.append_model(expected_mdl, align_codes='exp_clus')
                # Only the global (non-cutoff) RMSD matters here; the huge
                # cutoff stops refine_local from trading a worse RMSD for
                # more equivalent positions
                fit = all_atoms.superpose(expected_mdl, aln, rms_cutoff=999.)
                if fit.rms < 15.0:
                    found_hits[i] += 1
                    expected_hits[j] += 1
                row.append(fit.rms)
            rms_matrix.append(row)
        # Produced clusters that are close to at least one expected cluster
        n_found_matched = sum(1 for hits in found_hits if hits != 0)
        # Expected clusters that are close to at least one produced cluster
        n_expected_matched = sum(1 for hits in expected_hits if hits != 0)
        # At least 2 expected clusters must be matched, and by at least 2
        # different produced clusters
        failure_text = ("Could not find any match between the %d clusters "
                        "found in this test and 2 of the 3 'known good' "
                        "clusters (match defined as all-atom RMSD less than "
                        "15.0A). RMSD matrix: %s"
                        % (len(found), str(rms_matrix)))
        self.assertTrue(n_found_matched >= 2 and n_expected_matched >= 2,
                        failure_text)
Ejemplo n.º 7
0
def perform_sequence_alignment():
    """Align experimental.pdb with rosetta.pdb and save the result.

    Both structures are read from the current directory, appended to a new
    alignment under the codes 'experimental' and 'rosetta', aligned with
    align2d() and written to 'align.ali' in PIR format.
    """
    env = modeller.environ()
    experimental = modeller.model(env, file='experimental.pdb')
    rosetta = modeller.model(env, file='rosetta.pdb')
    alignment = modeller.alignment(env)
    # Only the experimental entry is given an explicit atom file
    alignment.append_model(experimental,
                           align_codes='experimental',
                           atom_files='experimental.pdb')
    alignment.append_model(rosetta, align_codes='rosetta')
    alignment.align2d()
    alignment.write(file='align.ali', alignment_format='PIR')
Ejemplo n.º 8
0
def refined_vs_notrefined(models_dir, dope_profile):
    """Plot the DOPE profile of each model against that of its refined version.

    Every entry of ``models_dir`` is read as a model and assessed with DOPE.
    Entries whose name ends in '.B' are collected as refined models, all
    others as non-refined ones. If both groups have the same size, they are
    paired in listing order and one .jpg plot per pair is saved under the
    ``dope_profile`` prefix.

    Paths are built by plain string concatenation, so ``models_dir`` and
    ``dope_profile`` are used exactly as given (no separator is added).

    Returns a message naming ``dope_profile``.
    """
    env = environ()
    env.io.atom_files_directory = [models_dir]

    refined_models = []
    refined_aln = modeller.alignment(env)
    refined_codes = []

    raw_models = []
    raw_aln = modeller.alignment(env)
    raw_codes = []

    for entry in os.listdir(models_dir):
        code = str(entry)
        mdl = modeller.model(env)
        mdl.read(file=entry)
        if entry.endswith('.B'):
            models, codes, ali = refined_models, refined_codes, refined_aln
            ali_name = 'build_profile_ref.ali'
        else:
            print(code)
            models, codes, ali = raw_models, raw_codes, raw_aln
            ali_name = 'build_profile_notref.ali'
        codes.append(code)
        selection(mdl).assess_dope(output='ENERGY_PROFILE NO_REPORT',
                                   file=models_dir + code + '.profile',
                                   normalize_profile=True,
                                   smoothing_window=15)
        models.append(mdl)
        ali.append_model(mdl, align_codes=code, atom_files=code)
        # The alignment file is rewritten after every model is added
        ali.write(file=dope_profile + ali_name, alignment_format='PIR')

    if len(raw_models) == len(refined_models):
        for raw_code, refined_code in zip(raw_codes, refined_codes):
            raw_profile = get_profile(models_dir + raw_code + ".profile",
                                      raw_aln[str(raw_code)])
            refined_profile = get_profile(
                models_dir + refined_code + ".profile",
                refined_aln[str(refined_code)])
            pylab.figure(1, figsize=(30,18))
            pylab.xlabel('Alignment position', fontsize = 20)
            pylab.ylabel('DOPE per-residue score', fontsize = 20)
            pylab.plot(raw_profile, color='red', linewidth=2, label='Model')
            pylab.plot(refined_profile, color='green', linewidth=2,
                       label='Optimized model')
            pylab.legend(fontsize = 20)
            # Plots are named after the non-refined model
            pylab.savefig(dope_profile + raw_code + '.dope_profile.jpg',
                          dpi=100)
            pylab.close()

    message = ("Comparison energy plot between refined and not refined model has been created here:\n%s\n" % (dope_profile))
    return message
Ejemplo n.º 9
0
def main():
    """Print one formatted line for every contact between two structures.

    The two file names and the cutoff come from parse_args(). HETATM
    records are read as well. The last two columns are always 6.
    """
    import modeller
    file1, file2, rcut = parse_args()
    env = modeller.environ()
    env.io.hetatm = True
    first = modeller.model(env, file=file1)
    second = modeller.model(env, file=file2)
    row_format = "  %6s  %2s  %6s  %2s  %3s  %3s%3d%11.3f  %1d  %1d"
    for res_i, res_j, distance in get_inter_contacts(env, first, second,
                                                     rcut):
        fields = (res_i.num, res_i.chain.name, res_j.num, res_j.chain.name,
                  res_i.pdb_name, res_j.pdb_name,
                  get_contact_type(res_i, res_j), distance, 6, 6)
        print(row_format % fields)
Ejemplo n.º 10
0
 def test_feature_sidechain_biso(self):
     """Check average sidechain Biso feature"""
     env = self.get_environ()
     mlib = self.get_mdt_library()
     # protein=3 must be rejected
     self.assertRaises(ValueError, mdt.features.SidechainBiso, mlib,
                       bins=mdt.uniform_bins(5, 0, 10), protein=3)
     feature = mdt.features.SidechainBiso(mlib,
                                          bins=mdt.uniform_bins(5, 0, 10))
     mdl = modeller.model(env)
     mdl.build_sequence("A")
     aln = modeller.alignment(env)
     aln.append_model(mdl, align_codes="test")
     sequence = aln[0]
     # Mainchain atom Biso should be ignored:
     for atom_id in ("N:1", "C:1", "O:1", "OXT:1", "CA:1"):
         sequence.atoms[atom_id].biso = 1000
     # (CB Biso value, bin expected to hold the single count)
     cases = [
         (22, 2),   # Map regular values to bins
         (32, 3),
         (0, -1),   # Zero Biso should be "undefined"
         (1, 3),    # Biso < 2 is multiplied by 4pi^2
     ]
     for cb_biso, expected_bin in cases:
         sequence.atoms["CB:1"].biso = cb_biso
         table = mdt.Table(mlib, features=feature)
         table.add_alignment(aln)
         self.assertEqual(table.shape, (6,))
         self.assertEqual(table.sum(), 1)
         self.assertEqual(table[expected_bin], 1)
Ejemplo n.º 11
0
def main(args):
    """Align a target sequence to a segment of a template structure.

    Reads the template segment between the two chains in ``args.chains``
    (upper-cased), appends the target sequence file from ``args.dir``, runs
    align2d(), and writes the alignment in PIR (.ali) and PAP (.pap) format
    into ``args.dir``. Finally the alignment is checked.
    """
    mod.log.verbose()
    env = mod.environ()
    env.io.atom_files_directory = [".", args.dir, "../" + args.dir]
    aln = mod.alignment(env)

    template_code = args.template.replace(".pdb", "")
    segment = ("FIRST:" + args.chains[0].upper(),
               "LAST:" + args.chains[1].upper())
    mdl = mod.model(env, file=args.template, model_segment=segment)
    aln.append_model(mdl, align_codes=template_code, atom_files=args.template)

    sequence_code = args.target.replace(".ali", "")
    aln.append(file=os.path.join(args.dir, args.target),
               align_codes=sequence_code)

    # perform alignment
    aln.align2d()
    output_base = os.path.join(args.dir, sequence_code + "-" + template_code)
    # PIR output is for Modeller
    aln.write(file=output_base + ".ali", alignment_format="PIR")
    # PAP output is easier to read
    aln.write(file=output_base + ".pap", alignment_format="PAP")

    # check files
    aln.check()
0
    def test_glyc(self):
        """Test glycosylation benchmark"""
        def expect_files(names):
            # Every listed file must exist in the current directory
            for name in names:
                self.assertTrue(os.path.exists(name))

        os.chdir(os.path.join(TOPDIR, 'benchmark', 'input_glyc'))
        # Drop output left behind by an earlier run, if any
        shutil.rmtree('pred_dECALCrAS1000', ignore_errors=True)

        subprocess.check_call(['allosmod', 'setup'])
        # Setup should generate ligand and script:
        expect_files(['lig.pdb', 'qsub.sh'])
        # Run the protocol
        subprocess.check_call(['/bin/sh', '--login', './qsub.sh'])
        # Should generate more files:
        os.chdir('pred_dECALCrAS1000/2AAS.pdb_0')
        expect_files(['align2.ali', 'allosmod.py', 'converted.rsr',
                      'model_glyc.log', 'model_glyc.py',
                      'pm.pdb.B99990001.pdb', 'pm.pdb.D00000001',
                      'pm.pdb.V99990001', 'run.log'])

        # Generated model should have sugars added to second chain
        env = modeller.environ()
        env.io.hetatm = True
        mdl = modeller.model(env, file='pm.pdb.B99990001.pdb')
        self.assertEqual(len(mdl.chains), 2)
        protein, sugars = mdl.chains[0], mdl.chains[1]
        self.assertEqual(len(protein.residues), 124)
        self.assertEqual(len(sugars.residues), 16)
        # The same eight-residue pattern is expected twice
        glycan = ['NAG', 'NAG', 'BMA'] + ['MAN'] * 5
        self.assertEqual([r.name for r in sugars.residues], glycan * 2)
Ejemplo n.º 13
0
 def setup_atoms(self, env):
     """Read the model and flag each atom.

     Sets self.m, self.atoms, self.contacts, self.breaks and
     self.beta_structure, then sets per-atom flags: isNUC/torestr for
     atoms in nucleic acid residues, isSC/isCA/isCB from the atom name,
     and isAS from the atomlist_asrs file.
     """
     nucleic_acids = frozenset(['ADE', 'A', 'DA', 'THY', 'T', 'DT',
                                'URA', 'U', 'DU', 'GUA', 'G', 'DG',
                                'CYT', 'C', 'DC'])
     backbone_atoms = frozenset(['CA', 'CB', 'O', 'N', 'C', 'OT', 'NA',
                                 'NB', 'NC', 'ND', 'C1A', 'C2A', 'C3A',
                                 'C4A', 'C1B', 'C2B', 'C3B', 'C4B',
                                 'C1C', 'C2C', 'C3C', 'C4C', 'C1D',
                                 'C2D', 'C3D', 'C4D'])
     self.m = modeller.model(env, file=self.pdb_file)
     self.atoms = [Atom(a) for a in self.m.atoms]
     self.contacts = get_contacts(self.contacts_pdbs, self.rcut)
     # No break file means no breaks
     self.breaks = (get_breaks(open(self.break_file))
                    if self.break_file else {})
     self.beta_structure = get_beta(self.pdb_file)
     for atom in self.atoms:
         residue = atom.a.residue
         is_nucleic = residue.pdb_name in nucleic_acids
         if is_nucleic:
             atom.isNUC = True
             atom.torestr = get_nuc_restrained(atom.a.name,
                                               residue.pdb_name)
             # A nucleic acid residue is marked as in contact with every
             # residue index from 1 to the number of residues
             for rj in range(1, len(self.m.residues) + 1):
                 self.contacts[(residue.index,rj)] = True
         if atom.a.name in backbone_atoms or is_nucleic:
             atom.isSC = False
             atom.isCA = atom.a.name == 'CA'
             atom.isCB = atom.a.name == 'CB'
         else:
             # Anything else counts as sidechain unless it is named 'H'
             atom.isSC = atom.a.name != 'H'
     asrs_flags = parse_atomlist_asrs(open(self.atomlist_asrs))
     for atom, flag in zip(self.atoms, asrs_flags):
         atom.isAS = flag
Ejemplo n.º 14
0
def DOPE_profiles_maker(temp_dir, outputs):
    """Create a DOPE profile plot (.jpg) for every model file in ``temp_dir``.

    Model files are the directory entries whose name starts with "mod".
    Each one is read with Modeller, assessed with DOPE (the profile is
    written to ``outputs + <name> + '.profile'``) and plotted to
    ``outputs + <name minus its last 4 characters> + '.dope_profile.jpg'``.

    NOTE(review): the original docstring says the input macrocomplex has no
    nucleic acid chains; nothing in this function checks that.

    :param temp_dir: directory that is listed and used as Modeller's
        atom_files_directory.
    :param outputs: prefix for every output path; it is concatenated as-is,
        so no path separator is added.
    :return: a message naming the last plot written, or a message saying
        that no model files were found.
    """
    env = environ()
    env.io.atom_files_directory = [temp_dir]
    model_files = [name for name in os.listdir(temp_dir)
                   if name.startswith("mod")]

    aln = modeller.alignment(env)

    # Set inside the loop, so the message below can never pick up an
    # unrelated directory entry or an undefined loop variable.
    path_img = None
    for model_file in model_files:
        mdl = modeller.model(env)
        code = str(model_file)
        mdl.read(file=code, model_segment=('FIRST:@', 'END:'))
        aln.append_model(mdl, align_codes=code, atom_files=code)
        t = selection(mdl)
        file_dope = outputs + code + '.profile'
        t.assess_dope(output='ENERGY_PROFILE NO_REPORT', file=file_dope, normalize_profile=True, smoothing_window=15)
        model = get_profile(file_dope, aln[code])
        path_img = outputs + model_file[:-4] + '.dope_profile.jpg'
        pylab.figure(1, figsize=(20, 12))
        pylab.xlabel('Alignment position', fontsize=20)
        pylab.ylabel('DOPE per-residue score', fontsize=20)
        pylab.plot(model, color='green', linewidth=3, label=model_file[3:-4])
        pylab.savefig(path_img, dpi=100)
        pylab.close()

    if path_img is None:
        return("No model files (names starting with 'mod') were found in:\n  %s\n" % (temp_dir))
    return("DOPE profile plot for model created here:\n  %s\n" % (path_img))
Ejemplo n.º 15
0
def get_sas(pdb, probe):
    """Return per-atom values read from a '.sas' file that Modeller writes.

    The structure in ``pdb`` is read, write_data() is run with output
    'PSA ATOMIC_SOL' and ``probe`` as the probe radius, and the model is
    written next to the input with its extension replaced by '.sas'. That
    file is then parsed: for every line starting with 'ATOM', columns 60-66
    are stored as a float under the key (atom, residue name, residue
    number, chain id). A blank chain id is stored as 'A'.
    """
    import modeller

    sas_path = pdb.rsplit('.',1)[0]+'.sas'

    # Load the structure
    env = modeller.environ()
    mdl = modeller.model(env)
    mdl.read(file=pdb)

    # Atomic accessibilities (stored in Biso) for the given probe radius
    myedat = modeller.energy_data()
    myedat.radii_factor = 1.6
    mdl.write_data(edat=myedat, output='PSA ATOMIC_SOL',
                   psa_integration_step=0.05, probe_radius=probe)
    mdl.write(file=sas_path)

    # Read the file back and pick the values out of fixed columns
    with open(sas_path) as handle:
        records = handle.readlines()

    Sas = {}
    for record in records:
        line = record.strip()
        if line[:4] != 'ATOM':
            continue
        atom, res, resid, cid = (line[12:16], line[17:20],
                                 int(line[22:26]), line[21])
        key = (atom, res, resid, 'A' if cid == ' ' else cid)
        Sas[key] = float(line[60:66])
    return Sas
Ejemplo n.º 16
0
def save_modeller_output_files(target,
                               model_dir,
                               a,
                               env,
                               model_pdbfilepath,
                               model_pdbfilepath_uncompressed,
                               write_modeller_restraints_file=False):
    """Save the outputs of a finished MODELLER run.

    Writes the first output model of ``a`` to
    ``model_pdbfilepath_uncompressed``, a gzipped copy of that file to
    ``model_pdbfilepath``, and the model's ``seq_id`` to
    'sequence-identity.txt' in ``model_dir``. If
    ``write_modeller_restraints_file`` is true, '<target.id>.rsr' from the
    current directory is also gzipped to 'restraints.rsr.gz' in
    ``model_dir``.

    :param target: object whose ``id`` names the restraints file.
    :param model_dir: directory for the sequence identity and restraints.
    :param a: object whose ``outputs[0]['name']`` is the model file to read.
    :param env: Modeller environment used to read the model.
    :param model_pdbfilepath: destination of the gzipped PDB.
    :param model_pdbfilepath_uncompressed: destination of the plain PDB.
    :param write_modeller_restraints_file: whether to save the restraints.
    """
    # save PDB file
    # Note that the uncompressed pdb file needs to be kept until after the clustering step has completed
    tmp_model_pdbfilepath = a.outputs[0]['name']
    target_model = modeller.model(env, file=tmp_model_pdbfilepath)
    target_model.write(file=model_pdbfilepath_uncompressed)
    # gzip.open() in 'w'/'wb' mode is a binary stream, so the source has to
    # be read as bytes; writing a text-mode str fails on Python 3.
    with open(model_pdbfilepath_uncompressed, 'rb') as model_pdbfile:
        with gzip.open(model_pdbfilepath, 'wb') as model_pdbfilegz:
            model_pdbfilegz.write(model_pdbfile.read())

    # Write sequence identity.
    seqid_filepath = os.path.abspath(
        os.path.join(model_dir, 'sequence-identity.txt'))
    with open(seqid_filepath, 'w') as seqid_file:
        seqid_file.write('%.1f\n' % target_model.seq_id)

    # Copy restraints.
    if write_modeller_restraints_file:
        restraint_filepath = os.path.abspath(
            os.path.join(model_dir, 'restraints.rsr.gz'))
        # Read as bytes for the same reason as the PDB file above
        with open('%s.rsr' % target.id, 'rb') as rsrfile:
            with gzip.open(restraint_filepath, 'wb') as rsrgzfile:
                rsrgzfile.write(rsrfile.read())
0
def get_sas(pdb, probe):
    """Compute per-atom values with Modeller and return them as a dict.

    Modeller's write_data() is run on the structure in ``pdb`` with output
    "PSA ATOMIC_SOL" and probe radius ``probe``. The model is then saved as
    '<pdb without extension>.sas' and that file is parsed.

    Returns a dict mapping (atom name, residue name, residue number,
    chain id) to the float found in columns 60-66 of each ATOM line. A
    blank chain id becomes "A".
    """
    import modeller

    out_name = "%s.sas" % (pdb.rsplit(".", 1)[0],)

    # Read the PDB file
    environment = modeller.environ()
    structure = modeller.model(environment)
    structure.read(file=pdb)

    # Calculate atomic accessibilities (in Biso) with appropriate probe_radius
    energy_settings = modeller.energy_data()
    energy_settings.radii_factor = 1.6
    structure.write_data(
        edat=energy_settings,
        output="PSA ATOMIC_SOL",
        psa_integration_step=0.05,
        probe_radius=probe,
    )
    structure.write(file=out_name)

    # Load the whole file before parsing any of it
    with open(out_name) as sas_file:
        lines = [text.strip() for text in sas_file.readlines()]

    Sas = {}
    for text in lines:
        if text[:4] == "ATOM":
            atom, res, resid, cid = text[12:16], text[17:20], int(text[22:26]), text[21]
            if cid == " ":
                cid = "A"
            Sas[(atom, res, resid, cid)] = float(text[60:66])
    return Sas
Ejemplo n.º 18
0
def _align_structures(structures, verbose):
    """Structurally align the given structures and return an Alignment.

    ``structures`` is a mapping from alignment code to structure file.
    Modeller logging is verbose when ``verbose`` is true and silent
    otherwise. The Modeller alignment is handed over through a temporary
    PIR file, which is removed once the Alignment object has been built.
    """
    # Choose the Modeller log level
    set_log_level = modeller.log.verbose if verbose else modeller.log.none
    set_log_level()

    env = modeller.environ()
    aln = modeller.alignment(env)

    # Load every structure and add it to the alignment under its code
    for code, structure_file in structures.items():
        mdl = modeller.model(env, file=structure_file)
        aln.append_model(mdl, align_codes=code, atom_files=structure_file)

    # Iterative structural alignment
    modeller.salign.iterative_structural_align(aln)

    # Reserve a file name; the handle is closed right away and the file is
    # kept (delete=False) so that Modeller can write to it by name
    with tempfile.NamedTemporaryFile(mode='w',
                                     prefix=fnameprefix,
                                     suffix='.ali',
                                     delete=False) as scratch:
        pir_path = scratch.name
    aln.write(pir_path, alignment_format='PIR')
    result = Alignment(pir_path)
    os.remove(pir_path)
    return result
Ejemplo n.º 19
0
 def test_disulfide(self):
     """Test handling of disulfide bonds"""
     mlib = self.get_all_libraries()
     plain = mdt.features.AtomBondSeparation(
         mlib, bins=mdt.uniform_bins(20, 0, 1.0))
     with_ss = mdt.features.AtomBondSeparation(
         mlib, bins=mdt.uniform_bins(20, 0, 1.0), disulfide=True)
     env = self.get_environ()
     mdl = modeller.model(env)
     mdl.build_sequence('CC')

     def count_in_bin_one(feature, aln):
         # Fresh table for the feature; report what lands in bin 1
         table = mdt.Table(mlib, features=feature)
         table.add_alignment(aln, residue_span_range=(-999,0,0,999))
         return table[1]

     # With a short enough SG-SG distance one extra bond (separation
     # feature = 1) should show up, but only when disulfide=True
     for sg_distance, expected in [(2.6, 11.0), (2.4, 12.0)]:
         first_sg = mdl.residues[0].atoms['SG']
         second_sg = mdl.residues[1].atoms['SG']
         # Put the first SG at the origin and the second on the z axis
         first_sg.x = first_sg.y = first_sg.z = 0.
         second_sg.x = second_sg.y = 0.
         second_sg.z = sg_distance
         aln = modeller.alignment(env)
         aln.append_model(mdl, atom_files='test', align_codes='test')
         self.assertEqual(count_in_bin_one(plain, aln), 11.0)
         self.assertEqual(count_in_bin_one(with_ss, aln), expected)
Ejemplo n.º 20
0
def complete_pdb(env, filename, special_patches=None, transfer_res_num=False,
                 model_segment=None, patch_default=True):
    """Reads the given PDB file, reorders the atoms to match the current
       topology library, and adds any missing atoms.

       You should read topology and parameters into 'env' before calling
       this routine.

       :param env: Modeller environment.
       :type  env: :class:`environ`
       :param filename: the PDB file to read. It is always read with
              model_format='PDB_OR_MMCIF'.
       :param special_patches: if set, it is expected to be a routine which
              takes one parameter (the model) and applies any patches (e.g.
              disulfide bridges).
       :param transfer_res_num: if True, the residue numbering from the
              original PDB is retained (by default, residues are renumbered
              from 1).
       :param model_segment: if not None, it is passed through as the
              model_segment argument each time the file is read.
       :param patch_default: if True, default terminal patches are applied.

       :return: the completed model.
       :rtype: :class:`model`"""

    # Arguments shared by every read of the file, so that the re-read for
    # transfer_res_num uses the same format and segment as the first read.
    read_args = {'model_format': 'PDB_OR_MMCIF'}
    if model_segment is not None:
        read_args['model_segment'] = model_segment
    mdl = model(env, file=filename, **read_args)
    # Save original chain IDs, since generate_topology resets them
    chain_ids = [c.name for c in mdl.chains]
    aln = alignment(env)
    aln.append_model(mdl, atom_files=filename, align_codes='struc')
    aln.append_model(mdl, atom_files=filename+'.ini', align_codes='struc-ini')
    mdl.clear_topology()
    mdl.generate_topology(aln[-1], patch_default=patch_default)
    if special_patches:
        special_patches(mdl)
    # Save original seq_id, as transfer_xyz sets it
    seq_id = mdl.seq_id
    mdl.transfer_xyz(aln)
    mdl.seq_id = seq_id
    # Restore original chain IDs
    for (chain, chainid) in zip(mdl.chains, chain_ids):
        chain.name = chainid
    mdl.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')
    if transfer_res_num:
        # Previously this read omitted model_format, unlike the first read
        mdl2 = model(env, file=filename, **read_args)
        mdl.res_num_from(mdl2, aln)
    return mdl
Ejemplo n.º 21
0
def make_model(msmseed):
    """
    Use MODELLER from the Sali lab to create a model between the target and template specified in the input

    If the target and template ids are equal, the template structure is used
    as the model and MODELLER is not run (or imported). Otherwise the
    alignment and template are written to a scratch directory, the model is
    built there, and the directory is removed afterwards. The working
    directory is restored before returning. If building raises an exception,
    ``error_state`` is set to -1 on the seed and nothing is re-raised.

    Parameters
    ----------
    msmseed : MSMSeed
        object containing the alignment between target and template and template structure

    Returns
    -------
    msmseed : MSMSeed
        object containing the homology model built from the input alignment and template structure
    """

    import tempfile
    import os
    import shutil
    #if the target and template are the same, modeller dies.
    if msmseed.template_id == msmseed.target_id:
        msmseed.target_model = msmseed.template_structure
        return msmseed
    # Imported only after the short-circuit above, which needs neither
    # package. Still outside the try block, so a missing package raises.
    import modeller
    import modeller.automodel
    import simtk.openmm.app as app
    # Remember where we started so the process is not left inside the
    # scratch directory after it has been deleted.
    try:
        original_dir = os.getcwd()
    except OSError:
        # The current directory may itself no longer exist
        original_dir = None
    #first, we need to make a temp directory where we can put the files MODELLER needs
    temp_dir = tempfile.mkdtemp()
    try:
        os.chdir(temp_dir)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(msmseed.alignment)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology, template_pdb.positions, template_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        a = modeller.automodel.allhmodel(env,
                                         # file with template codes and target sequence
                                         alnfile  = 'aln_tmp.pir',
                                         # PDB codes of the template
                                         knowns   = msmseed.template_id,
                                         # code of the target
                                         sequence = msmseed.target_id)
        a.make()
        tmp_model_pdbfilename = a.outputs[0]['name']
        target_model = modeller.model(env, file=tmp_model_pdbfilename)
        msmseed.sequence_similarity = target_model.seq_id
        msmseed.target_model = app.PDBFile(tmp_model_pdbfilename)
        with open('%s.rsr' % msmseed.target_id, 'r') as restraints_file:
            msmseed.target_restraints = restraints_file.readlines()
    except Exception:
        # Best effort: flag the seed as failed. KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        msmseed.error_state = -1
    finally:
        try:
            if original_dir is not None:
                os.chdir(original_dir)
        finally:
            shutil.rmtree(temp_dir)
    return msmseed
Ejemplo n.º 22
0
 def test_detect_invalid_residue_types_ok(self):
     """_detect_invalid_residue_types() should accept a valid sequence"""
     with utils.temporary_directory() as tmpdir:
         pdb_path = os.path.join(tmpdir, 'test.pdb')
         # Single-line PDB file built from the module-level pdb_line
         with open(pdb_path, 'w') as pdb_file:
             pdb_file.write(pdb_line + '\n')
         env = modeller.environ()
         mdl = modeller.model(env, file=pdb_path)
     # The check runs after the temporary directory context has exited
     cleaning._detect_invalid_residue_types(mdl)
Ejemplo n.º 23
0
 def make_model(self):
     """Return a Modeller model read from a one-atom PDB file.

     The file 'test.pdb' (a single CA atom of an ALA residue) is written
     to the current directory, read, and then deleted.
     """
     import modeller
     pdb_name = 'test.pdb'
     atom_record = "ATOM      2  CA  ALA     1      27.449  14.935   5.140  1.00 29.87           C\n"
     env = modeller.environ()
     env.edat.dynamic_sphere = False
     with open(pdb_name, 'w') as pdb_file:
         pdb_file.write(atom_record)
     mdl = modeller.model(env, file=pdb_name)
     os.unlink(pdb_name)
     return mdl
Ejemplo n.º 24
0
def copy_to_modeller(env, particles):
    """Build a Modeller model containing one placeholder atom per particle.

    A temporary PDB file ('temp_particles.pdb' in the current directory) is
    written with ``len(particles)`` ATOM records, all at the origin, and
    read back with Modeller. Only the number of particles is used here.

    :param env: Modeller environment used to read the file.
    :param particles: sized collection; its length sets the atom count.
    :return: the Modeller model read from the temporary file.
    """
    pdb_name = "temp_particles.pdb"
    try:
        # 'with' closes the handle even if a write fails
        with open(pdb_name, "w") as fh:
            for i in range(len(particles)):
                fh.write("ATOM  %5d  N   ALA     0       0.000   0.000   0.000  "
                         "1.00  0.00           C  \n" % (i))
        return modeller.model(env, file=pdb_name)
    finally:
        # Do not leave the temporary file behind when reading fails
        if os.path.exists(pdb_name):
            os.unlink(pdb_name)
Ejemplo n.º 25
0
def copy_to_modeller(env, particles):
    """Return a Modeller model with as many atoms as there are particles.

    The model is created by writing 'temp_particles.pdb' in the current
    directory, with one ATOM record at the origin per particle, and reading
    it with Modeller. The file is removed again before returning, including
    when writing or reading raises.

    :param env: Modeller environment passed to modeller.model().
    :param particles: sized collection; only ``len(particles)`` is used.
    :return: the model read from the temporary file.
    """
    scratch_pdb = 'temp_particles.pdb'
    atom_record = ("ATOM  %5d  N   ALA     0       0.000   0.000   0.000  "
                   "1.00  0.00           C  \n")
    try:
        # The context manager guarantees the handle is closed before the
        # file is read back
        with open(scratch_pdb, "w") as out:
            out.writelines(atom_record % (i) for i in range(len(particles)))
        mdl = modeller.model(env, file=scratch_pdb)
    finally:
        # Clean up even if the write or the read failed
        if os.path.exists(scratch_pdb):
            os.unlink(scratch_pdb)
    return mdl
Ejemplo n.º 26
0
def spline(pdb_file, in_restraints, out_restraints):
    """Read restraints for a model, convert them, and write them back out.

    The model is read from ``pdb_file``, its restraints are loaded from
    ``in_restraints``, passed to convert_restraints(), and written to
    ``out_restraints``.
    """
    import modeller
    # Not referenced below: importing it keeps the custom form alive so
    # that restraints.read() can use it
    from allosmod.modeller.forms import TruncatedGaussian

    env = modeller.environ()
    mdl = modeller.model(env, file=pdb_file)
    mdl.restraints.read(file=in_restraints)
    convert_restraints(mdl.restraints)
    mdl.restraints.write(file=out_restraints)
Ejemplo n.º 27
0
def mk_strct_al_modeller(strct_data1, strct_data2):
    """Structurally align two PDB chains with Modeller's SALIGN.

    Each argument is a mapping with an 'id' entry ('pdb' is prepended to
    form the structure code) and a 'chain_id' entry.  Structures are
    searched for in the current directory and in PDB_DIR.

    Returns a dict mapping positions in the first chain to positions in
    the second (both computed by get_real_position_al()) for every
    alignment column in which neither sequence has a gap.  The dict is
    empty when the final quality score is 70% or lower; if Modeller
    fails, 'Modeller failed' is reported on stderr and whatever was
    collected so far is returned.

    Standard output is silenced while Modeller runs and is always
    restored afterwards.
    """
    tmp_file = tempfile.NamedTemporaryFile(suffix=".fasta", delete=False)
    # Only the name is needed: Modeller writes the file itself by name
    tmp_file.close()

    _stdout = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    result = {}
    try:
        env = m.environ()

        aln = m.alignment(env)
        code1 = 'pdb' + strct_data1['id']
        code2 = 'pdb' + strct_data2['id']
        chain1 = strct_data1['chain_id']
        chain2 = strct_data2['chain_id']
        env.io.atom_files_directory = ['.', PDB_DIR]
        try:
            for (code, chain) in ((code1, chain1), (code2, chain2)):
                mdl = m.model(env, file=code,
                              model_segment=('FIRST:' + chain,
                                             'LAST:' + chain))
                aln.append_model(mdl, atom_files=code,
                                 align_codes=code + chain)

            # NOTE(review): write_fit from this schedule is not passed to
            # salign() (write_fit=False is hard-coded below), so no fitted
            # PDB files are written -- confirm that this is intended.
            for (weights, write_fit, whole) in (
                    ((1., 0., 0., 0., 1., 0.), False, True),
                    ((1., 0.5, 1., 1., 1., 0.), False, True),
                    ((1., 1., 1., 1., 1., 0.), True, False)):
                r = aln.salign(
                    rms_cutoff=3.5, normalize_pp_scores=False,
                    rr_file='$(LIB)/as1.sim.mat', overhang=30,
                    gap_penalties_1d=(-450, -50),
                    gap_penalties_3d=(0, 3), gap_gap_score=0,
                    gap_residue_score=0,
                    # If 'progressive', the tree is not computed and all
                    # structures will be aligned sequentially to the first
                    alignment_type='tree',
                    # Tree building can be avoided if the tree is input:
                    # ext_tree_file='1is3A_exmat.mtx',
                    # For a multiple sequence alignment only the first
                    # feature needs to be non-zero
                    feature_weights=weights,
                    improve_alignment=True, fit=True, write_fit=False,
                    write_whole_pdb=whole, output='ALIGNMENT QUALITY')
            if r.qscorepct > 70:
                aln.write(file=tmp_file.name, alignment_format='FASTA')
                with open(tmp_file.name) as a:
                    alignment = unwrap(a.read().splitlines())

                seq1, seq2 = alignment[1], alignment[3]
                # Compare column by column; the original compared the
                # whole sequence strings to '-' and so never skipped gaps
                for i, (res1, res2) in enumerate(zip(seq1, seq2)):
                    if res1 != '-' and res2 != '-':
                        pos1 = get_real_position_al(seq1, i)
                        pos2 = get_real_position_al(seq2, i)
                        result[pos1] = pos2
        except Exception:
            # stdout is redirected to devnull here, so report on stderr
            # or the failure would be completely silent
            sys.stderr.write('Modeller failed\n')
    finally:
        sys.stdout = _stdout
        devnull.close()
        try:
            os.unlink(tmp_file.name)
        except OSError:
            pass
    return result
Ejemplo n.º 28
0
    def build_mdt_from_sequence(self, mlib, features, seq, **keys):
        """Return a small test MDT table populated from one sequence.

        A model is built from `seq`, placed in a one-entry alignment, and
        added to a new table over `features`.  Extra keyword arguments are
        forwarded to the table's add_alignment() call.
        """
        environ = self.get_environ()

        model = modeller.model(environ)
        model.build_sequence(seq)

        table = mdt.Table(mlib, features=features)
        aln = modeller.alignment(environ)
        aln.append_model(model, atom_files='test', align_codes='test')
        table.add_alignment(aln, **keys)
        return table
Ejemplo n.º 29
0
    def get_test_mdt(self, mlib, features):
        """Return a reshaped test MDT table built from the sequence 'C'.

        The table is filled from a one-model alignment and then reshaped
        with an offset of 0 and a shape of -1 for every feature.
        """
        environ = self.get_environ()

        model = modeller.model(environ)
        model.build_sequence('C')

        table = mdt.Table(mlib, features=features)
        aln = modeller.alignment(environ)
        aln.append_model(model, atom_files='test', align_codes='test')
        table.add_alignment(aln)

        n_features = len(features)
        return table.reshape(features, [0] * n_features, [-1] * n_features)
Ejemplo n.º 30
0
def align_template_to_reference(msmseed, ref_msmseed):
    """Superpose a template structure onto a reference and record the RMSD.

    Both template structures and a PIR alignment of their sequences are
    written into a scratch directory, the CA atoms of the reference are
    superposed onto the template with Modeller, and the resulting RMSD is
    stored in ``msmseed.rmsd_to_reference``.

    Any exception raised along the way is caught and its text stored in
    ``msmseed.error_message`` instead.  The scratch directory is always
    removed and the original working directory restored.

    @return `msmseed`, updated in place.
    """
    import modeller
    import tempfile
    import shutil
    import copy
    import os
    temp_dir = tempfile.mkdtemp()
    try:
        start_dir = os.getcwd()
    except OSError:
        # The current directory no longer exists; nothing to go back to
        start_dir = None
    try:
        os.chdir(temp_dir)
        aln = _PIR_alignment(ref_msmseed.template_sequence,
                             ref_msmseed.template_id,
                             msmseed.template_sequence, msmseed.template_id)
        with open('aln_tmp.pir', 'w') as alignment_file:
            alignment_file.writelines(aln)
        template_pdb = msmseed.template_structure
        with open(msmseed.template_id + '.pdb', 'w') as template_file:
            template_pdb.writeFile(template_pdb.topology,
                                   template_pdb.positions, template_file)
        ref_pdb = ref_msmseed.template_structure
        with open(ref_msmseed.template_id + '.pdb', 'w') as ref_file:
            ref_pdb.writeFile(ref_pdb.topology, ref_pdb.positions, ref_file)
        modeller.log.none()
        env = modeller.environ()
        env.io.atom_files_directory = temp_dir
        aln = modeller.alignment(env,
                                 file='aln_tmp.pir',
                                 align_codes=(ref_msmseed.template_id,
                                              msmseed.template_id))
        mdl = modeller.model(env, file=ref_msmseed.template_id + '.pdb')
        mdl2 = modeller.model(env, file=msmseed.template_id + '.pdb')
        atmsel = modeller.selection(mdl).only_atom_types('CA')
        r = atmsel.superpose(mdl2, aln)
        msmseed.rmsd_to_reference = copy.deepcopy(r.rms)
    except Exception as e:
        # Exception.message does not exist on Python 3; str() works on both
        msmseed.error_message = str(e)
    finally:
        # Leave the scratch directory before deleting it, so the process
        # is not left with a removed working directory
        if start_dir is not None:
            os.chdir(start_dir)
        shutil.rmtree(temp_dir)
    return msmseed
Ejemplo n.º 31
0
def align(target_name: str, target_sequence: str, template_name: str,
          template_chain: str) -> None:
    """Align a target sequence to one chain of a template structure.

    Chain `template_chain` of the template is read into a Modeller model
    (a file called f'{template_name}.pdb' is assumed to exist already),
    the target sequence is appended as an in-memory PIR entry, align2d()
    is run, and the alignment is written to
    f'alignment_{target_name}_and_{template_name}.pir'.

    Uses the module-level Modeller environment `env`.
    """
    target_pir = f'>P1;{target_name}\nsequence:{target_name}::::::::\n{target_sequence}*'
    target_pir = StringIO(target_pir)
    alignment_instance = m.alignment(env)
    model_instance = m.model(env)
    model_instance.read(file=template_name,
                        model_segment=(f'FIRST:{template_chain}',
                                       f'LAST:{template_chain}'))
    alignment_instance.append_model(model_instance,
                                    align_codes=template_name,
                                    atom_files=template_name)
    alignment_instance.append(file=target_pir, align_codes=target_name)
    alignment_instance.align2d()
    alignment_instance.write(file=f'alignment_{target_name}_and_{template_name}.pir')
Ejemplo n.º 32
0
def salign0(env, ff1, ff2):
    """Structurally align two structures with SALIGN; return the alignment.

    Both structures are read from their first chain to the end and added
    to one alignment.  The atoms used for fitting are chosen from the
    first structure by determine_fit_atoms().  SALIGN is then run three
    times with different feature weights; only the last pass writes the
    fitted structures.
    """
    import modeller
    aln = modeller.alignment(env)
    whole_structure = ('FIRST:@', 'END:')

    first = modeller.model(env, file=ff1, model_segment=whole_structure)
    fit_atoms = determine_fit_atoms(first)
    aln.append_model(first, atom_files=ff1, align_codes=ff1)

    second = modeller.model(env, file=ff2, model_segment=whole_structure)
    aln.append_model(second, atom_files=ff2, align_codes=ff2)

    # (feature weights, write fitted structures?, write whole PDB?)
    passes = (
        ((1., 0., 0., 0., 1., 0.), False, True),
        ((1., 0.5, 1., 1., 1., 0.), False, True),
        ((1., 1., 1., 1., 1., 0.), True, False),
    )
    for feature_weights, do_write_fit, write_whole in passes:
        aln.salign(rms_cutoff=3.5,
                   normalize_pp_scores=False,
                   rr_file='$(LIB)/as1.sim.mat',
                   overhang=30,
                   gap_penalties_1d=(-450, -50),
                   gap_penalties_3d=(0, 3),
                   gap_gap_score=0,
                   gap_residue_score=0,
                   fit_atoms=fit_atoms,
                   alignment_type='tree',
                   feature_weights=feature_weights,
                   improve_alignment=True,
                   fit=True,
                   write_fit=do_write_fit,
                   write_whole_pdb=write_whole,
                   output='ALIGNMENT QUALITY')
    return aln
Ejemplo n.º 33
0
 def _structureX_seq_from_modeller(self):
     """Return the header of the structureX sequence entry as a string.

     The structure named by ``self._id`` is read with Modeller and written
     out as an alignment file called ``[self._id]_structureX.seq``.  The
     first two non-empty lines of that file are returned, joined by a
     newline and followed by a line containing only ``*``.
     """
     env = modeller.environ()
     model = modeller.model(env, file=self._id)
     aln = modeller.alignment(env)
     aln.append_model(model, align_codes=self._id)
     out_file = self._id + "_structureX.seq"
     aln.write(file=out_file)
     # Close the file deterministically instead of leaking the handle
     with open(out_file, "r") as fh:
         lines = [line for line in fh.read().split("\n") if line]
     return "\n".join(lines[:2]) + "\n*"
Ejemplo n.º 34
0
 def test_read_alnstructure(self):
     """Atoms can be loaded into IMP from a Modeller alignment structure"""
     environ = self.get_environ()
     mod_model = modeller.model(environ)
     mod_model.build_sequence('C')
     aln = modeller.alignment(environ)
     aln.append_model(mod_model, align_codes='test', atom_files='test')

     imp_model = IMP.Model()
     loader = IMP.modeller.ModelLoader(aln[0])
     hierarchy = loader.load_atoms(imp_model)
     all_atoms = IMP.atom.get_by_type(hierarchy, IMP.atom.ATOM_TYPE)
     self.assertEqual(7, len(all_atoms))

     # Alignment structures carry neither charges nor CHARMM types
     first_atom = all_atoms[0]
     self.assertEqual(IMP.atom.Charged.get_is_setup(first_atom), False)
     self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(first_atom), False)
Ejemplo n.º 35
0
    def test_feature_triplet_residue(self):
        """Triplet features should honor the residue qualifier"""
        environ = self.get_environ()
        mlib = self.get_mdt_library()
        mlib.tuple_classes.read("test/data/trpcls-residue.lib")
        triplet_type = mdt.features.TupleType(mlib)
        table = mdt.Table(mlib, features=triplet_type)

        model = modeller.model(environ)
        model.build_sequence("AAACAAACSAA")
        aln = modeller.alignment(environ)
        aln.append_model(model, align_codes="test")

        table.add_alignment(aln)
        observed = list(table)
        self.assertEqual(observed, [6.0, 2.0, 1.0, 1.0, 0.0, 0.0])
Ejemplo n.º 36
0
 def test_read_alnstructure(self):
     """Atoms can be loaded into IMP from a Modeller alignment structure"""
     environ = self.get_environ()
     mod_model = modeller.model(environ)
     mod_model.build_sequence('C')
     aln = modeller.alignment(environ)
     aln.append_model(mod_model, align_codes='test', atom_files='test')

     imp_model = IMP.kernel.Model()
     loader = IMP.modeller.ModelLoader(aln[0])
     hierarchy = loader.load_atoms(imp_model)
     all_atoms = IMP.atom.get_by_type(hierarchy, IMP.atom.ATOM_TYPE)
     self.assertEqual(7, len(all_atoms))

     # Alignment structures carry neither charges nor CHARMM types
     first_atom = all_atoms[0]
     self.assertEqual(IMP.atom.Charged.get_is_setup(first_atom), False)
     self.assertEqual(IMP.atom.CHARMMAtom.get_is_setup(first_atom), False)
Ejemplo n.º 37
0
 def test_script3(self):
     """Test step 3 (density segmentation)"""
     # Make sure the script runs without errors
     subprocess.check_call(['scripts/script3_density_segmentation.py'])
     # Should have produced a PDB with coordinates of all 14 subunit centers
     e = modeller.environ()
     m = modeller.model(e, file='output/groel_segments_center.pdb')
     self.assertEqual(len(m.atoms), 14)
     self.assertEqual(len(m.residues), 14)
     # load_configuration file should load all 14 subunits, and set level
     # (15 lines in total); close the file before it is deleted below
     with open('output/load_configuration.cmd') as fh:
         wc = len(fh.readlines())
     self.assertEqual(wc, 15)
     # Remove everything the script produced
     os.unlink('output/load_configuration.cmd')
     os.unlink('output/groel_segments_center.pdb')
     for i in range(14):
         os.unlink('output/groel_subunit_%d.mrc' % i)
Ejemplo n.º 38
0
    def test_detect_invalid_residue_types_bad(self):
        """Test _detect_invalid_residue_types() with bad sequence"""
        with utils.temporary_directory() as tmpdir:
            fname = os.path.join(tmpdir, 'test.pdb')
            with open(fname, 'w') as fh:
                fh.write("""
ATOM      1  N   CYS A   1      18.511  -1.416  15.632  1.00  6.84           C
ATOM      2  C   CYS A   1      18.511  -1.416  15.632  1.00  6.84           C
ATOM      3  N   HIE A   2      18.511  -1.416  15.632  1.00  6.84           C
ATOM      4  C   HIE A   2      18.511  -1.416  15.632  1.00  6.84           C
ATOM      5  N   HSD B   3      18.511  -1.416  15.632  1.00  6.84           C
ATOM      6  C   HSD B   3      18.511  -1.416  15.632  1.00  6.84           C
""")
            e = modeller.environ()
            m = modeller.model(e, file=fname)
        self.assertRaises(cleaning.InvalidResiduesError,
                          cleaning._detect_invalid_residue_types, m)
Ejemplo n.º 39
0
    def test_dihedral_diff_periodic(self):
        """Omega dihedral difference features must wrap around the circle"""

        def place_omega_atoms(model, degrees):
            # Fix CA:1, C:1 and N:2 in place; only CA:2 depends on the angle
            radians = math.pi * degrees / 180.0
            ca1 = model.atoms["CA:1"]
            c1 = model.atoms["C:1"]
            n2 = model.atoms["N:2"]
            ca2 = model.atoms["CA:2"]
            n2.x, n2.y, n2.z = 0.0, 0.0, 0.0
            c1.x, c1.y, c1.z = -2.0, 0.0, 0.0
            ca1.x, ca1.y, ca1.z = -2.0, 2.0, 0.0
            ca2.x = 0.0
            ca2.y = 2.0 * math.cos(radians)
            ca2.z = 2.0 * math.sin(radians)

        environ = self.get_environ()
        mlib = self.get_mdt_library()
        # Bins start slightly below -180 to tolerate floating point rounding
        bins = mdt.uniform_bins(36, -180.01, 10)
        omegadiff = mdt.features.OmegaDihedralDifference(mlib, bins)
        # The difference is taken the short way around the circle, so the
        # step from -100 to 100 degrees counts as -160, not 200
        cases = (
            (80.0, 80.0, 0.0),
            (80.0, -80.0, -160.0),
            (-80.0, 80.0, 160.0),
            (-100.0, 100.0, -160.0),
            (100.0, -100.0, 160.0),
        )
        for first_angle, second_angle, expected in cases:
            table = mdt.Table(mlib, features=omegadiff)
            aln = modeller.alignment(environ)
            for angle in (first_angle, second_angle):
                model = modeller.model(environ)
                model.build_sequence("CC")
                place_omega_atoms(model, angle)
                aln.append_model(model, atom_files="test", align_codes="test")
            table.add_alignment(aln, sympairs=True)
            # One data point per residue, two in total
            self.assertInTolerance(table.sample_size, 2.0, 1e-5)
            # The last residue has no omega, so it lands in the undefined bin
            self.assertInTolerance(table[-1], 1.0, 1e-5)
            bin_index = int((expected + 180.0) / 10.0)
            self.assertInTolerance(table[bin_index], 1.0, 1e-5)
Ejemplo n.º 40
0
 def find(self):
     """Count restrained and charged contacts for every residue.

     Reads the model from ``self.pdb_file`` (kept as ``self._m``) and
     walks the atom pairs that get_restrained_atoms() yields for
     ``self.rsr_file``.  For each residue this records

     - ``self._total``: the number of pairs the residue takes part in;
     - ``self._contacts``: (1-based residue number, count) tuples, where
       count is the number of pairs for which charged_ca_pair() is true,
       or 0 unless the residue's total exceeds 143.
     """
     m = self._m = modeller.model(self.env, file=self.pdb_file)
     n_residues = len(m.residues)
     charge = [0] * n_residues
     total = [0] * n_residues
     # Keep the restraints file open for the whole loop (it may be read
     # lazily) and close it deterministically afterwards
     with open(self.rsr_file) as fh:
         for a1, a2 in get_restrained_atoms(m, fh):
             r1 = a1.residue.index - 1
             r2 = a2.residue.index - 1
             total[r1] += 1
             total[r2] += 1
             if charged_ca_pair(a1, a2):
                 charge[r1] += 1
                 charge[r2] += 1
     self._total = total
     # NOTE(review): the 143 threshold is unexplained here -- confirm its
     # meaning against the caller before changing it
     self._contacts = [(n + 1, c if t > 143 else 0)
                       for n, (c, t) in enumerate(zip(charge, total))]
Ejemplo n.º 41
0
def get_q_ca(target, templates, rcut):
    """Compute Q scores of every template against a target and save them.

    Coordinates and distances are derived from `target`; get_qi_ca() is
    then called for each entry of `templates` with cutoff `rcut`.  The two
    values it returns are collected and written with write_q_scores() to
    ``qscore1to<N>.dat`` and ``qs_cut1to<N>.dat`` in the current
    directory, where N is the number of target coordinates.
    """
    import modeller

    modeller.log.none()
    e = modeller.environ()
    e.io.hetatm = False

    m = modeller.model(e)
    coord = get_coordinates(m, target)
    dist = get_distances(coord)
    q_tot = []
    q_cut = []
    for template in templates:
        t, c = get_qi_ca(m, len(coord), dist, template, rcut)
        q_tot.append(t)
        q_cut.append(c)
    # Use context managers so both output files are flushed and closed
    with open('qscore1to%d.dat' % len(coord), 'w') as fh:
        write_q_scores(q_tot, fh)
    with open('qs_cut1to%d.dat' % len(coord), 'w') as fh:
        write_q_scores(q_cut, fh)
    def test_simple(self):
        """A plain end-to-end run of get_pm_initialstruct should succeed"""
        from allosmod.get_pm_initialstruct import get_pm_initialstruct
        with utils.temporary_directory() as tmpdir:
            self.setup_inputs(tmpdir)

            command = ['allosmod', 'get_pm_initialstruct', '--target', 'foo',
                       '--keep-alignment', 'test.aln', 'templates',
                       '.', '1', 'slow']
            check_output(command, cwd=tmpdir)

            pred_dir = os.path.join(tmpdir, 'pred_1fdx')
            env = modeller.environ()
            mdl = modeller.model(
                env, file=os.path.join(pred_dir, 'foo.B99990001.pdb'))
            codes = [res.code for res in mdl.residues]
            self.assertEqual(codes, ['A', 'W'])
            self.assertEqual(mdl.chains[0].name, 'A')

            # Remove every file the run is expected to have produced
            produced = ('1fdx', 'foo.B99990001.pdb', 'foo.ini', 'foo.sch',
                        'test.aln', 'foo.D00000001', 'foo.rsr',
                        'foo.V99990001')
            for name in produced:
                os.unlink(os.path.join(pred_dir, name))
Ejemplo n.º 43
0
 def test_simple(self):
     """A plain end-to-end run of make_mod_inputs should succeed"""
     with allosmod.util.temporary_directory() as tempdir:
         self.setup_inputs(dir=tempdir)
         command = ['allosmod', 'make_mod_inputs', '--', '1fdx',
                    'templates', '-3333', '3', '3', '3', '4']
         check_output(command, cwd=tempdir)

         env = modeller.environ()
         for ini_name in ('random.ini', '1fdx.ini'):
             mdl = modeller.model(env, file=os.path.join(tempdir, ini_name))
             codes = [res.code for res in mdl.residues]
             self.assertEqual(codes, ['A', 'Y'])
             # The CA-only input should have become an all-atom model
             self.assertEqual(len(mdl.atoms), 18)

         with open(os.path.join(tempdir, '1fdx.rsr')) as rsr:
             n_lines = len(rsr.readlines())
             self.assertEqual(n_lines, 78)

         # Remove the inputs and outputs of the run
         leftovers = ('templates', 'avgpdb.pdb', '5fd1', 'align.ali',
                      'random.ini', '1fdx.ini', '1fdx.rsr')
         for name in leftovers:
             os.unlink(os.path.join(tempdir, name))
Ejemplo n.º 44
0
    def test_modeller_restraints(self):
        """Modeller restraints should be usable from within IMP"""
        env = modeller.environ()
        env.edat.dynamic_sphere = False
        env.libs.topology.read("${LIB}/top_heav.lib")
        env.libs.parameters.read("${LIB}/par.lib")

        modmodel = modeller.model(env)
        modmodel.build_sequence("GGCC")
        # Gaussian restraint on the distance between first and last atoms
        end_to_end = modeller.features.distance(modmodel.atoms[0],
                                                modmodel.atoms[-1])
        gaussian = modeller.forms.gaussian(
            feature=end_to_end, mean=10.0, stdev=1.0,
            group=modeller.physical.xy_distance)
        modmodel.restraints.add(gaussian)

        imp_model = IMP.kernel.Model()
        protein = IMP.modeller.ModelLoader(modmodel).load_atoms(imp_model)
        atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
        wrapped = IMP.modeller.ModellerRestraints(imp_model, modmodel, atoms)
        imp_model.add_restraint(wrapped)

        assertSimilarModellerIMPScores(self, modmodel, protein)
        self.assertAlmostEqual(imp_model.evaluate(False), 5.7837, delta=1e-3)
Ejemplo n.º 45
0
    def test_imp_restraints(self):
        """IMP restraints should be usable from within Modeller"""
        env = modeller.environ()
        env.edat.dynamic_sphere = False
        env.libs.topology.read('${LIB}/top_heav.lib')
        env.libs.parameters.read('${LIB}/par.lib')
        modmodel = modeller.model(env)
        modmodel.build_sequence('GGCC')

        imp_model = IMP.Model()
        protein = IMP.modeller.ModelLoader(modmodel).load_atoms(imp_model)
        atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
        # Harmonic distance restraint between the first and last atoms
        harmonic = IMP.core.Harmonic(10.0, 1.0)
        restraint = IMP.core.DistanceRestraint(imp_model, harmonic,
                                               atoms[0], atoms[-1])
        sf = IMP.core.RestraintsScoringFunction([restraint])

        # Expose the IMP scoring function to Modeller as an energy term
        energy_terms = modmodel.env.edat.energy_terms
        energy_terms.append(IMP.modeller.IMPRestraints(atoms, sf))
        assertSimilarModellerIMPScores(self, sf, modmodel, protein)
        self.assertAlmostEqual(sf.evaluate(False), 9.80, delta=1e-2)
Ejemplo n.º 46
0
 def test_get_coordinates_sc(self):
     """get_coordinates_sc() should return one entry per residue"""
     env = modeller.environ()
     mdl = modeller.model(env)
     pdb_path = os.path.join(TOPDIR, 'test', 'input', 'test_coord.pdb')
     coord = cryptosite.analysis.get_coordinates_sc(mdl, pdb_path)
     self.assertEqual(len(coord), 4)

     # Residue 1: GLY without a CA -> no coordinates
     self.assertEqual(coord[0], None)

     # Residue 2: GLY with a CA -> the coordinates of the GLY, (0,0,0)
     for axis, expected in zip('xyz', (0., 0., 0.)):
         self.assertAlmostEqual(getattr(coord[1], axis), expected, places=1)

     # Residue 3: MET without a sidechain -> no coordinates
     self.assertEqual(coord[2], None)

     # Residue 4: MET with a sidechain -> mean coordinates are returned
     for axis, expected in zip('xyz', (5., 10., 15.)):
         self.assertAlmostEqual(getattr(coord[3], axis), expected, places=1)
Ejemplo n.º 47
0
def get_contacts(pdb_file, rcut):
    """Yield (residue_i, residue_j, distance) contacts found in a PDB file.

    The file is read with HETATM records enabled.  Every residue is
    reduced to its get_average_coordinate() value, and all pairs at least
    three positions apart are passed to get_contact_dist() together with
    the squared cutoff.  Pairs for which that returns None, and pairs in
    which both residues are HETATM, are skipped.
    """
    import modeller

    modeller.log.none()
    env = modeller.environ()
    env.io.hetatm = True

    mdl = modeller.model(env, file=pdb_file)

    rcut_squared = rcut * rcut
    centers = [get_average_coordinate(res) for res in mdl.residues]
    n_centers = len(centers)
    for i in range(n_centers - 3):
        for j in range(i + 3, n_centers):
            res_i = centers[i].r
            res_j = centers[j].r
            # het-het contacts are not reported
            if not (res_i.hetatm and res_j.hetatm):
                dist = get_contact_dist(centers[i], centers[j], rcut_squared)
                if dist is not None:
                    yield res_i, res_j, dist
Ejemplo n.º 48
0
def get_qioft(landscape, rcut=11.):
    """Calculate Qi for all models in a landscape.

    For every subdirectory of `landscape`, the first line of its 'list'
    file names the reference structure ('pm_' is prepended).  Qi is then
    computed with get_qi() for each 'pm.pdb.B[1-8]*.pdb' model found
    there, in sorted order, and written one model per line to
    'qioft_<reference>_<rcut>sc.dat' in the same subdirectory.
    """
    import modeller
    modeller.log.none()
    env = modeller.environ()
    env.io.hetatm = False
    mdl = modeller.model(env)

    for subdir in _get_subdirectories(landscape):
        with open(os.path.join(subdir, 'list')) as listing:
            ref_name = 'pm_' + listing.readline().strip()
            ref_path = os.path.join(subdir, ref_name)
        pattern = os.path.join(subdir, 'pm.pdb.B[1-8]*.pdb')
        model_files = sorted(glob.glob(pattern))

        coord = get_coordinates_sc(mdl, ref_path)
        dist = get_distances(coord, rcut)
        out_name = 'qioft_%s_%dsc.dat' % (ref_name, int(rcut))
        with open(os.path.join(subdir, out_name), 'w') as out:
            for model_file in model_files:
                get_qi(mdl, len(coord), dist, model_file, out)
                out.write('\n')
Ejemplo n.º 49
0
    def test_modeller_restraints(self):
        """Modeller restraints should be usable from within IMP"""
        env = modeller.environ()
        env.edat.dynamic_sphere = False
        env.libs.topology.read('${LIB}/top_heav.lib')
        env.libs.parameters.read('${LIB}/par.lib')

        modmodel = modeller.model(env)
        modmodel.build_sequence('GGCC')
        # Gaussian restraint on the distance between first and last atoms
        first_atom = modmodel.atoms[0]
        last_atom = modmodel.atoms[-1]
        end_to_end = modeller.features.distance(first_atom, last_atom)
        gaussian = modeller.forms.gaussian(
            feature=end_to_end, mean=10.0, stdev=1.0,
            group=modeller.physical.xy_distance)
        modmodel.restraints.add(gaussian)

        imp_model = IMP.kernel.Model()
        loader = IMP.modeller.ModelLoader(modmodel)
        protein = loader.load_atoms(imp_model)
        atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
        imp_model.add_restraint(
            IMP.modeller.ModellerRestraints(imp_model, modmodel, atoms))

        assertSimilarModellerIMPScores(self, modmodel, protein)
        self.assertAlmostEqual(imp_model.evaluate(False), 5.7837, delta=1e-3)
Ejemplo n.º 50
0
    def _create_aligment(self, env, base_models):
        """Align the target sequence with its template structures.

        The target sequence (``self.seqrecord``) is placed first in a new
        alignment, followed by one model per entry of `base_models`.
        After malign(), an identity table ('<id>_family.mat') is written,
        and the alignment is saved in PIR ('<id>.ali') and PAP ('<id>.pap')
        format inside ``self.model_directory()``.

        @param env          Modeller environment; its atom file search
                            path is set to ``self.out_folder``.
        @param base_models  paths of the template PDB chain files.
        @return the list of template codes added to the alignment.
        """
        _log.debug("creating alignments for %s with %s pdbs" %
                   (self.seqrecord.id, len(base_models)))
        aligned_models = []

        env.io.atom_files_directory = [self.out_folder + '/']

        aln = alignment(env)

        aln.append_sequence(str(self.seqrecord.seq))
        aln[0].code = str(self.seqrecord.id)

        # Templates start at index 1; index 0 is the target sequence
        for i, pdb_chain_file_path in enumerate(base_models, 1):
            # TODO: replace this ad-hoc parsing of the file name
            file_name = pdb_chain_file_path.split("/")[-1]
            code = file_name.replace(".ent", "").replace("pdb", "")
            m = model(env, file=code)
            aln.append_model(m, align_codes=code)
            aln[i].code = code
            aligned_models.append(code)

        aln.malign()
        aln.id_table(matrix_file=self.seqrecord.id + '_family.mat')

        # Both outputs share one prefix; previously the .pap path lacked
        # the "/" separator and ended up outside the model directory
        output_prefix = self.model_directory() + "/" + self.seqrecord.id
        ali_path = output_prefix + '.ali'
        aln.write(file=ali_path, alignment_format='PIR')
        # Report the path that was actually checked
        assert os.path.exists(ali_path), \
            "NOOOOOOOOOOOO!!!!:  " + os.path.abspath(ali_path)
        aln.write(file=output_prefix + '.pap', alignment_format='PAP')
        return aligned_models
def align_res_nums(apo_pdb_file, apo_pdb_id, apo_chain_id, holo_pdb_file,
                   holo_pdb_id, holo_chain_id):
    """Map residue numbers of an apo chain onto those of a holo chain.

    The two chains are aligned with Modeller's salign(); the alignment is
    written to a temporary PIR file in the current directory, parsed, and
    the file is removed again.  Residue numbers for each chain come from
    get_numbers_from_pdb().

    Returns a dict keyed by apo residue number.  The value is the holo
    residue number aligned to it, or the string "NA" where the holo
    sequence has a gap.  The dict and both aligned sequences are also
    printed.
    """
    env = modeller.environ()
    aln = modeller.alignment(env)
    apo_model = modeller.model(env,
                               file=apo_pdb_file,
                               model_segment=("FIRST:%s" % (apo_chain_id),
                                              "LAST:%s" % (apo_chain_id)))
    aln.append_model(apo_model,
                     atom_files=apo_pdb_id,
                     align_codes="%s%s" % (apo_pdb_id, apo_chain_id))
    holo_model = modeller.model(env,
                                file=holo_pdb_file,
                                model_segment=("FIRST:%s" % (holo_chain_id),
                                               "LAST:%s" % (holo_chain_id)))
    aln.append_model(holo_model,
                     atom_files=holo_pdb_id,
                     align_codes="%s%s" % (holo_pdb_id, holo_chain_id))
    aln.salign()
    alignment_filename = "%s%s_%s%s_salign_output.ali" % (
        apo_pdb_id, apo_chain_id, holo_pdb_id, holo_chain_id)
    aln.write(file=alignment_filename, alignment_format="PIR")
    with open(alignment_filename, "r") as alignment_opened:
        alignment_lines = alignment_opened.readlines()
        # Ignore the header lines.  The format requires a 2-line header; there may be a blank line before this.
        if alignment_lines[0][0] == ">":
            line_index = 2
        else:
            line_index = 3
        # Collect the apo sequence, which may span several lines and ends
        # with a "*" terminator (stripped from the result)
        apo_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            apo_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                apo_sequence_aligned = apo_sequence_aligned[:-1]
                break
            line_index += 1
        # Skip the second entry's 2-line header, again allowing for one
        # extra line in front of it
        if alignment_lines[line_index + 1][0] == ">":
            line_index += 3
        else:
            line_index += 4
        # Collect the holo sequence in the same way
        holo_sequence_aligned = ""
        while True:
            next_line = alignment_lines[line_index].strip()
            holo_sequence_aligned += next_line
            if next_line[len(next_line) - 1] == "*":
                holo_sequence_aligned = holo_sequence_aligned[:-1]
                break
            line_index += 1
    os.remove(alignment_filename)
    apo_pdb_res_numbers = get_numbers_from_pdb(apo_pdb_file, apo_chain_id)
    holo_pdb_res_numbers = get_numbers_from_pdb(holo_pdb_file, holo_chain_id)
    dict_key_apo_val_holo = {}
    holo_residues_passed = 0  # incremented whenever the iteration reaches a spot in the alignment where the holo sequence has a residue.
    apo_residues_passed = 0
    # Walk the alignment column by column, tracking how many residues of
    # each sequence have been consumed so far
    for i in range(len(holo_sequence_aligned)):
        if (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i] !=
                                                 "-"):
            #print(len(apo_pdb_res_numbers), apo_residues_passed, len(holo_pdb_res_numbers), holo_residues_passed)
            #print(apo_pdb_res_numbers, holo_pdb_res_numbers)
            #print(len(apo_sequence_aligned), len(holo_sequence_aligned), "len")
            #print(apo_sequence_aligned, holo_sequence_aligned)
            # Both sequences have a residue here: record the pairing
            dict_key_apo_val_holo[apo_pdb_res_numbers[
                apo_residues_passed]] = holo_pdb_res_numbers[
                    holo_residues_passed]
            holo_residues_passed += 1
            apo_residues_passed += 1
        elif (apo_sequence_aligned[i] != "-") and (holo_sequence_aligned[i]
                                                   == "-"):
            # Apo residue with no holo counterpart
            dict_key_apo_val_holo[
                apo_pdb_res_numbers[apo_residues_passed]] = "NA"
            apo_residues_passed += 1
        elif (apo_sequence_aligned[i]
              == "-") and (holo_sequence_aligned[i] != "-"):
            # Holo residue with no apo counterpart: nothing to record
            holo_residues_passed += 1
    print(dict_key_apo_val_holo)
    print(apo_sequence_aligned)
    print(holo_sequence_aligned)
    return dict_key_apo_val_holo
Ejemplo n.º 52
0
# This demonstrates using IMP.kernel.Restraints as additional energy terms in the
# Modeller scoring function, so that IMP scoring terms can be incorporated into
# existing comparative modeling pipelines.
#

import modeller
import IMP
import IMP.core
import IMP.modeller

# Set up Modeller and build a model from the GGCC primary sequence
e = modeller.environ()
e.edat.dynamic_sphere = False
# Read the topology and parameter libraries from Modeller's ${LIB} directory
# (presumably required before build_sequence() can construct the atoms)
e.libs.topology.read('${LIB}/top_heav.lib')
e.libs.parameters.read('${LIB}/par.lib')
modmodel = modeller.model(e)
modmodel.build_sequence('GGCC')

# Set up IMP and load the Modeller model in as a new Hierarchy
m = IMP.kernel.Model()
protein = IMP.modeller.ModelLoader(modmodel).load_atoms(m)

# Create a simple IMP distance restraint between the first and last atoms,
# scored with Harmonic(10.0, 1.0) on the particles behind those atoms
atoms = IMP.atom.get_by_type(protein, IMP.atom.ATOM_TYPE)
r = IMP.core.DistanceRestraint(IMP.core.Harmonic(10.0,
                                                 1.0), atoms[0].get_particle(),
                               atoms[-1].get_particle())
# Register the restraint with the IMP model
m.add_restraint(r)

# Use the IMPRestraints class to add all of the IMP restraints to the
# Modeller scoring function
Ejemplo n.º 53
0
def _prepare_dir(path, default):
    '''Return the absolute path of a directory, creating it when missing.

       path: directory chosen by the caller, or the string 'default'
       default: directory used when path is 'default'
    '''
    directory = os.path.abspath(default if path == 'default' else path)
    if not os.path.exists(directory):
        os.mkdir(directory)
    return directory


def runmodeller(target, database_path='default', models_path='default',
                templates_path='default', working='default',
                mod_per_temp=20, excludes=(), max_seq_id=0.95,
                min_seq_id=0.25, max_eval=0.01, num_iter=1, gaps=False):
    '''Build comparative models of a target sequence with MODELLER.

       File requirement: a .ali sequence file and a pdb_95.pir database file.

       The function searches the sequence database for templates with a
       profile build, filters the hits by sequence identity, downloads the
       template PDB files, aligns the target to each template and finally
       runs automodel once per template.  The process changes into the
       working directory while it runs and always changes back before
       returning or raising.

       Parameters:
       target: a .ali file path specifying the sequence of the protein. The
               file name without the .ali suffix is used as the target's
               alignment code.
       database_path: the folder holding pdb_95.pir. Default is ./database
       models_path: the folder to save the models to. Default is ./models
       templates_path: the folder to save the templates to. Default is
               ./templates
       working: the working dir. Default is ./working
       mod_per_temp: number of models generated per template. Default is 20
       excludes: template codes to be excluded from the templates
       max_seq_id: maximum sequence identity for a template to be
               considered. Ranges from 0 to 1. Default is 0.95
       min_seq_id: minimum sequence identity for a template to be
               considered. Ranges from 0 to 1. Default is 0.25
       max_eval: passed to the profile build as max_aln_evalue
       num_iter: passed to the profile build as n_prof_iterations
       gaps: passed to the profile build as gaps_in_target

       Returns a list of paths of the generated models (empty when no
       template passes the filters).

       Raises ValueError when target is not a .ali file.
    '''
    # Validate the target before anything touches the file system; the
    # alignment code is derived from the file name.
    target_file = os.path.basename(target)
    if not target_file.endswith('.ali'):
        raise ValueError("target must be a .ali file, got %r" % (target,))
    target_name = target_file[:-4]
    target_dir = os.path.dirname(os.path.abspath(target))
    target_path = target_dir + '/' + target_file

    #set log to verbose
    modeller.log.verbose()
    env = modeller.environ()
    entering_dir = os.getcwd()

    #set paths (the database folder is only read, so it is never created)
    if database_path == 'default':
        database_dir = os.path.abspath('./database')
    else:
        database_dir = os.path.abspath(database_path)
    working_dir = _prepare_dir(working, './working')
    models_dir = _prepare_dir(models_path, './models')
    template_dir = _prepare_dir(templates_path, './templates')

    profile_prefix = working_dir + '/' + target_name + '_profile'
    filelist = []

    #cd to working. The finally clause cds back even when MODELLER fails
    os.chdir(working_dir)
    try:
        #-- Prepare the input files
        #-- Read in the sequence database
        sdb = modeller.sequence_db(env)
        sdb.read(seq_database_file=database_dir + '/pdb_95.pir',
                 seq_database_format='PIR',
                 chains_list='ALL',
                 minmax_db_seq_len=(30, 4000),
                 clean_sequences=True)

        #-- Write the sequence database in binary form
        sdb.write(seq_database_file=database_dir + '/pdb_95.bin',
                  seq_database_format='BINARY',
                  chains_list='ALL')

        #-- Now, read in the binary database
        sdb.read(seq_database_file=database_dir + '/pdb_95.bin',
                 seq_database_format='BINARY',
                 chains_list='ALL')

        #-- Read in the target sequence/alignment
        target_aln = modeller.alignment(env)
        target_aln.append(file=target_path,
                          alignment_format='PIR',
                          align_codes='ALL')

        #-- Convert the input sequence/alignment into
        #   profile format
        target_prf = target_aln.to_profile()

        #-- Scan sequence database to pick up homologous sequences
        target_prf.build(sdb,
                         matrix_offset=-450,
                         rr_file='${LIB}/blosum62.sim.mat',
                         gap_penalties_1d=(-500, -50),
                         n_prof_iterations=num_iter,
                         check_profile=True,
                         max_aln_evalue=max_eval,
                         gaps_in_target=gaps)

        #-- Write out the profile in text format
        target_prf.write(file=profile_prefix + '.prf',
                         profile_format='TEXT')

        #-- Convert the profile back to alignment format
        target_aln = target_prf.to_alignment()

        #-- Write out the alignment file
        target_aln.write(file=profile_prefix + '.ali',
                         alignment_format='PIR')
        #CLEAN UP
        del sdb, target_aln, target_prf, env

        #Read the name of the templates: keep only 13-column rows whose
        #third column is 'X'; column 2 is the code, column 11 the identity
        templates = []
        with open(profile_prefix + '.prf', 'r') as txt_input:
            for eachline in txt_input:
                if eachline.lstrip(' ').startswith('#'):
                    continue
                entries = eachline.split()
                if len(entries) != 13 or entries[2] != 'X':
                    continue
                templates.append(template(entries[1], float(entries[10])))

        #Select templates (seqid is compared against the limits times 100)
        templates = [tmpl for tmpl in templates
                     if tmpl.code not in excludes
                     and tmpl.seqid <= max_seq_id * 100
                     and tmpl.seqid >= min_seq_id * 100]

        #Download templates pdb
        for tmpl in templates:
            pdbname = tmpl.code.upper()
            url = 'http://www.rcsb.org/pdb/files/%s.pdb' % pdbname
            pdb_path = template_dir + '/' + pdbname.lower() + '.pdb'
            with open(pdb_path, 'w') as pdb_download:
                pdb_download.write(urllib.urlopen(url).read())

        #model alignment
        alnlist = []
        for tmpl in templates:
            env = modeller.environ()
            aln = modeller.alignment(env)
            mdl = modeller.model(env,
                                 file=template_dir + '/' + tmpl.code,
                                 model_segment=('FIRST:' + tmpl.chain,
                                                'LAST:' + tmpl.chain))
            aln.append_model(mdl,
                             align_codes=tmpl.name,
                             atom_files=tmpl.filename)
            aln.append(file=target_path,
                       align_codes=target_name)

            aln.align2d()
            pair_prefix = working_dir + '/' + target_name + '-' + tmpl.name
            aln.write(file=pair_prefix + '.ali', alignment_format='pir')
            aln.write(file=pair_prefix + '.pap', alignment_format='pap')
            alnlist.append(pair_prefix + '.ali')

        #Make models
        for alnfile, tmpl in zip(alnlist, templates):
            env = modeller.environ()
            env.io.atom_files_directory = [target_dir, working_dir,
                                           template_dir]
            a = modeller.automodel.automodel(
                env,
                alnfile=alnfile,
                knowns=tmpl.name,
                sequence=target_name,
                assess_methods=(modeller.automodel.assess.DOPE,
                                modeller.automodel.assess.GA341))
            a.starting_model = 1
            a.ending_model = mod_per_temp
            a.make()

            #automodel writes into the cwd (the working dir); move each
            #model into the models folder, tagged with the template code
            for j in range(1, mod_per_temp + 1):
                suffix = '.B9999' + str(j).zfill(4) + '.pdb'
                scrname = target_name + suffix
                tgtname = (models_dir + '/' + target_name + '_' +
                           tmpl.code + suffix)
                os.rename(scrname, tgtname)
                filelist.append(tgtname)
    finally:
        os.chdir(entering_dir)

    return filelist
Ejemplo n.º 54
0
def _read_pir_sequence(lines, index):
    """Read one aligned sequence from the lines of a PIR alignment file.

    Parameters
    ----------
    lines : list of string
        All lines of the alignment file.
    index : int
        Index of the record's ">" header line, or of a single blank line
        directly before it.

    Returns
    -------
    (sequence, next_index) : (string, int)
        The sequence without its terminating "*", and the index of the line
        following the one that carried the "*".
    """
    # The format requires a 2-line header; there may be a blank line first.
    index += 2 if lines[index].startswith(">") else 3
    pieces = []
    while True:
        piece = lines[index].strip()
        index += 1
        if piece.endswith("*"):
            pieces.append(piece[:-1])
            return "".join(pieces), index
        pieces.append(piece)


def align_res_nums(key_pdb_file, key_chain_id, value_pdb_file, value_chain_id):
    """Determine which residues in one PDB file correspond to which in another PDB file.

    Parameters
    ----------
    key_pdb_file : string
        The location of the pdb file whose residue numbers will be keys in the
        returned dictionary.
    key_chain_id : string
        The chain of key_pdb_file that will be aligned.
    value_pdb_file : string
        The location of the pdb file whose residue numbers will be values in the
        returned dictionary.
    value_chain_id : string
        The chain of value_pdb_file that will be aligned.

    Returns
    -------
    dict_residue_nums : dictionary{string : string}
        The keys and values are string-typed residue numbers (from key_pdb_file and
        value_pdb_file).  Any residues that are missing from value_pdb_file
        will be assigned the value "NA".  If any residues in key_pdb_file are
        classified as HETATMs, then they will only be included in dict_residue_nums
        if they are MSE, MEX, or ABU.  This matches MODELLER's behavior.
    """
    # Imported locally so the function does not depend on a module-level
    # import of os.
    import os

    # A temporary directory to store the output of Modeller's alignment.  It
    # is removed even when Modeller raises.
    temp_dir_path = tempfile.mkdtemp()
    try:
        env = modeller.environ()
        aln = modeller.alignment(env)
        key_model = modeller.model(env,
                                   file=key_pdb_file,
                                   model_segment=("FIRST:%s" % (key_chain_id),
                                                  "LAST:%s" % (key_chain_id)))
        aln.append_model(key_model,
                         atom_files=key_pdb_file,
                         align_codes="key%s" % (key_chain_id))
        value_model = modeller.model(
            env,
            file=value_pdb_file,
            model_segment=("FIRST:%s" % (value_chain_id),
                           "LAST:%s" % (value_chain_id)))
        aln.append_model(value_model,
                         atom_files=value_pdb_file,
                         align_codes="value%s" % (value_chain_id))
        aln.salign()
        # Join with a path separator so the file lands INSIDE the temporary
        # directory and is removed together with it.
        salign_out_loc = os.path.join(
            temp_dir_path,
            "key%s_value%s_salign_output.ali" % (key_chain_id, value_chain_id))
        aln.write(file=salign_out_loc, alignment_format="PIR")
        with open(salign_out_loc, "r") as alignment_opened:
            alignment_lines = alignment_opened.readlines()
    finally:
        shutil.rmtree(temp_dir_path)

    # The key record comes first in the file, the value record second.
    key_sequence_aligned, next_record = _read_pir_sequence(alignment_lines, 0)
    value_sequence_aligned, _ = _read_pir_sequence(alignment_lines,
                                                   next_record)

    key_pdb_res_numbers = get_numbers_from_pdb(key_pdb_file, key_chain_id)
    value_pdb_res_numbers = get_numbers_from_pdb(value_pdb_file,
                                                 value_chain_id)
    dict_residue_nums = {}
    # Each counter is the number of residues (non-gap positions) of that
    # sequence already consumed, i.e. the index of its next residue number.
    value_residues_passed = 0
    key_residues_passed = 0
    for key_char, value_char in zip(key_sequence_aligned,
                                    value_sequence_aligned):
        value_has_residue = value_char != "-"
        if key_char != "-":
            key_resnum = key_pdb_res_numbers[key_residues_passed]
            key_residues_passed += 1
            if value_has_residue:
                # Both sequences have a residue here: map key -> value.
                dict_residue_nums[key_resnum] = value_pdb_res_numbers[
                    value_residues_passed]
            else:
                # The value sequence has a gap here.
                dict_residue_nums[key_resnum] = "NA"
        # A value residue opposite a key gap adds no entry but still
        # advances the value counter.
        if value_has_residue:
            value_residues_passed += 1
    return dict_residue_nums
Ejemplo n.º 55
0
def peptide_rebuild_modeller(name, selection='all', hetatm=0, sequence=None,
        nmodels=1, hydro=0, quiet=1, *, _self=cmd):
    '''
DESCRIPTION

    Remodel the given selection using modeller. This is useful for example to
    build incomplete sidechains. More complicated modelling tasks are not
    the intention of this simple interface.

    Side effects: Alters "type" property for MSE residues in selection
    (workaround for bug #3512313).

USAGE

    peptide_rebuild_modeller name [, selection [, hetatm [, sequence ]]]

ARGUMENTS

    name = string: new object name

    selection = string: atom selection

    hetatm = 0/1: read and model HETATMs (ligands) {default: 0}

    sequence = string: if provided, use this sequence instead of the
    template sequence {default: None}

    nmodels = int: number of models (states) to generate {default: 1}

    hydro = 0/1: use modeller's allhmodel class instead of automodel
    {default: 0}

    quiet = 0/1: suppress the working directory notice and the completion
    message {default: 1}
    '''
    import modeller
    from modeller.automodel import automodel, allhmodel

    import tempfile
    import shutil
    import os
    _assert_package_import()
    from .editing import update_identifiers

    nmodels = int(nmodels)
    hetatm = int(hetatm)
    quiet = int(quiet)

    # Pick the modelling class once; hydro switches to the all-hydrogen one.
    builder_class = allhmodel if int(hydro) else automodel

    # Modeller runs inside a scratch directory that holds every file it
    # reads and writes.
    workdir = tempfile.mkdtemp()
    template_path = os.path.join(workdir, 'template.pdb')
    alignment_path = os.path.join(workdir, 'aln.pir')

    start_dir = os.getcwd()
    os.chdir(workdir)

    if not quiet:
        print(' Notice: PWD=%s' % (workdir))

    try:
        modeller.log.none()
        environment = modeller.environ()
        environment.io.hetatm = hetatm

        # prevent PyMOL to put TER records before MSE residues (bug #3512313)
        _self.alter('(%s) and polymer' % (selection), 'type="ATOM"')

        # Export the selection and read it back as the template structure.
        _self.save(template_path, selection)
        template_model = modeller.model(environment, file=template_path)

        alignment = modeller.alignment(environment)
        alignment.append_model(template_model, align_codes='foo',
                atom_files=template_path)

        # get sequence from non-present atoms
        if not sequence:
            if _self.count_atoms('(%s) & !present' % (selection)):
                sequence = get_seq(selection)

        # With a target sequence, add it as a second entry and align it to
        # the template; otherwise the template is modelled onto itself.
        if sequence:
            alignment.append_sequence(sequence)
            alignment[-1].code = 'bar'
            alignment.malign()
        alignment.write(alignment_path)

        target_code = alignment[-1].code
        structure_codes = []
        for entry in alignment:
            if entry.prottyp.startswith('structure'):
                structure_codes.append(entry.code)

        modeler = builder_class(environment, alnfile=alignment_path,
                sequence=target_code, knowns=structure_codes)
        modeler.max_ca_ca_distance = 30.0

        if nmodels > 1:
            modeler.ending_model = nmodels
            from multiprocessing import cpu_count
            # One local worker per model, capped at the CPU count; a single
            # worker is not worth a parallel job.
            ncpu = min(cpu_count(), nmodels)
            if ncpu > 1:
                from modeller import parallel
                workers = parallel.job(parallel.local_slave()
                        for _ in range(ncpu))
                modeler.use_parallel_job(workers)

        modeler.make()

        # Every generated model is loaded into the new object.
        for result in modeler.outputs:
            _self.load(result['name'], name, quiet=quiet)
    finally:
        os.chdir(start_dir)
        shutil.rmtree(workdir)

    _self.align(name, selection, cycles=0)
    if not sequence:
        update_identifiers(name, selection, _self=_self)

    if not quiet:
        print(' peptide_rebuild_modeller: done')
# The third and fourth command-line arguments are the two residue offsets.
off1 = int(sys.argv[3])
off2 = int(sys.argv[4])

pdb_to_uniprot = modelutils.read_pdb_to_uniprot(pdbfile, chain)
# Invert the mapping so the offsets given on the command line can be looked
# up.  items() is used instead of iteritems() so this runs on Python 2 and 3.
uniprot_to_pdb = {v: k for (k, v) in pdb_to_uniprot.items()}

# Translate both offsets through the inverted mapping; an offset missing
# from the mapping raises KeyError.
off1 = uniprot_to_pdb[off1]
off2 = uniprot_to_pdb[off2]

# Suppress verbose version notice
with open("/dev/null", "w") as fnull:
    oldout = sys.stdout
    sys.stdout = fnull
    try:
        env = modeller.environ()
        mdl = modeller.model(env,
                             file=pdbfile,
                             model_segment=('FIRST:' + chain,
                                            'LAST:' + chain))
    finally:
        # Restore stdout even when Modeller raises, so later output is not
        # sent to the already closed null device.
        sys.stdout = oldout


def find_res(off):
    """Return the one residue of the global model whose number equals off.

    Every residue in ``mdl.residues`` is examined, comparing ``int(res.num)``
    with ``off``.  An Exception is raised unless exactly one residue matches.
    """
    hits = []
    for candidate in mdl.residues:
        if int(candidate.num) == off:
            hits.append(candidate)

    count = len(hits)
    if count == 1:
        return hits[0]

    raise Exception("Found %d residues with PDB offset %d" % (count, off))


# Look up the residue of the loaded model whose number equals the first
# translated offset; find_res raises unless exactly one residue matches.
res1 = find_res(off1)
Ejemplo n.º 57
0
        help='mobile pdb structure file to transfer sequence on',
        type=str)
    parser.add_argument(
        '-r',
        '--ref',
        help='reference pdb structure file with sequence to transfer',
        type=str)
    args = parser.parse_args()

    env = modeller.environ()
    lib = '/usr/lib/modeller9.23/modlib'
    env.libs.topology.read(file=f'{lib}/top_heav.lib')
    env.libs.parameters.read(file=f'{lib}/par.lib')
    aln = modeller.alignment(env)

    target = modeller.model(env, file=args.target)
    target_name = os.path.basename(args.target).split('.')[0]
    aln.append_model(target, align_codes=target_name)

    ref = modeller.model(env, file=args.ref)
    ref_name = os.path.basename(args.ref).split('.')[0]
    aln.append_model(ref, align_codes=ref_name)

    aln.align()
    # aln.align3d()
    alnfile = f'{target_name}_{ref_name}.seq'
    aln.write(file=alnfile)

    mdl = modeller.model(env)
    mdl.generate_topology(aln[ref_name])
    # Assign the average of the equivalent template coordinates to MODEL:
Ejemplo n.º 58
0
    def modelMissingAtoms(self, pdbFilename, outputFilename, chain=' ', debug = False, allHydrogen = False):
        """Model missing atoms/residues in a specified PDB file using MODELLER.

        REQUIRED ARGUMENTS
          pdbFilename - the filename of the PDB file to model missing atoms and residues for
          outputFilename - the filename for the desired final model

        OPTIONAL ARGUMENTS
          chain - the one-character chain ID of the chain to model (default ' ')
          debug - flag to print extra debug output and leave temporary directory (default False)
          allHydrogen - flag to build the model with MODELLER's allhmodel class instead of automodel (default False)

        RAISES
          ParameterException - if pdbFilename does not exist

        NOTES

        The specified chain from pdbFilename is processed through MODELLER to build missing
        atoms and residues specified in the SEQRES entry of the PDB file but not present in
        the PDB file.

        This procedure is loosely based on the protocol appearing at

        http://salilab.org/modeller/wiki/Missing_residues

        The complete sequence is read from the SEQRES fields, and the DBREF field used to
        determine the span of residues described in the SEQRES fields.  A heavy-atom topology
        is constructed in MODELLER for the complete sequence, coordinates present in the PDB file
        transferred, and the remaining heavy-atom coordinates built from ideal geometry.
        Finally, a single standard simulated-annealing-based modeling step is performed using
        the standard automodel protocol but allowing only the atoms and residues that were undefined in
        the PDB file to move.

        The working directory is changed to a temporary directory while MODELLER runs; it is
        restored, and the temporary directory removed (unless debug is set), even if modeling fails.

        """
        import os
        import shutil
        import tempfile

        # Ensure specified PDB file exists.
        if not os.path.exists(pdbFilename):
            raise ParameterException("Specified PDB file %s not found." % pdbFilename)

        # Append full path to pdbFilename and outputFilename
        # (must happen before the working directory changes).
        pdbFilename = os.path.abspath(pdbFilename)
        outputFilename = os.path.abspath(outputFilename)

        # Remember where we are, then create a temporary directory for running MODELLER.
        olddir = os.getcwd()
        tmpdir = tempfile.mkdtemp()
        if debug: print("tmpdir = %s" % tmpdir)

        try:
            # Get the complete sequence without chain breaks from the SEQRES/DBREF fields of the source PDB file.
            first_residue_id, complete_sequence = self.getCompleteSequence(pdbFilename, chain)
            nresidues = len(complete_sequence)
            last_residue_id = first_residue_id + nresidues - 1

            # Get the sequence of residues that are at least partially present in the PDB file as a dictionary.
            # present_sequence_dict[residue_id] is the one-letter-code of the residue residue_id, if there are any ATOM records for this residue.
            present_sequence_dict = self.getPresentSequence(pdbFilename, chain)

            # Generate alignment of the template sequence (residues for which any coordinates are defined) against the target (complete sequence from SEQRES/DBREF)
            # Residues without coordinates become gaps ('-') in the template.
            # TODO: Check integrity against complete_sequence.
            present_sequence = ""
            for residue_id in range(first_residue_id, first_residue_id + nresidues):
                if residue_id in present_sequence_dict:
                    present_sequence += present_sequence_dict[residue_id]
                else:
                    present_sequence += '-'

            # Change working directory to temporary directory.
            os.chdir(tmpdir)

            # Generate alignment file for MODELLER.
            # All lines are formatted first so the file is only opened once the content is known to be valid.
            alignment_filename = os.path.join(tmpdir, 'model.ali')
            alignment_lines = [
                ">P1;%s" % "template",
                "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "structure", pdbFilename, min(present_sequence_dict.keys()), chain, max(present_sequence_dict.keys()), chain, " ", " ", " ", " " ),
                "%s*" % present_sequence,
                "",
                ">P1;%s" % "target",
                "%s:%s:%d:%s:%d:%s:%s:%s:%s:%s" % ( "sequence", "target", first_residue_id, chain, last_residue_id, chain, " ", " ", " ", " " ),
                "%s*" % complete_sequence,
            ]
            with open(alignment_filename, 'w') as alignment_file:
                alignment_file.write("\n".join(alignment_lines) + "\n")
            if debug:
                print("alignment file:")
                with open(alignment_filename, 'r') as alignment_file:
                    print(alignment_file.read().rstrip("\n"))

            # Call MODELLER to generate topology, transfer coordinates, and build from internal coordinates.
            import modeller
            import modeller.automodel

            # Create a new environment.
            env = modeller.environ()

            # Specify the topology and parameters to use.
            # TODO: Is this necessary, or can we rely on the defaults?
            env.libs.topology.read(file='$(LIB)/top_heav.lib')
            env.libs.parameters.read(file='$(LIB)/par.lib')

            # Read in alignment.
            aln = modeller.alignment(env)
            print(alignment_filename)
            aln.append(file=alignment_filename, align_codes='all')

            # Create a model.
            model = modeller.model(env)

            # Generate the topology from the target sequence.
            model.generate_topology(aln['target'])

            # Transfer defined coordinates from template.
            model.transfer_xyz(aln)

            # Determine which atoms are undefined because they are missing in the template, and create a selection from them.
            # An x coordinate of -999 is treated as the marker for an undefined atom.
            missing_atom_indices = []
            for atom_index in range(len(model.atoms)):
                atom = model.atoms[atom_index]
                if atom.x == -999:
                    missing_atom_indices.append(atom_index)

            # DEBUG: Write model coordinates to a PDB file.
            model.write(file=os.path.join(tmpdir,'transferred.pdb'))

            # Build the remaining undefined atomic coordinates from ideal internal coordinates stored in residue topology files.
            model.build(initialize_xyz=False, build_method='INTERNAL_COORDINATES')

            # DEBUG: Write model coordinates to a PDB file.
            if debug: model.write(file=os.path.join(tmpdir,'built.pdb'))

            # Override the 'select_atoms' routine in the 'automodel' class to select only the atoms with undefined atomic coordinates in template PDB.
            # The base class depends on whether an all-hydrogen model was requested.
            if (allHydrogen):
                base_class = modeller.automodel.allhmodel
            else:
                base_class = modeller.automodel.automodel

            class mymodel(base_class):
                def select_atoms(self):
                    missing_atoms = modeller.selection()
                    for atom_index in missing_atom_indices:
                        missing_atoms.add(self.atoms[atom_index])
                    return missing_atoms

            # Ensure selected atoms feel all nonbonded interactions.
            env.edat.nonbonded_sel_atoms = 1

            # Set up automodel.
            a = mymodel(env, alnfile=alignment_filename, knowns='template', sequence='target')

            # Set parameters for automodel.
            # Build only one model.
            # TODO: Have more models built by default (perhaps 50?)
            a.starting_model = 1
            a.ending_model = 1

            # Generate model(s).
            a.make()

            # TODO: Rescore models and select the best one.
            # For now, we only use the first model.
            final_model_summary = a.outputs[0]

            # Copy resulting model to desired output PDB filename.
            # The model name is relative to the temporary directory, so copy before leaving it.
            shutil.copy(final_model_summary['name'], outputFilename)
        finally:
            # Restore working directory.
            os.chdir(olddir)

            # Clean up temporary directory.
            if (not debug):
                shutil.rmtree(tmpdir)

        return
Ejemplo n.º 59
0
def _total_reps_by_id(seq_rep_list):
    """Return a dict mapping each sequence identifier to its summed replicates."""
    totals = {}
    for seq, reps in seq_rep_list:
        totals[seq.identifier] = totals.get(seq.identifier, 0) + reps
    return totals


def _existing_rep_indices(outdir):
    """Return the replicate numbers N of the ``rep<N>.pdb`` files in outdir."""
    indices = []
    for path in glob.glob(os.path.join(outdir, 'rep*.pdb')):
        stem = os.path.basename(path)[len('rep'):-len('.pdb')]
        try:
            indices.append(int(stem))
        except ValueError:
            # 'rep*.pdb' also matches unrelated names (e.g. 'report.pdb');
            # those are not replicate files, so ignore them.
            continue
    return indices


def _build_models(structfname, basedir, nmodels, refstructure, verbose,
                  seq_rep_list):
    """
    Builds replicate structural models of a list of protein sequences.

    structfname is the alignment file of the template structures; it is
    resolved relative to the current working directory.

    basedir is the output root, also resolved relative to the current
    working directory.

    nmodels is divided by the total number of replicates requested for a
    sequence identifier (rounded, minimum 1) to give the number of models
    built per replicate.

    refstructure, when truthy, is used to index the alignment and select
    the entry the final models are superposed onto; otherwise the first
    alignment entry is used.

    verbose switches modeller logging between verbose and silent.

    seq_rep_list is a list of (sequence,replicates) pairs, giving each
    sequence object to be modeled and the number of replicates needed for
    that sequence object.  Sequence objects must provide `identifier` and
    `sequence` attributes.

    For each pair, the best-scoring models (lowest mean of the DOPE and
    DOPE-HR scores) are kept, one per requested replicate.  Numbering
    continues after the highest existing rep<N>.pdb; a sequence whose
    highest existing replicate already reaches the total needed is skipped.

    SIDE EFFECT: models are placed in basedir/sequence_id directory as
    rep<N>.pdb.  The working directory is changed temporarily while
    modeller runs and is restored afterwards, even if modeller fails.
    """

    # set up path links, assuming current working directory
    workingdir = os.getcwd()
    structfname = os.path.normpath(os.path.join(workingdir, structfname))
    basedir = os.path.normpath(os.path.join(workingdir, basedir))

    # calculate total number of reps for each sequence id
    reps_per_id = _total_reps_by_id(seq_rep_list)

    for seq, reps in seq_rep_list:
        # calculate some information on total reps for this id and how many
        # models to build for this particular sequence
        total_reps_needed = reps_per_id[seq.identifier]
        models_per_rep = max(1, round(nmodels / total_reps_needed))
        mynmodels = models_per_rep * reps

        # check this sequence's existing structures; bail out if done
        mindex = 1
        outdir = os.path.join(basedir, seq.identifier)
        if os.path.isdir(outdir):
            existing_reps = _existing_rep_indices(outdir)
            if existing_reps:
                last_rep = max(existing_reps)
                if last_rep >= total_reps_needed:
                    continue
                mindex = last_rep + 1
        else:
            os.makedirs(outdir)

        # set up temporary directory for modeller execution
        # (dnameprefix is expected to be defined at module level)
        with tempfile.TemporaryDirectory(prefix=dnameprefix) as tempdir:
            os.chdir(tempdir)
            try:
                # set up modeller environment
                if verbose:
                    modeller.log.verbose()
                else:
                    modeller.log.none()
                env = modeller.environ()
                env.io.atom_files_directory = [workingdir]

                # set up complete alignment: templates plus the target
                aln = modeller.alignment(env)
                aln.append(file=structfname, remove_gaps=False)
                knowns = [s.code for s in aln]
                aln.append_sequence(seq.sequence)
                aln[-1].code = seq.identifier

                # write alignment - modeller doesn't like alignment in memory
                full_aln_fname = 'structaligntemp.ali'
                aln.write(full_aln_fname, alignment_format='PIR')

                # set up model assessments
                assess_methods = [modeller.automodel.assess.DOPE,
                                  modeller.automodel.assess.DOPEHR]
                assess_names = ["DOPE score", "DOPE-HR score"]

                a = modeller.automodel.dope_loopmodel(
                    env, alnfile=full_aln_fname, knowns=knowns,
                    sequence=seq.identifier, assess_methods=assess_methods)
                a.starting_model = 1          # index of the first model
                a.ending_model = mynmodels    # index of the last model
                # adjust optimization parameters
                a.library_schedule = modeller.automodel.autosched.slow
                a.md_level = modeller.automodel.refine.slow
                a.make()  # do homology modeling

                # rank the successfully built models by mean assessment
                # score (ascending); ties fall back to the file name
                ranked = []
                for data in a.outputs:
                    if data["failure"] is not None:
                        continue
                    scores = [data[name] for name in assess_names]
                    ranked.append((sum(scores) / len(scores), data["name"]))
                ranked.sort()
                best_models = ranked[:reps]

                # map to reference structure
                refseq = aln[refstructure] if refstructure else aln[0]
                refcode = refseq.code
                refmdl = modeller.model(env, file=refseq.atom_file,
                                        model_segment=refseq.range)
                refpos = modeller.selection(refmdl).only_atom_types('CA')

                # get best models
                for _, infname in best_models:
                    outfname = os.path.join(outdir,
                                            'rep{}.pdb'.format(mindex))

                    # build alignment of the reference against this model
                    myaln = modeller.alignment(env)
                    myaln.append(file=structfname, align_codes=refcode,
                                 remove_gaps=False)
                    myaln.append_sequence(seq.sequence)
                    myaln[-1].code = seq.identifier
                    myaln[-1].atom_file = infname

                    # read pdb file
                    mymodel = modeller.model(env, file=infname)
                    # translate to reference coordinates
                    refpos.superpose(mymodel, myaln)
                    # write translated pdb file
                    mymodel.write(file=outfname)

                    mindex += 1
            finally:
                # always leave the temporary directory before it is deleted,
                # so a modeller failure cannot strand the process in a
                # directory that no longer exists
                os.chdir(workingdir)

    return