コード例 #1
0
 def __init__( self, fdcd, fref, box=0, pdbCode=None,
               log=StdLog(), verbose=0):
     """
     Set up a DCD parser for one trajectory against a reference model.

     @param fdcd: path to input dcd file
     @type  fdcd: str
     @param fref: PDB file, pickled PDBModel or open PDBModel instance
                  with same atom content and order
     @type  fref: str or PDBModel
     @param box: expect line with box info at the end of each frame
                 (default: 0)
     @type  box: 1|0
     @param pdbCode: pdb code to be put into the model (default: None)
     @type  pdbCode: str
     @param log: LogFile instance [Biskit.StdLog]
     @type  log: Biskit.LogFile
     @param verbose: print progress to log [0]
     @type  verbose: int
     """
     self.fdcd = T.absfile( fdcd )
     ## DCD is a binary format (readHeader struct.unpacks raw bytes):
     ## open unbuffered in binary mode. Text mode "r" corrupts reads on
     ## Windows and "r" with buffering=0 is rejected by Python 3.
     self.dcd = open(self.fdcd, "rb", 0)

     if isinstance(fref, str):
         self.ref = PDBModel(T.absfile(fref), pdbCode=pdbCode)
     elif fref:
         self.ref = fref

     self.box = box
     ## number of atoms expected per frame
     self.n = self.ref.lenAtoms()
     self.log = log
     self.verbose = verbose

     self.readHeader()
     self.set_pointerInfo()
コード例 #2
0
    def test_Capping(self):
        """PDBCleaner.capTerminals test"""
        ## Loading PDB...
        self.model = PDBModel(t.testRoot() + '/rec/1A2P_rec_original.pdb')

        self.c = PDBCleaner(self.model, log=self.log, verbose=self.local)
        self.m2 = self.c.capTerminals(breaks=True)
        ## no chain break in the original -> capping must be a no-op
        ## (assertTrue replaces assert_, deprecated and removed in Py 3.12)
        self.assertTrue(self.m2.atomNames() == self.model.atomNames())

        ## introduce an artificial chain break by removing residues 10-15
        self.m3 = self.model.clone()
        self.m3.removeRes([10, 11, 12, 13, 14, 15])
        self.m4 = self.m3.clone()

        self.c = PDBCleaner(self.m3, log=self.log, verbose=self.local)
        self.m3 = self.c.capTerminals(breaks=True, capC=[0], capN=[0, 1])
        ## 'X' marks capping residues added around the break
        self.assertEqual(
            self.m3.takeChains([0]).sequence()[:18], 'XVINTFDGVADXXKLPDN')

        if self.local:
            self.log.add('\nTesting automatic chain capping...\n')

        self.c = PDBCleaner(self.m4, log=self.log, verbose=self.local)
        self.m4 = self.c.capTerminals(auto=True)
        self.assertEqual(
            self.m4.takeChains([0]).sequence()[:18], 'XVINTFDGVADXXKLPDN')
コード例 #3
0
    def prepare_target(self, cluster, output_folder = None):
        """
        Create the 'target.fasta' file for each template to validate

        @param cluster: name of the cluster which is used for the
                        folder name in which the validation is run.
        @type  cluster: str
        @param output_folder: top output folder
                             (default: None -> L{F_RESULT_FOLDER})
        @type  output_folder: str
        """
        output_folder = output_folder or self.outFolder + \
                      self.F_RESULT_FOLDER + '/%s/'%cluster

        target = open(output_folder + self.F_TEMPLATE_SEQUENCE, 'w')
        ## try/finally so the file is closed even if PDBModel or the
        ## sequence extraction raises
        try:
            target.write(">target\n")

            for pdb in self.pdb_path:
                ## first 4 characters of the file name are the pdb code
                if cluster == os.path.split(pdb)[1][0:4]:
                    model = PDBModel(pdb)
                    sequence = MU.format_fasta(seq=model.sequence())
                    target.write(sequence)
        finally:
            target.close()
コード例 #4
0
ファイル: ValidationSetup.py プロジェクト: ostrokach/biskit
    def prepare_target(self, cluster, output_folder = None):
        """
        Create the 'target.fasta' file for each template to validate

        @param cluster: name of the cluster which is used for the
                        folder name in which the validation is run.
        @type  cluster: str
        @param output_folder: top output folder
                             (default: None -> L{F_RESULT_FOLDER})
        @type  output_folder: str
        """
        output_folder = output_folder or self.outFolder + \
                      self.F_RESULT_FOLDER + '/%s/'%cluster

        target = open(output_folder + self.F_TEMPLATE_SEQUENCE, 'w')
        ## try/finally so the file is closed even if PDBModel or the
        ## sequence extraction raises
        try:
            target.write(">target\n")

            for pdb in self.pdb_path:
                ## first 4 characters of the file name are the pdb code
                if cluster == os.path.split(pdb)[1][0:4]:
                    model = PDBModel(pdb)
                    sequence = MU.format_fasta(seq=model.sequence())
                    target.write(sequence)
        finally:
            target.close()
コード例 #5
0
    def test_capping_extra(self):
        """PDBCleaner.capTerminals extra challenge"""
        ## structure where automatic capping would clash -> must refuse
        self.m2 = PDBModel(t.testRoot() + '/pdbclean/foldx_citche.pdb')
        self.c = PDBCleaner(self.m2, log=self.log, verbose=self.local)

        self.assertRaises(CappingError, self.c.capTerminals, auto=True)

        if self.local:
            self.log.add('OK: CappingError has been raised indicating clash.')

        ## chain 1 retains exactly one chain break
        n_breaks = len(self.m2.takeChains([1]).chainBreaks())
        self.assertEqual(n_breaks, 1)
コード例 #6
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def prepare(self):
        """Load the reference model and reserve scratch file names."""
        root = T.testRoot() + '/amber/'
        self.ref = PDBModel(root + '1HPT_0.pdb')
        self.refdry = root + '1HPT_0dry.pdb'

        ## one scratch name per (state, extension) combination:
        ## dryparm, drycrd, drypdb, wetparm, wetcrd, wetpdb
        ## NOTE(review): tempfile.mktemp is deprecated/racy but kept for
        ## behavioural parity -- only names are generated here
        for prefix in ('dry', 'wet'):
            for ext in ('parm', 'crd', 'pdb'):
                setattr(self, prefix + ext,
                        tempfile.mktemp('.' + ext, prefix + '_'))

        self.leapout = tempfile.mktemp('.out', 'leap_')
コード例 #7
0
 def __init__(self, fpdb, log=None, verbose=True):
     """
     Set up the cleaner for a structure given as file or model.

     @param fpdb: pdb file OR PDBModel instance
     @type  fpdb: str OR Biskit.PDBModel
     @param log: Biskit.LogFile object (default: STDOUT)
     @type  log: Biskit.LogFile
     @param verbose: log warnings and infos (default: True)
     @type  verbose: bool
     """
     self.verbose = verbose
     ## PDBModel accepts either a file name or an existing model (fpdb)
     self.model = PDBModel(fpdb)
     ## fall back to console logging if no log was given
     self.log = log or StdLog()
コード例 #8
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
    def test_Capping( self ):
        """PDBCleaner.capTerminals test"""
        ## Loading PDB...
        self.model = PDBModel(t.testRoot() + '/rec/1A2P_rec_original.pdb')

        self.c = PDBCleaner( self.model, log=self.log, verbose=self.local )
        self.m2 = self.c.capTerminals( breaks=True )
        ## no chain break in the original -> capping must be a no-op
        ## (assertTrue replaces assert_, deprecated and removed in Py 3.12)
        self.assertTrue( self.m2.atomNames() == self.model.atomNames() )

        ## introduce an artificial chain break by removing residues 10-15
        self.m3 = self.model.clone()
        self.m3.removeRes( [10,11,12,13,14,15] )
        self.m4 = self.m3.clone()

        self.c = PDBCleaner( self.m3, log=self.log, verbose=self.local )
        self.m3 = self.c.capTerminals( breaks=True, capC=[0], capN=[0,1])
        ## 'X' marks capping residues added around the break
        self.assertEqual( self.m3.takeChains([0]).sequence()[:18], 
                          'XVINTFDGVADXXKLPDN' )

        if self.local:
            self.log.add( '\nTesting automatic chain capping...\n' )

        self.c = PDBCleaner( self.m4, log=self.log, verbose=self.local )
        self.m4 = self.c.capTerminals( auto=True )
        self.assertEqual( self.m4.takeChains([0]).sequence()[:18], 
                          'XVINTFDGVADXXKLPDN' )
コード例 #9
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def test_capIrregular(self):
        """AmberParmBuilder.capNME & capACE test"""
        gfp = PDBModel('1GFL')
        ## a regular dipeptide and the chromophore residues of GFP
        normal = gfp.takeResidues([10, 11])
        chromo = gfp.takeResidues([64, 65])

        self.a = AmberParmBuilder(normal)

        ## N-terminal ACE capping of the regular fragment
        self.m4 = self.a.capACE(normal, 0)
        self.assertEqual(len(self.m4), 17)

        ## cap the irregular chromophore fragment on both ends
        capped = self.a.capACE(chromo, 0)
        self.m5 = self.a.capNME(capped, 0)
        self.assertEqual(self.m5.sequence(), 'XSYX')
コード例 #10
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def test_capIrregular( self ):
        """AmberParmBuilder.capNME & capACE test"""
        gfp = PDBModel('1GFL')
        ## a regular dipeptide and the chromophore residues of GFP
        normal = gfp.takeResidues([10, 11])
        chromo = gfp.takeResidues([64, 65])

        self.a = AmberParmBuilder(normal)

        ## N-terminal ACE capping of the regular fragment
        self.m4 = self.a.capACE(normal, 0)
        self.assertEqual(len(self.m4), 17)

        ## cap the irregular chromophore fragment on both ends
        capped = self.a.capACE(chromo, 0)
        self.m5 = self.a.capNME(capped, 0)
        self.assertEqual(self.m5.sequence(), 'XSYX')
コード例 #11
0
ファイル: Analyse.py プロジェクト: graik/biskit
    def go(self, output_folder = None, template_folder = None):
        """
        Run analysis of models.

        @param output_folder: folder for result files
                         (default: None S{->} outFolder/L{F_RESULT_FOLDER})
        @type  output_folder: str
        @param template_folder: folder with template structures
                         (default: None S{->} outFolder/L{VS.F_RESULT_FOLDER})
        @type  template_folder: str
        """
        ## load the list of modelled structures; the first one serves as
        ## the representative model for the per-residue profiles below
        pdb_list = T.load(self.outFolder + self.F_MODELS)
        model = PDBModel(pdb_list[0])

        ## resolve default output and template folders
        output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER
        template_folder = template_folder or self.outFolder +VS.F_RESULT_FOLDER

        templates = self.__listDir(template_folder)

        ## global RMSD (all-atom and C-alpha), each with and without
        ## the interface
        global_rmsd_aa_wo_if, global_rmsd_aa_if = self.global_rmsd_aa()
        global_rmsd_ca_wo_if, global_rmsd_ca_if = self.global_rmsd_ca()
        ## NOTE(review): assumes the folder listing contains exactly one
        ## non-template entry -- confirm against __listDir
        nb_templates = len(templates)-1

        identities = self.get_identities(nb_templates)
        score = self.get_score()

        ## write the summary table of global measures
        self.output_values(global_rmsd_aa_wo_if, global_rmsd_aa_if,
                           global_rmsd_ca_wo_if, global_rmsd_ca_if,
                           identities, score, nb_templates)

        ## per-residue cross-validation against each template
        aln_dic = self.get_aln_info(output_folder=self.outFolder)

        template_rmsd_dic = self.get_templates_rmsd(templates)
        templates_profiles = self.templates_profiles(templates,
                                                     aln_dic,
                                                     template_rmsd_dic)
        mean_rmsd = self.output_cross_val(aln_dic, templates_profiles,
                                          templates, model)

        ## map residue-level mean RMSD onto atoms; updatePDBs_charge
        ## presumably writes these values back into the model PDBs
        mean_rmsd_atoms = model.res2atomProfile(mean_rmsd) 
        self.updatePDBs_charge(mean_rmsd_atoms, model)
コード例 #12
0
ファイル: Analyse.py プロジェクト: tybiot/biskit
    def go(self, output_folder=None, template_folder=None):
        """
        Run analysis of models.

        @param output_folder: folder for result files
                         (default: None S{->} outFolder/L{F_RESULT_FOLDER})
        @type  output_folder: str
        @param template_folder: folder with template structures
                         (default: None S{->} outFolder/L{VS.F_RESULT_FOLDER})
        @type  template_folder: str
        """
        ## load the list of modelled structures; the first one serves as
        ## the representative model for the per-residue profiles below
        pdb_list = T.load(self.outFolder + self.F_MODELS)
        model = PDBModel(pdb_list[0])

        ## resolve default output and template folders
        output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER
        template_folder = template_folder or self.outFolder + VS.F_RESULT_FOLDER

        templates = self.__listDir(template_folder)

        ## global RMSD (all-atom and C-alpha), each with and without
        ## the interface
        global_rmsd_aa_wo_if, global_rmsd_aa_if = self.global_rmsd_aa()
        global_rmsd_ca_wo_if, global_rmsd_ca_if = self.global_rmsd_ca()
        ## NOTE(review): assumes the folder listing contains exactly one
        ## non-template entry -- confirm against __listDir
        nb_templates = len(templates) - 1

        identities = self.get_identities(nb_templates)
        score = self.get_score()

        ## write the summary table of global measures
        self.output_values(global_rmsd_aa_wo_if, global_rmsd_aa_if,
                           global_rmsd_ca_wo_if, global_rmsd_ca_if, identities,
                           score, nb_templates)

        ## per-residue cross-validation against each template
        aln_dic = self.get_aln_info(output_folder=self.outFolder)

        template_rmsd_dic = self.get_templates_rmsd(templates)
        templates_profiles = self.templates_profiles(templates, aln_dic,
                                                     template_rmsd_dic)
        mean_rmsd = self.output_cross_val(aln_dic, templates_profiles,
                                          templates, model)

        ## map residue-level mean RMSD onto atoms; updatePDBs_charge
        ## presumably writes these values back into the model PDBs
        mean_rmsd_atoms = model.res2atomProfile(mean_rmsd)
        self.updatePDBs_charge(mean_rmsd_atoms, model)
コード例 #13
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
 def test_capping_extra(self):
     """PDBCleaner.capTerminals extra challenge"""
     ## structure where automatic capping would clash -> must refuse
     self.m2 = PDBModel(t.testRoot() + '/pdbclean/foldx_citche.pdb')
     self.c = PDBCleaner(self.m2, log=self.log, verbose=self.local)

     self.assertRaises(CappingError, self.c.capTerminals, auto=True)

     if self.local:
         self.log.add('OK: CappingError has been raised indicating clash.')

     ## chain 1 retains exactly one chain break
     n_breaks = len(self.m2.takeChains([1]).chainBreaks())
     self.assertEqual(n_breaks, 1)
コード例 #14
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def __init__(self,
                 model,
                 leap_template=F_leap_in,
                 leaprc=None,
                 leap_out=None,
                 leap_in=None,
                 leap_pdb=None,
                 log=None,
                 debug=0,
                 verbose=0,
                 **kw):
        """
        @param model: model
        @type  model: PDBModel or str
        @param leap_template: path to template file for leap input
        @type  leap_template: str
        @param leaprc: forcefield parameter file or code (e.g. ff99)
        @type  leaprc: str
        @param leap_out: target file for leap.log (default: discard)
        @type  leap_out: str
        @param leap_in: target file for leap.in script (default: discard)
        @type  leap_in: str
        @param leap_pdb: target file for cleaned input pdb
                         (default: temporary file, not kept)
        @type  leap_pdb: str
        @param log: log target (default: None -> StdLog)
        @type  log: Biskit.LogFile
        @param debug: debug flag, stored as attribute (default: 0)
        @type  debug: int
        @param verbose: verbosity flag, stored as attribute (default: 0)
        @type  verbose: int
        @param kw: kw=value pairs for additional options in the leap_template
        @type  kw: key=value
        """
        self.m = PDBModel(model)

        self.leap_template = leap_template
        self.leaprc = leaprc

        self.leap_pdb = leap_pdb or tempfile.mktemp('_leap_pdb')
        ## keep the intermediate pdb only if the caller named it explicitly
        self.keep_leap_pdb = leap_pdb is not None

        self.leap_in = leap_in
        self.leap_out = leap_out

        self.log = log or StdLog()

        self.output = None  # last output of leap

        self.debug = debug
        self.verbose = verbose

        ## expose extra template options as instance attributes
        self.__dict__.update(kw)
コード例 #15
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def test_AmberParmMirror(self):
        """AmberParmBuilder.parmMirror test"""
        ref = self.ref
        mask = N.logical_not(ref.maskH2O())  ## keep protein and Na+ ion
        self.mdry = ref.compress(mask)

        self.a = AmberParmBuilder(self.mdry,
                                  verbose=self.local,
                                  leap_out=self.leapout,
                                  debug=self.DEBUG)

        ## create parm/crd pair mirroring the input atom order...
        self.a.parmMirror(f_out=self.dryparm, f_out_crd=self.drycrd)
        ## ...then convert back to a pdb for comparison
        self.a.parm2pdb(self.dryparm, self.drycrd, self.drypdb)

        self.m1 = PDBModel(self.drypdb)
        self.m2 = PDBModel(self.refdry)

        ## coordinates must survive the round-trip unchanged
        ## (assertTrue replaces assert_, deprecated and removed in Py 3.12)
        eq = N.array(self.m1.xyz == self.m2.xyz)
        self.assertTrue(eq.all())
コード例 #16
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def prepare(self):
        """Load the reference model and reserve scratch file names."""
        root = T.testRoot() + '/amber/'
        self.ref = PDBModel(root + '1HPT_0.pdb')
        self.refdry = root + '1HPT_0dry.pdb'

        ## one scratch name per (state, extension) combination:
        ## dryparm, drycrd, drypdb, wetparm, wetcrd, wetpdb
        ## NOTE(review): tempfile.mktemp is deprecated/racy but kept for
        ## behavioural parity -- only names are generated here
        for prefix in ('dry', 'wet'):
            for ext in ('parm', 'crd', 'pdb'):
                setattr(self, prefix + ext,
                        tempfile.mktemp('.' + ext, prefix + '_'))

        self.leapout = tempfile.mktemp('.out', 'leap_')
コード例 #17
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
 def __init__(self, fpdb, log=None, verbose=True):
     """
     Set up the cleaner for a structure given as file or model.

     @param fpdb: pdb file OR PDBModel instance
     @type  fpdb: str OR Biskit.PDBModel
     @param log: Biskit.LogFile object (default: STDOUT)
     @type  log: Biskit.LogFile
     @param verbose: log warnings and infos (default: True)
     @type  verbose: bool
     """
     self.verbose = verbose
     ## PDBModel accepts either a file name or an existing model (fpdb)
     self.model = PDBModel(fpdb)
     ## fall back to console logging if no log was given
     self.log = log or StdLog()
コード例 #18
0
ファイル: AmberEntropist.py プロジェクト: ostrokach/biskit
    def prepareRef(self, fname):
        """
        Prepare reference model.

        @param fname: file name
        @type  fname: str

        @return: reference structure
        @rtype: PDBModel|Complex

        @raise EntropistError: if unknown reference type
        """
        if not fname:
            return None

        ## hoisted: __splitFilenames was previously evaluated twice
        pair = self.__splitFilenames(fname)
        if pair:
            ## two file names -> build a receptor/ligand Complex
            f1, f2 = pair
            m1 = PDBModel(self.__getModel(f1))
            m2 = PDBModel(self.__getModel(f2))
            ref = Complex(m1, m2)
        else:
            ref = t.load(fname)

        ## a trajectory stands in for its reference frame
        if isinstance(ref, Trajectory):
            ref = ref.ref

        if isinstance(ref, PDBModel):
            return self.__cleanAtoms(ref)

        if isinstance(ref, Complex):
            self.__cleanAtoms(ref.rec_model)
            self.__cleanAtoms(ref.lig_model)
            ref.lig_model_transformed = None
            return ref

        ## call form instead of legacy "raise X, msg" -- the statement
        ## form is a syntax error on Python 3
        raise EntropistError('unknown reference type')
コード例 #19
0
    def test_Benchmark(self):
        """Mod.Benchmark test"""
        from Biskit import Pymoler

        self.b = Benchmark(self.outfolder)

        ## run the full benchmark calculation
        self.b.go()

        ## first modelled structure produced by the run above
        pdb = T.load(self.outfolder + "/modeller/PDBModels.list")[0]

        reference = PDBModel(self.outfolder + "/reference.pdb")
        tmp_model = pdb.clone()

        ## reduce everything to C-alpha traces for the superposition
        reference = reference.compress(reference.maskCA())
        pdb = pdb.compress(pdb.maskCA())
        tmp_model = tmp_model.compress(tmp_model.maskCA())

        ## fit model onto reference (n_it=0: no iterative fitting)
        tm = tmp_model.transformation(reference,
                                      n_it=0,
                                      profname="rms_outliers")
        pdb = pdb.transform(tm)

        if self.local:
            ## visual inspection in PyMol, coloured by the rms profile
            pm = Pymoler()
            pm.addPdb(pdb, "m")
            pm.addPdb(reference, "r")
            pm.colorAtoms("m", tmp_model.profile("rms_outliers"))
            pm.add('set ribbon_trace,1')
            pm.add('show ribbon')
            pm.show()

            if self.DEBUG:
                self.log.add(
                    'The result from the benchmarking is in %s/benchmark'%\
                    self.outfolder)

            ## export locals for interactive inspection after the test
            globals().update(locals())
コード例 #20
0
ファイル: Benchmark.py プロジェクト: ostrokach/biskit
    def test_Benchmark(self):
        """Mod.Benchmark test"""
        from Biskit import Pymoler

        self.b = Benchmark( self.outfolder )

        ## run the full benchmark calculation
        self.b.go()

        ## first modelled structure produced by the run above
        pdb = T.load( self.outfolder + "/modeller/PDBModels.list" )[0]

        reference = PDBModel(self.outfolder  + "/reference.pdb" )
        tmp_model = pdb.clone()

        ## reduce everything to C-alpha traces for the superposition
        reference = reference.compress( reference.maskCA() )
        pdb       = pdb.compress( pdb.maskCA() )
        tmp_model = tmp_model.compress(tmp_model.maskCA())

        ## fit model onto reference (n_it=0: no iterative fitting)
        tm = tmp_model.transformation( reference, n_it=0,
                                       profname="rms_outliers")
        pdb = pdb.transform( tm )

        if self.local:
            ## visual inspection in PyMol, coloured by the rms profile
            pm = Pymoler()
            pm.addPdb( pdb, "m" )
            pm.addPdb( reference, "r" )
            pm.colorAtoms( "m", tmp_model.profile("rms_outliers") )
            pm.add('set ribbon_trace,1')
            pm.add('show ribbon')
            pm.show()

            if self.DEBUG:
                self.log.add(
                    'The result from the benchmarking is in %s/benchmark'%\
                    self.outfolder)

            ## export locals for interactive inspection after the test
            globals().update( locals() )
コード例 #21
0
def prepareSource(inFile,
                  outFile,
                  wat=1,
                  sort=1,
                  foldx=1,
                  surf=1,
                  dens=1,
                  cons=1,
                  dssp=1,
                  delphi=0):
    """
    Strip waters, add profiles and save as doped source model.

    @param inFile: input PDB file
    @param outFile: file name for the doped model
    @param wat: remove waters (default: 1)
    @param sort: sort atoms (default: 1)
    @param foldx: add FoldX energy profile (default: 1)
    @param surf: add surface-racer profiles (default: 1)
    @param dens: add density profile (default: 1)
    @param cons: add conservation profile, best-effort (default: 1)
    @param dssp: add secondary structure profile (default: 1)
    @param delphi: add delphi profile (default: 0)
    @return: the doped source model
    """
    source = PDBModel(inFile)

    if wat:
        source.remove(lambda a: a['residue_name'] in ['HOH', 'WAT', 'TIP3'])

    if sort:
        source = source.sort()

    doper = PDBDope(source)

    if surf:
        doper.addSurfaceRacer(probe=1.4)

    if foldx:
        doper.addFoldX()

    if dens:
        doper.addDensity()

    if dssp:
        doper.addSecondaryStructure()

    if delphi:
        doper.addDelphi()

    ## conservation is best-effort: report failure but keep the model
    try:
        if cons:
            doper.addConservation()
    except Exception:
        ## fixed: previously referenced undefined name 'sourceOut',
        ## which would raise a NameError masking the real failure
        errWriteln('\n ERROR: Conservation profile could not be added to '
                   + str(outFile) + '\n')

    source.saveAs(outFile)

    return source
コード例 #22
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def test_AmberParmSolvated(self):
        """AmberParmBuilder.parmSolvated test"""
        ## strip waters first, then hydrogens, from the reference
        dry = self.ref.compress(self.ref.maskProtein())
        self.mdry = dry.compress(dry.maskHeavy())

        self.a = AmberParmBuilder(self.mdry,
                                  debug=self.DEBUG,
                                  verbose=self.local,
                                  leap_out=self.leapout)

        ## build solvated topology, coordinates and pdb (box=2.5)
        self.a.parmSolvated(self.wetparm,
                            f_out_crd=self.wetcrd,
                            f_out_pdb=self.wetpdb,
                            box=2.5)

        self.m3 = PDBModel(self.wetpdb)

        ## compare the protein part of the result with the reference
        m3prot = self.m3.compress(self.m3.maskProtein())
        refprot = self.ref.compress(self.ref.maskProtein())
        refprot.xplor2amber()

        ## chain layout and atom names must be preserved
        self.assertEqual(self.ref.lenChains(), self.m3.lenChains())
        self.assertEqual(refprot.atomNames(), m3prot.atomNames())
コード例 #23
0
    def prepare_templatesfasta(self,
                               cluster_list,
                               pdb_dictionary,
                               output_folder=None):
        """
        Create 'templates.fasta' file for each template to validate

        @param cluster_list: pdb codes of templates
        @type  cluster_list: [str]
        @param pdb_dictionary: dictionary mapping pdb code to pdb files
                               used by Modeller
        @type  pdb_dictionary: {str:str}
        @param output_folder: top output folder
                             (default: None -> L{F_RESULT_FOLDER})
        @type  output_folder: str
        """
        output_folder = output_folder or self.outFolder + self.F_RESULT_FOLDER

        for cluster in cluster_list:
            folder = '%s/%s'%(output_folder, cluster + \
                              TemplateSearcher.F_RESULT_FOLDER)
            if not os.path.exists(folder):
                os.mkdir(folder)
            else:
                ## print as a function call: the statement form is a
                ## syntax error on Python 3
                print('Directory %s exists, skipping'%( cluster + \
                                                        TemplateSearcher.F_RESULT_FOLDER))

            pdb_path = pdb_dictionary[cluster]
            ## load each template model and derive a name from the file
            ## name (minus the 4-character extension)
            models = [PDBModel(pdb) for pdb in pdb_path]
            names = [os.path.split(pdb)[1][:-4] for pdb in pdb_path]

            input_file = self.outFolder + self.F_RESULT_FOLDER + \
                       '/%s'%cluster + TemplateSearcher.F_RESULT_FOLDER \
                       + self.F_TEMPLATES_FASTA

            templatesfasta = open(input_file, 'w')
            ## try/finally guarantees the file is closed on error
            try:
                for name, model in zip(names, models):
                    templatesfasta.write(">%s\n" % name)
                    sequence = MU.format_fasta(seq=model.sequence())
                    templatesfasta.write("%s\n" % sequence)
            finally:
                templatesfasta.close()
コード例 #24
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def __init__( self, model,
                  leap_template=F_leap_in,
                  leaprc=None,
                  leap_out=None, leap_in=None,
                  leap_pdb=None,
                  log=None,
                  debug=0,
                  verbose=0,
                  **kw ):
        """
        @param model: model
        @type  model: PDBModel or str
        @param leap_template: path to template file for leap input
        @type  leap_template: str
        @param leaprc: forcefield parameter file or code (e.g. ff99)
        @type  leaprc: str
        @param leap_out: target file for leap.log (default: discard)
        @type  leap_out: str
        @param leap_in: target file for leap.in script (default: discard)
        @type  leap_in: str
        @param leap_pdb: target file for cleaned input pdb
                         (default: temporary file, not kept)
        @type  leap_pdb: str
        @param log: log target (default: None -> StdLog)
        @type  log: Biskit.LogFile
        @param debug: debug flag, stored as attribute (default: 0)
        @type  debug: int
        @param verbose: verbosity flag, stored as attribute (default: 0)
        @type  verbose: int
        @param kw: kw=value pairs for additional options in the leap_template
        @type  kw: key=value
        """
        self.m = PDBModel( model )

        self.leap_template = leap_template
        self.leaprc  = leaprc

        self.leap_pdb = leap_pdb or tempfile.mktemp( '_leap_pdb' )
        ## keep the intermediate pdb only if the caller named it explicitly
        self.keep_leap_pdb = leap_pdb is not None

        self.leap_in = leap_in
        self.leap_out= leap_out

        self.log = log or StdLog()

        self.output = None   # last output of leap

        self.debug = debug
        self.verbose = verbose

        ## expose extra template options as instance attributes
        self.__dict__.update( kw )
コード例 #25
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def test_AmberParmSolvated( self ):
        """AmberParmBuilder.parmSolvated test"""
        ## remove waters and hydrogens
        self.mdry = self.ref.compress( self.ref.maskProtein() )
        self.mdry = self.mdry.compress( self.mdry.maskHeavy() )

        self.a = AmberParmBuilder( self.mdry,
                                   leap_out=self.leapout,
                                   verbose=self.local, debug=self.DEBUG)

        ## build solvated topology, coordinates and pdb
        ## (box=2.5; presumably solvent box padding -- confirm)
        self.a.parmSolvated( self.wetparm, f_out_crd=self.wetcrd,
                             f_out_pdb=self.wetpdb,
                             box=2.5 )

        self.m3 = PDBModel( self.wetpdb )

        ## compare the protein part of the result with the reference
        m3prot = self.m3.compress( self.m3.maskProtein() )
        refprot= self.ref.compress( self.ref.maskProtein() )
        refprot.xplor2amber()

        ## chain layout and atom names must be preserved
        self.assertEqual( self.ref.lenChains(), self.m3.lenChains() )
        self.assertEqual( refprot.atomNames(), m3prot.atomNames() )
コード例 #26
0
ファイル: dope.py プロジェクト: graik/biskit
def prepareSource( inFile, outFile, wat=1, sort=1,
                   surf=1, dens=1, cons=1, dssp=1, delphi=0 ):
    """
    Strip waters, add profiles and save as doped source model.

    @param inFile: input PDB file
    @param outFile: file name for the doped model
    @param wat: remove waters (default: 1)
    @param sort: sort atoms (default: 1)
    @param surf: add surface-racer profiles (default: 1)
    @param dens: add density profile (default: 1)
    @param cons: add conservation profile, best-effort (default: 1)
    @param dssp: add secondary structure profile (default: 1)
    @param delphi: add delphi profile (default: 0)
    @return: the doped source model
    """
    source = PDBModel( inFile )

    if wat:
        source.remove( lambda a: a['residue_name'] in ['HOH','WAT','TIP3'] )

    if sort:
        source = source.sort()

    doper = PDBDope( source )

    if surf:
        doper.addSurfaceRacer( probe=1.4 )

    if dens:
        doper.addDensity()

    if dssp:
        doper.addSecondaryStructure()

    if delphi:
        doper.addDelphi()

    ## conservation is best-effort: report failure but keep the model
    try:
        if cons:
            doper.addConservation()
    except Exception:
        ## fixed: previously referenced undefined name 'sourceOut',
        ## which would raise a NameError masking the real failure
        errWriteln('\n ERROR: Conservation profile could not be added to '
                   + str(outFile) + '\n')

    source.saveAs( outFile )

    return source
コード例 #27
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
class Test(BT.BiskitTest):
    """Test class """

    def prepare(self):
        ## NOTE(review): both imports are unused here; kept in case the
        ## test framework relies on the import side effects
        from Biskit.LogFile import LogFile
        import tempfile

    def test_PDBCleaner(self):
        """PDBCleaner general test"""
        ## PDBCleaner accepts a raw pdb file path directly
        self.c = PDBCleaner(t.testRoot() + '/rec/1A2P_rec_original.pdb',
                            log=self.log,
                            verbose=self.local)

        self.m = self.c.process()

        self.assertAlmostEqual(self.m.mass(), 34029.0115499993, 7)

    def test_DNACleaning(self):
        """PDBCleaner DNA test"""
        ## Loading PDB...
        self.c = PDBCleaner(t.testRoot() + 'amber/entropy/0_com.pdb',
                            log=self.log, verbose=self.local)

        self.dna = self.c.process(amber=True)

        self.assertAlmostEqual(self.dna.mass(), 26953.26, 1)

    def test_Capping(self):
        """PDBCleaner.capTerminals test"""
        ## Loading PDB...
        self.model = PDBModel(t.testRoot() + '/rec/1A2P_rec_original.pdb')

        self.c = PDBCleaner(self.model, log=self.log, verbose=self.local)
        self.m2 = self.c.capTerminals(breaks=True)
        ## no chain break -> capping must leave the model untouched
        ## (assertTrue replaces assert_, deprecated and removed in Py 3.12)
        self.assertTrue(self.m2.atomNames() == self.model.atomNames())

        ## introduce an artificial chain break by removing residues 10-15
        self.m3 = self.model.clone()
        self.m3.removeRes([10, 11, 12, 13, 14, 15])
        self.m4 = self.m3.clone()

        self.c = PDBCleaner(self.m3, log=self.log, verbose=self.local)
        self.m3 = self.c.capTerminals(breaks=True, capC=[0], capN=[0, 1])
        self.assertEqual(self.m3.takeChains([0]).sequence()[:18],
                         'XVINTFDGVADXXKLPDN')

        if self.local:
            self.log.add('\nTesting automatic chain capping...\n')

        self.c = PDBCleaner(self.m4, log=self.log, verbose=self.local)
        self.m4 = self.c.capTerminals(auto=True)
        self.assertEqual(self.m4.takeChains([0]).sequence()[:18],
                         'XVINTFDGVADXXKLPDN')

    def test_capping_extra(self):
        """PDBCleaner.capTerminals extra challenge"""
        ## structure where automatic capping would clash -> must refuse
        self.m2 = PDBModel(t.testRoot() + '/pdbclean/foldx_citche.pdb')
        self.c = PDBCleaner(self.m2, verbose=self.local, log=self.log)
        self.assertRaises(CappingError, self.c.capTerminals, auto=True)
        if self.local:
            self.log.add('OK: CappingError has been raised indicating clash.')

        self.assertEqual(len(self.m2.takeChains([1]).chainBreaks()), 1)
コード例 #28
0
class NamdDCDParser:
    
    def __init__( self, fdcd, fref, box=0, pdbCode=None,
                  log=StdLog(), verbose=0):
        """
        @param fdcd: path to input dcd file
        @type  fdcd: str
        @param fref: PDB or pickled PDBModel or directly an open PDBModel instancewith same atom content and order
        @type  fref: str or PDBModel
        @param box: expect line with box info at the end of each frame
                    (default: 0)
        @type  box: 1|0
        @param pdbCode: pdb code to be put into the model (default: None)
        @type  pdbCode: str
        @param log: LogFile instance [Biskit.StdLog]
        @type  log: Biskit.LogFile
        @param verbose: print progress to log [0]
        @type  verbose: int
        """
        self.fdcd = T.absfile( fdcd )
        self.dcd = open(self.fdcd, "r", 0)
        
        if isinstance(fref, str) :
            self.ref=PDBModel(T.absfile(fref), pdbCode=pdbCode)
        elif fref :
            self.ref = fref
            
        self.box  = box
        self.n = self.ref.lenAtoms()
        self.log = log
        self.verbose = verbose
        
        self.readHeader()
        self.set_pointerInfo()

    def readHeader(self, verbose=False):
        """
        Read NAMD DCD coordinates file
        Only for 32bit DCD with opposite endianness!
        AND only if no atom is fixed!
        """
        f = self.dcd
        unpack = struct.unpack
        
        # Check we are in the beggining of the file
        # or move there to read the header
        if f.tell() != 0:
            f.seek(0)
        
        # First read header information and check correct file format
        header = struct.unpack(">I 4s 9I f 11I", f.read(92))
        if header[0] == 84 and header[1] == 'CORD' and header[-1] == 84 and header[-2] != 0:
            #print "recognized 32 bit DCD file of opposite endianness"
            #Store the number of sets of coordinates (NSET). Frames
            self.nset = header[2]
            #Store ISTART, the starting timestep
            self.istart = header[3]
            #Store NSAVC, the number of steps between dcd saves
            self.nsavc = header[4]
            # Store NTOT, number of total simulation steps
            self.ntot = header[5]
            #Store DELTA, the time step of simulation
            self.delta = header[11]
            # Have box information?
            self.has_extrablock = bool(header[12])
            # Have a 4th dimension?
            self.had_4dims = bool(header[13])
        else:
            f.close()
            sys.exit("Bad DCD Format")
            
        # Read title information
        if (unpack('>I',f.read(4))[0] - 4) % 80 == 0:
            # Number of title lines
            self.ntitle = int(unpack('>I', f.read(4))[0])
            self.title = [unpack(">80s",f.read(80)) for i in range(self.ntitle)]
            f.read(4) # Skip closing block number
        else:
            f.close()
            sys.exit("ERROR in title. Bad DCD format")
        
        # Read number of atoms
        atomBlock = unpack('>3I', f.read(12))
        if atomBlock[-1] == 4:
            self.natoms = atomBlock[1]
        else:
            f.close()
            sys.exit("Bad DCD format")
        
        if verbose:
            print self.title
            print "Number of atoms:", self.natoms
            print "Number of frames:",self.nset
            print "Starting timestep", self.istart
            print "Final timestep:", self.ntot
            print "Steps between frames:", self.nsavc
            print "Time step of simulation:", self.delta
    
    def set_pointerInfo(self):
        """
        Store sizes for browsing the file later
        """
        # Header size is: 116 + 80* self.ntitle
        self.h_size = 116 + (80 * self.ntitle)
        
        # Frame size
        # 4 bytes because it's floats per 3 axis per total num of atoms
        # Add the enclosing integers (two for each axis) = 6 * 4
        f_size = (3 * 4 * self.natoms) + 24
        if self.has_extrablock:
            f_size += 56
        
        self.f_size = f_size
           
    def read_charmm_extrablock(self):
        
        f = self.dcd
        unpack = struct.unpack
        
        # This block contains the box information
        if unpack('>I', f.read(4))[0] == 48:
            self.unitcell = npy.fromstring( f.read(48), dtype=">d")
            f.read(4)
        else:
            f.close()
            sys.exit("ERROR in read_charmm_extrablock(). Bad DCD Format")
            
    def read_dcdstep(self):
      
        f = self.dcd
        size = struct.calcsize
        
        # If there is box information
        if self.has_extrablock:
            self.read_charmm_extrablock()
        
        # Read coordinates
        # Each coordinates block is enclosed by one integer
        # that we will skip all the times
        xyz = npy.zeros([self.natoms, 3], dtype=">f4")
        f.read(4)
        xyz[:,0] = npy.fromstring(f.read(size('f')*self.natoms), dtype=">f4")
        f.read(8)
        xyz[:,1] = npy.fromstring(f.read(size('f')*self.natoms), dtype=">f4")
        f.read(8)
        xyz[:,2] = npy.fromstring(f.read(size('f')*self.natoms), dtype=">f4")
        f.read(4)
        
        return xyz
        
    def read_all(self):
        """
        Read all snapshots
        """
        # Go to the beggining of the frames
        f = self.dcd
        f.seek(self.h_size)
        
        # Read Frames
        all_snap = npy.zeros([self.nset, self.natoms, 3], dtype=">f4")
        for i in range(self.nset):
            all_snap[i,:] = self.read_dcdstep()
        
        return all_snap
    
    def close(self):
        self.dcd.close()
        
    def getFrame(self, i):
        """
        Read specific frame.
                """
        f = self.dcd

        # Calculate pointer position
        # for the desired frame and move there
        pointer = self.h_size + (self.f_size * i)
        f.seek(pointer)
        
        return self.read_dcdstep()
    
    def __getitem__(self, i):
        return self.getFrame(i)
コード例 #29
0
ファイル: averageASA.py プロジェクト: ostrokach/biskit
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the
    average and SD
    """
    ## container for results and standard deviations
    MS,    AS    = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        dir = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( dir + '/*.pdb')
        
        msLst = []
        asLst = []
        
        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%dir )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove tailing GLY
                m = m.compress( m.res2atomMask(mask) )
                
                ## collect surface data for each peptide
                msLst += [ m.profile('MS') ]
                asLst += [ m.profile('AS') ]
                       
            except:
                print 'Failed calculating exposure for GLY-%s-GLY'%(k)
                print '\t and file %s'%f
                
        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')
        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        j = 0
        #atoms =  [ a['name'] for a in m.atoms ]
        for n in m['name']:
            msDic[n]    = N.average(msLst)[j]
            asDic[n]    = N.average(asLst)[j]
            msDic_sd[n] = MAU.SD( msLst )[j]
            asDic_sd[n] = MAU.SD( asLst )[j]
            j += 1

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
コード例 #30
0
ファイル: dope.py プロジェクト: graik/biskit
def changeModel( inFile, prefix, sourceModel ):
    """
    Load a model from inFile, re-link it to sourceModel as its source,
    flag changed atom profiles / coordinates, refresh coordinate-derived
    profiles if the coordinates moved, and dump the result as
    <dir of inFile>/<prefix><basename>.model.

    @param inFile: path to PDB / pickled model to convert
    @type  inFile: str
    @param prefix: prefix for the output .model file name
    @type  prefix: str
    @param sourceModel: model with a valid source to link against
    @type  sourceModel: PDBModel

    @raise ConvertError: if the two models differ in atom content
    """
    print '\nget ' + os.path.basename( inFile ) + '..',

    model = PDBModel( inFile )

    model.update()

    ## bring atoms into canonical order before comparing to the source
    model = model.sort()

    eq = model.equals( sourceModel )
    if not eq[0] and eq[1]:
        raise ConvertError('source and other models are not equal: ' + str(eq))

#    model.validSource()
    model.setSource( sourceModel.validSource() )

    #model.atomsChanged = 0
    # NOTE(review): 'changed' is set True when the profiles are all
    # *equal* -- this looks inverted; confirm the intended semantics.
    for k in model.atoms:
        model.atoms[k,'changed'] = N0.all( model[k] == sourceModel[k] )

    ## coordinates count as changed if any component differs
    model.xyzChanged = ( 0 != N0.sum( N0.ravel( model.xyz - sourceModel.xyz)) )

    model.update( updateMissing=1 )

    if model.xyzChanged:

        ## re-derive coordinate-dependent profiles present in the source
        doper = PDBDope( model )

        if 'MS' in sourceModel.atoms.keys():
            doper.addSurfaceRacer( probe=1.4 )

        if 'density' in sourceModel.atoms.keys():
            doper.addDensity()

##        if 'foldX' in sourceModel.info.keys():
##            doper.addFoldX()
            
        if 'delphi' in sourceModel.info.keys():
            doper.addDelphi()

    outFile = os.path.dirname( inFile ) + '/' + prefix +\
            T.stripFilename( inFile ) + '.model' 

    T.dump( model, outFile )

    print '-> ' + os.path.basename( outFile )
コード例 #31
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
class AmberParmBuilder:
    """
    AmberParmBuilder
    ================
    Create Amber topology and coordinate file from PDB.

      - parmMirror():
         ...builds a fake parm that exactly mirrors a given PDB file.
         This parm can be used for ptraj but not for simulations.
         Currently, parmMirror only accepts amber-formatted PDBs as
         input. It should be possible to create topologies that have
         the same content and order of atoms as an xplor PDB but
         some atoms will have different names.

      - parmSolvated():
         ...builds a solvated system for PME simulations (incl. closing
         of S-S bonds, capping of chain breaks). parmSolvated accepts
         both xplor and amber-formatted PDBs as input.

    Requires the amber programs C{tleap} and C{ambpdb}.
    Requires leap template files in C{biskit/Biskit/data/amber/leap/}.
    
    Note on forcefields:

       The default forcefield used is specified in exe_tleap and currently
       is ff10. This translates to loading amber11/dat/leap/cmd/leaprc.ff10
       at the beginning of the leap run. As of 2011, ff10 is the recommended
       default forcefield for proteins and nucleic acids.
       Comment from Jason Swails on the Amber mailing list: 
       "
       Try using ff99SB (which is the protein force field part of ff10, which is
       the version I would actually suggest using).  Despite its label, it is
       actually a 2006 update of the ff99 force field which performs at least as
       well (if not better) as ff03."
       
       Unfortunately, ions are only "half" paramterized in ff10. Additional 
       parameters need to be loaded from a frmod file, typically 
       frcmod.ionsjc_tip3p. There are additional versions of this file optimized
       for other water models than TIP3. frcmod.ionsjc_tip3p is set as the 
       default frmod file to include by parmSolvated and parmMirror. Please
       include it if you provide your own list of frmod files.

    @note: The design of AmberParmBuilder is less than elegant. It
           would make more sense to split it into two classes that
           are both derrived from Executor.
    """

    ## script to create a parm that exactly mirrors a given PDB
    script_mirror_pdb = """
    logFile %(f_out)s
    source %(leaprc)s
    %(fmod)s
    %(fprep)s
    p = loadPdb %(in_pdb)s
    %(delete_atoms)s
    saveAmberParm p %(out_parm)s %(out_crd)s
    quit
    """

    ## tleap command to close a single S-S bond
    ss_bond = "bond p.%i.SG p.%i.SG\n"

    ## leap script for solvated topology
    F_leap_in = t.dataRoot() + '/amber/leap/solvate_box.leap'
    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__(self,
                 model,
                 leap_template=F_leap_in,
                 leaprc=None,
                 leap_out=None,
                 leap_in=None,
                 leap_pdb=None,
                 log=None,
                 debug=0,
                 verbose=0,
                 **kw):
        """
        @param model: model
        @type  model: PDBModel or str
        @param leap_template: path to template file for leap input
        @type  leap_template: str
        @param leaprc: forcefield parameter file or code (e.g. ff99)
        @type  leaprc: str
        @param leap_out: target file for leap.log (default: discard)
        @type  leap_out: str
        @param leap_in: target file for leap.in script (default: discard)
        @type  leap_in: str
        @param leap_pdb: target file for the cleaned PDB fed into leap
                         (default: temporary file, removed afterwards)
        @type  leap_pdb: str
        @param log: log instance (default: StdLog)
        @type  log: Biskit.LogFile
        @param debug: keep intermediate files (default: 0)
        @type  debug: 1|0
        @param verbose: print progress to log (default: 0)
        @type  verbose: 1|0
        @param kw: kw=value pairs for additional options in the leap_template
        @type  kw: key=value
        """
        self.m = PDBModel(model)

        self.leap_template = leap_template
        self.leaprc = leaprc

        # NOTE(review): tempfile.mktemp only reserves a *name* and is
        # race-prone; consider tempfile.mkstemp -- confirm leap tolerates
        # a pre-existing empty file before changing this.
        self.leap_pdb = leap_pdb or tempfile.mktemp('_leap_pdb')
        # only keep the intermediate PDB if the caller asked for it by name
        self.keep_leap_pdb = leap_pdb is not None

        self.leap_in = leap_in
        self.leap_out = leap_out

        self.log = log or StdLog()

        self.output = None  # last output of leap

        self.debug = debug
        self.verbose = verbose

        # expose additional leap-template options as instance attributes
        self.__dict__.update(kw)

    def __runLeap(self, in_script, in_pdb, norun=0, **kw):
        """
        Fill the leap script template and execute tleap via AmberLeap.

        @param in_script: content of leap script with place holders
        @type  in_script: str
        @param in_pdb: PDB file to load into tleap
        @type  in_pdb: str
        @param norun: 1 - only create leap script (default: 0)
        @type  norun: 1|0
        @param kw: key=value pairs for filling place holders in script
        @type  kw: key=value

        @raise AmberError: if missing option for leap input file or
                           if could not create leap input file
        """
        leap = AmberLeap(in_script,
                         in_pdb=in_pdb,
                         log=self.log,
                         verbose=self.verbose,
                         debug=self.debug,
                         catch_out=True,
                         f_in=self.leap_in,
                         f_out=self.leap_out,
                         **kw)

        ## dry run: only write the input script, do not execute tleap
        if norun:
            leap.generateInp()
            return

        leap.run()
        self.output = leap.output

##         ## create leap script
##         try:
##             ## use own fields and given kw as parameters for leap script
##             d = copy.copy( self.__dict__ )
##             d.update( kw )

##             in_script = in_script % d
##             f = open( self.leap_in, 'w')
##             f.write( in_script )
##             f.close()

##             if self.verbose:
##                 self.log.add('leap-script: ')
##                 self.log.add( in_script )

##         except IOError:
##             raise AmberError('Could not create leap input file')
##         except:
##             raise AmberError('missing option for leap input file\n'+\
##                              'available: %s' % (str( d.keys() ) ))

##         ## run tleap
##         args = '-f %s' % self.leap_in

##         if not norun:
##             self.exe = Executor('tleap', args, log=self.log,verbose=1,
##                                 catch_out=0)
##             self.output, self.error, self.status = self.exe.run()

##             if not os.path.exists( kw['out_parm'] ):
##                 raise AmberError, "tleap failed"

##         ## clean up

##         if not self.keep_leap_in and not self.debug:
##             t.tryRemove( self.leap_in )
##         if not self.keep_leap_out and not self.debug:
##             t.tryRemove( self.leap_out)

    def parm2pdb(self, f_parm, f_crd, f_out, aatm=0):
        """
        Rebuild a PDB from parm and crd files using ambpdb.

        @param f_parm: existing parm (topology) file
        @type  f_parm: str
        @param f_crd: existing crd (coordinate) file
        @type  f_crd: str
        @param f_out: target file name for PDB
        @type  f_out: str
        @param aatm: pass -aatm (amber atom names) to ambpdb (default: 0)
        @type  aatm: 1|0

        @return: f_out, target file name for PDB
        @rtype: str

        @raise AmberError: if ambpdb fails to produce the output file
        """
        flags = '-p %s %s' % (f_parm, '-aatm' if aatm else '')

        ## crd goes in via stdin, the PDB is captured from stdout
        runner = Executor('ambpdb',
                          flags,
                          f_in=f_crd,
                          f_out=f_out,
                          log=self.log,
                          verbose=1,
                          catch_err=1)
        runner.run()

        if not os.path.exists(f_out):
            raise AmberError('ambpdb failed.')

        return f_out

    def __ssBonds(self, model, cutoff=4.):
        """
        Identify disulfide bonds.

        @param model: model
        @type  model: PDBModel
        @param cutoff: distance cutoff for S-S distance (default: 4.0)
        @type  cutoff: float

        @return: list with numbers of residue pairs forming S-S
        @rtype: [(int, int)]
        """
        ## reduce the model to its gamma-sulfur atoms
        sg = model.compress(model.mask(['SG']))
        if len(sg) < 2:
            return []

        ## boolean matrix of SG-SG contacts within the cutoff
        contacts = N.less(MU.pairwiseDistances(sg.xyz, sg.xyz), cutoff)

        resnum = sg.atoms['residue_number']
        pairs = []
        n = len(contacts)
        ## collect each contact pair once (upper triangle only)
        for i in range(n):
            for j in range(i + 1, n):
                if contacts[i, j]:
                    pairs.append((resnum[i], resnum[j]))
        return pairs

    def __cys2cyx(self, model, ss_residues):
        """
        Rename all S-S bonded CYS into CYX (in-place).

        @param model: model
        @type  model: PDBModel
        @param ss_residues: original residue numbers of S-S pairs
        @type  ss_residues: [(int, int)]
        """
        ## flatten the residue-number pairs into one lookup list
        bonded = []
        for pair in ss_residues:
            bonded.extend(pair)

        for atom in model:
            if atom['residue_number'] in bonded:
                atom['residue_name'] = 'CYX'

    def capACE(self, model, chain):
        """
        Cap N-terminal of given chain with an ACE residue.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        """
        ## delegate the actual capping to PDBCleaner
        return PDBCleaner(model, log=self.log).capACE(model, chain,
                                                      breaks=True)

    def capNME(self, model, chain):
        """
        Cap C-terminal of given chain with an NME residue.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        """
        ## delegate the actual capping to PDBCleaner
        return PDBCleaner(model, log=self.log).capNME(model, chain,
                                                      breaks=True)

    def centerModel(self, model):
        """
        Move the model so that its geometric center sits at the origin
        (in-place).

        @param model: model to re-center
        @type  model: PDBModel
        """
        shift = N.average(model.getXyz())
        model.setXyz(model.xyz - shift)

    def leapModel(self, hetatm=0, center=True):
        """
        Get a clean PDBModel for input into leap.

        @param hetatm: keep HETATM records (default: 0)
        @type  hetatm: 1|0
        @param center: re-center coordinates on the geometric center
                       (default: True)
        @type  center: bool

        @return: cleaned, renumbered (and optionally centered) copy
        @rtype: PDBModel
        """
        ## work on a copy with amber-style atom names
        model = self.m.clone()
        model.xplor2amber()

        cleaner = PDBCleaner(model, log=self.log, verbose=self.verbose)
        model = cleaner.process(keep_hetatoms=hetatm, amber=1)

        model.renumberResidues(addChainId=1)

        if center:
            self.centerModel(model)

        return model

    def __fLines(self, template, values):
        """
        Expand a '%'-template line once per value.

        @param template: line with a single %-style place holder
        @type  template: str
        @param values: one value or a list of values to fill in
        @type  values: any or [any]

        @return: concatenation of template %% v for every value
        @rtype: str
        """
        ## idiom fix: isinstance instead of 'type(...) is list' so that
        ## list subclasses are handled like plain lists
        if not isinstance(values, list):
            values = [values]

        return ''.join([template % v for v in values])

    def parmSolvated(self,
                     f_out,
                     f_out_crd=None,
                     f_out_pdb=None,
                     hetatm=0,
                     norun=0,
                     cap=0,
                     capN=[],
                     capC=[],
                     fmod=['frcmod.ionsjc_tip3p'],
                     fprep=[],
                     box=10.0,
                     center=True,
                     **kw):
        """
        Build a solvated topology / coordinate set for PME simulations:
        clean the model, cap termini, close S-S bonds and run tleap.

        @param f_out: target file for parm (topology)
        @type  f_out: str
        @param f_out_crd: target file for crd (coordinates)
                          (default:|f_out_base|.crd)
        @type  f_out_crd: str
        @param f_out_pdb: target file for pdb (default:|f_out_base|.pdb)
        @type  f_out_pdb: str
        @param hetatm: keep hetero atoms (default: 0)
        @type  hetatm: 1|0
        @param norun: only create the leap input script, do not run leap
                      (default: 0)
        @type  norun: 1|0
        @param cap: put ACE and NME capping residue on chain breaks 
                    (default: 0)
        @type  cap: 1|0
        @param capN: indices of chains that should get ACE cap (default: [])
        @type  capN: [int]
        @param capC: indices of chains that should get NME cap (default: [])
        @type  capC: [int]
        @param box: minimal distance of solute from box edge (default: 10.0)
        @type  box: float
        @param center: re-center coordinates (default: True)
        @type  center: bool
        @param fmod: list of files with amber parameter modifications
                     to be loaded into leap with loadAmberParams
                    (default:['frcmod.ionsjc_tip3p'] ... mod file needed for 
                    default Amber ff10 ions -- topology saving will fail if this 
                    one is missing)
        @type  fmod: [str]
        @param fprep: list of files with amber residue definitions
                    (to be loaded into leap with loadAmberPrep) (default: [])
        @type  fprep: [str]
        @param kw: additional key=value pairs for leap input template
        @type  kw: key=value

        @raise IOError:
        """
        f_out = t.absfile(f_out)
        ## derive default crd / pdb file names from the parm file name
        f_out_crd = t.absfile(f_out_crd) or t.stripSuffix(f_out) + '.crd'
        f_out_pdb = t.absfile( f_out_pdb ) or t.stripSuffix( f_out ) +\
                    '_leap.pdb'

        ## removed: (bugfix 3434136)
        #fmod  = [ t.absfile( f ) for f in t.toList( fmod )  ]
        #fprep = [ t.absfile( f ) for f in t.toList( fprep ) ]

        try:
            if self.verbose: self.log.add('\nCleaning PDB file for Amber:')
            m = self.leapModel(hetatm=hetatm, center=center)

            if cap:
                ## also cap the termini created by chain breaks
                end_broken = m.atom2chainIndices(m.chainBreaks())
                capC = MU.union(capC, end_broken)
                capN = MU.union(capN, N.array(end_broken) + 1)

            for i in capN:
                if self.verbose:
                    self.log.add('Adding ACE cap to chain %i' % i)
                m = self.capACE(m, i)

            for i in capC:
                if self.verbose:
                    self.log.add('Adding NME cap to chain %i' % i)
                m = self.capNME(m, i)

            m.renumberResidues(addChainId=1)  ## again, to accommodate capping

            template = open(self.leap_template).read()

            ## script lines loading parameter / residue definition files
            leap_mod = self.__fLines('m = loadAmberParams %s\n', fmod)
            leap_prep = self.__fLines('loadAmberPrep %s\n', fprep)

            ## detect S-S bonds, rename involved CYS to CYX and prepare
            ## the matching 'bond' commands for leap
            ss = self.__ssBonds(m, cutoff=4.)
            self.__cys2cyx(m, ss)
            leap_ss = self.__fLines(self.ss_bond, ss)
            if self.verbose:
                self.log.add('Found %i disulfide bonds: %s' %
                             (len(ss), str(ss)))

            if self.verbose:
                self.log.add('writing cleaned PDB to %s' % self.leap_pdb)
            m.writePdb(self.leap_pdb, ter=3)

            self.__runLeap(template,
                           in_pdb=self.leap_pdb,
                           out_parm=f_out,
                           out_crd=f_out_crd,
                           ss_bonds=leap_ss,
                           fmod=leap_mod,
                           fprep=leap_prep,
                           norun=norun,
                           box=box,
                           **kw)

            if not norun:
                ## convert the leap result back into a PDB for inspection
                parm_pdb = self.parm2pdb(f_out, f_out_crd, f_out_pdb)

            if not self.keep_leap_pdb and not self.debug:
                t.tryRemove(self.leap_pdb)

        except IOError, why:
            raise IOError, why
コード例 #32
0
    def go(self, model_list=None, reference=None):
        """
        Run benchmarking.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        ## fall back to the standard locations inside the project folder
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s' % model_list)
        reference = PDBModel(reference)

        ## indices of equivalent atoms in reference and first model
        iref, imodel = reference.compareAtoms(pdb_list[0])

        ## atom mask (on the models) marking atoms shared with the reference
        mask_casting = N0.zeros(len(pdb_list[0]))
        N0.put(mask_casting, imodel, 1)

        reference = reference.take(iref)

        ## NOTE(review): duplicates mask_casting and is never used later
        atom_mask = N0.zeros(len(pdb_list[0]))
        N0.put(atom_mask, imodel, 1)

        ## restrict to residues backed by at least one template
        res_mask = pdb_list[0].profile2mask("n_templates", 1, 1000)
        tmpl_mask = pdb_list[0].res2atomMask(res_mask)

        mask_final_ref = N0.compress(mask_casting, tmpl_mask)
        mask_final = mask_casting * tmpl_mask

        reference = reference.compress(mask_final_ref)

        for i, model in enumerate(pdb_list):

            ## superpose onto the reference; fitted with and without
            ## taking the interface into account
            pdb_list[i], pdb_wo_if = self.output_fittedStructures(
                model, reference, i, mask_final)

            fitted_if = pdb_list[i].compress(mask_final)
            fitted_wo_if = pdb_wo_if.compress(mask_final)

            ## per-residue rmsd profile against the reference
            aprofile = self.rmsd_res(reference.getXyz(), fitted_if.getXyz())

            self.calc_rmsd(fitted_if, fitted_wo_if, reference, pdb_list[i])

            pdb_list[i].atoms.set('rmsd2ref_if',
                                  aprofile,
                                  mask=mask_final,
                                  default=-1,
                                  comment="rmsd to known reference structure")

        ## summary outputs
        self.output_rmsd_aa(pdb_list)
        self.output_rmsd_ca(pdb_list)
        self.output_rmsd_res(pdb_list)

        self.write_PDBModels(pdb_list)
コード例 #33
0
ファイル: Benchmark.py プロジェクト: ostrokach/biskit
    def go(self, model_list = None, reference = None):
        """
        Run benchmarking.

        @param model_list: list of models
                           (default: None S{->} outFolder/L{F_PDBModels})
        @type  model_list: ModelList
        @param reference: reference model
                        (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
        @type  reference: PDBModel
        """
        ## default to the standard files inside the project folder
        model_list = model_list or self.outFolder + self.F_PDBModels
        reference = reference or self.outFolder + self.F_INPUT_REFERENCE

        pdb_list = T.load('%s' % model_list)
        reference = PDBModel(reference)

        ## map equivalent atoms between reference and first model
        iref, imodel = reference.compareAtoms(pdb_list[0])

        mask_casting = N.zeros(len(pdb_list[0]))
        N.put(mask_casting, imodel, 1)

        reference = reference.take(iref)
        #reference_mask_CA = reference_rmsd.maskCA()

        ## NOTE(review): identical to mask_casting and unused afterwards
        atom_mask = N.zeros(len(pdb_list[0]))
        N.put(atom_mask, imodel, 1)

        ## keep only atoms of residues covered by at least one template
        rmask = pdb_list[0].profile2mask("n_templates", 1, 1000)
        amask = pdb_list[0].res2atomMask(rmask)

        mask_final_ref = N.compress(mask_casting, amask)
        mask_final = mask_casting * amask

        reference = reference.compress(mask_final_ref)

        for i in range(len(pdb_list)):

            ## fit model i with and without interface consideration
            pdb_list[i], pdb_wo_if = self.output_fittedStructures(
                pdb_list[i], reference, i, mask_final)

            fitted_model_if = pdb_list[i].compress(mask_final)
            fitted_model_wo_if = pdb_wo_if.compress(mask_final)

            ## per-residue rmsd profile between reference and fitted model
            aprofile = self.rmsd_res(reference.getXyz(),
                                     fitted_model_if.getXyz())

            self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                           reference, pdb_list[i])

            pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                                  mask=mask_final, default=-1,
                                  comment="rmsd to known reference structure")

        ## write summary tables and the fitted models
        self.output_rmsd_aa(pdb_list)
        self.output_rmsd_ca(pdb_list)
        self.output_rmsd_res(pdb_list)

        self.write_PDBModels(pdb_list)
コード例 #34
0
    def capACE(self, model, chain, breaks=True):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength. 
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added ACE capping
        @rtype : PDBModel

        @raise CappingError: if adding the cap would mask an existing
                             chain break (tight gap, clash risk)
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False

        if breaks:
            ## remember whether this 'chain' is delimited by breaks rather
            ## than real chain boundaries -- decides 'newChain' on re-concat
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        m_ace = PDBModel(self.F_ace_cap)

        ## split model into target chain and everything before / after it
        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        m_term = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##        if 'HN' in m_term.atomNames():
        m_ace.remove(['CB'])  ## use backbone 'C' rather than CB for fitting

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] - 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit(m_term)

        ## renumber cap atom serials so they lead into the chain seamlessly
        cap = m_ace.resModels()[0]
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = cap.concat(m_chain, newChain=False)

        ## re-assemble whole model
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)

        r = r.concat(chains_after, newChain=not Cterm_is_break)

        ## capping must not change the number of detected chains
        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping ACE would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r
コード例 #35
0
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the
    average and SD
    """
    ## container for results and standard deviations
    MS,    AS    = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        dir = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( dir + '/*.pdb')
        
        msLst = []
        asLst = []
        
        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%dir )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove tailing GLY
                m = m.compress( m.res2atomMask(mask) )
                
                ## collect surface data for each peptide
                msLst += [ m.profile('MS') ]
                asLst += [ m.profile('AS') ]
                       
            except:
                print 'Failed calculating exposure for GLY-%s-GLY'%(k)
                print '\t and file %s'%f
                
        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')
        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        j = 0
        #atoms =  [ a['name'] for a in m.atoms ]
        for n in m['name']:
            msDic[n]    = N0.average(msLst)[j]
            asDic[n]    = N0.average(asLst)[j]
            msDic_sd[n] = MAU.SD( msLst )[j]
            asDic_sd[n] = MAU.SD( asLst )[j]
            j += 1

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
コード例 #36
0
    def capNME(self, model, chain, breaks=True):
        """
        Cap C-terminal of given chain with an NME residue. 

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping residue
        @rtype : PDBModel

        @raise CappingError: if the capping hides an existing chain break
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain)
        m_nme = PDBModel(self.F_nme_cap)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        ## a "chain" that only exists with break detection is a break fragment;
        ## remember this so the original chain continuity can be restored below
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        ## split model into (chains before | chain to cap | chains after)
        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        ## last residue of the target chain -- fitting target for the cap
        m_term = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] + 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove(['OXT'])

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit(m_term)

        ## keep only the NME residue of the fitted cap; continue atom numbering
        cap = m_nme.resModels()[-1]
        serial = m_term['serial_number'][-1] + 1
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = m_chain.concat(cap, newChain=False)

        ## should be obsolete now
        if getattr(m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [len(m_chain) - 1]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model; suppress new TER where a break fragment was
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)
        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping NME would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r
コード例 #37
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def parmMirror(self,
                   f_out,
                   f_out_crd=None,
                   fmod=['frcmod.ionsjc_tip3p'],
                   fprep=[],
                   **kw):
        """
        Create a parm7 file whose atom content (and order) exactly mirrors
        the given PDBModel. This requires two leap runs. First we get a
        temporary topology, then we identify all atoms added by leap and
        build a final topology where these atoms are deleted.
        This parm is hence NOT suited for simulations but can be used to parse
        e.g. a trajectory or PDB into ptraj.

        @param f_out: target parm file
        @type  f_out: str
        @param f_out_crd: target crd file (default: f_out but ending .crd)
        @type  f_out_crd: str
        @param fmod : list of amber Mod files (loaded with loadAmberParams)
        @type  fmod : [str]
        @param fprep: list of amber Prep files (loaded with loadAmberPrep)
        @type  fprep: [str]
        """
        f_out = t.absfile(f_out)
        f_out_crd = t.absfile(f_out_crd) or t.stripSuffix(f_out) + '.crd'

        ## if there are hydrogens, recast them to standard amber names
        aatm = 'HA' in self.m.atomNames()  ## 'HB2' in self.m.atomNames()

        ## First leap round ##
        ## write a leap-compatible copy of the model to a temporary PDB
        m_ref = self.m.clone()
        m_ref.xplor2amber(aatm=aatm, parm10=True)
        tmp_in = tempfile.mktemp('leap_in0.pdb')
        m_ref.writePdb(tmp_in, ter=3)

        tmp_parm = tempfile.mktemp('_parm0')
        tmp_crd = tempfile.mktemp('_crd0')

        ## expand file lists into leap script commands
        leap_mod = self.__fLines('m = loadAmberParams %s\n', fmod)
        leap_prep = self.__fLines('loadAmberPrep %s\n', fprep)

        self.__runLeap(self.script_mirror_pdb,
                       leaprc=self.leaprc,
                       fmod=leap_mod,
                       fprep=leap_prep,
                       in_pdb=tmp_in,
                       out_parm=tmp_parm,
                       out_crd=tmp_crd,
                       delete_atoms='')

        ## convert leap's topology back into a PDB for atom comparison
        tmp_pdb = self.parm2pdb(tmp_parm,
                                tmp_crd,
                                tempfile.mktemp('leap_out.pdb'),
                                aatm=aatm)

        if not self.debug:
            t.tryRemove(tmp_parm)
            t.tryRemove(tmp_crd)
            t.tryRemove(tmp_in)

        ## load model with missing atoms added by leap
        m_leap = PDBModel(tmp_pdb)

        ## compare atom content
        iLeap, iRef = m_leap.compareAtoms(m_ref)

        ## check that ref model doesn't need any change
        ## (mirroring only works if leap ADDED atoms, never renamed/removed)
        if iRef != range(len(m_ref)):
            uLeap, uRef = m_leap.unequalAtoms(m_ref, iLeap, iRef)
            atms = m_ref.reportAtoms(uRef, n=6)
            raise AmberError, "Cannot create exact mirror of %s.\n" % tmp_in +\
                  "Leap has renamed/deleted original atoms in %s:\n"% tmp_pdb+\
                  atms

        ## indices of atoms that were added by leap
        delStr = self.__deleteAtoms(m_leap,
                                    self.__inverseIndices(m_leap, iLeap))

        ## Second leap round ##
        ## rebuild the topology, this time deleting the leap-added atoms
        self.__runLeap(self.script_mirror_pdb,
                       leaprc=self.leaprc,
                       in_pdb=tmp_pdb,
                       fmod=leap_mod,
                       fprep=leap_prep,
                       out_parm=f_out,
                       out_crd=f_out_crd,
                       delete_atoms=delStr)

        if not self.debug:
            t.tryRemove(tmp_pdb)
コード例 #38
0
class PDBCleaner:
    """
    PDBCleaner performs the following tasks:
    
      * remove HETAtoms from PDB
      * replace non-standard AA by its closest standard AA
      * remove non-standard atoms from standard AA residues
      * delete atoms that follow missing atoms (in a chain)
      * remove multiple occupancy atoms (except the one with highest occupancy)
      * add ACE and NME capping residues to C- and N-terminals or chain breaks
        (see capTerminals(), this is NOT done automatically in process())

    Usage:
    =======

      >>> c = PDBCleaner( model )
      >>> c.process()
      >>> c.capTerminals( auto=True )

    This will modify the model in-place and report changes to STDOUT.
    Alternatively, you can specify a log file instance for the output.
    PDBCleaner.process accepts several options to modify the processing.
    
    Capping
    =======
    
    Capping will add N-methyl groups to free C-terminal carboxy ends
    or Acetyl groups to free N-terminal Amines and will thus 'simulate' the
    continuation of the protein chain -- a common practice in order to 
    prevent fake terminal charges. The automatic discovery of missing residues
    is guess work at best. The more conservative approach is to use,
    for example:
    
      >>> c.capTerminals( breaks=1, capC=[0], capN=[2] )
      
    In this case, only the chain break detection is used for automatic capping
    -- the last residue before a chain break is capped with NME and the first
    residue after the chain break is capped with ACE. Chain break detection
    relies on PDBModel.chainBreaks() (via PDBModel.chainIndex( breaks=1 )).
    The normal terminals to be capped are now specified explicitely. The first
    chain (not counting chain breaks) will receive a NME C-terminal cap and the
    third chain of the PDB will receive a N-terminal ACE cap. 
    
    Note: Dictionaries with standard residues and atom content are defined
          in Biskit.molUtils. This is a duplicate effort with the new strategy
          to parse Amber prep files for very similar information
          (AmberResidueType, AmberResidueLibrary) and should change once we 
          implement a real framework for better residue handling. 
    """

    #: these atoms always occur at the tip of of a chain or within a ring
    #: and, if missing, will not trigger the removal of other atoms
    TOLERATE_MISSING = [
        'O',
        'CG2',
        'CD1',
        'CD2',
        'OG1',
        'OE1',
        'NH1',
        'OD1',
        'OE1',
        'H5T',
        "O5'",
    ]

    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__(self, fpdb, log=None, verbose=True):
        """
        @param fpdb: pdb file OR PDBModel instance
        @type  fpdb: str OR Biskit.PDBModel
        @param log: Biskit.LogFile object (default: STDOUT)
        @type  log: Biskit.LogFile
        @param verbose: log warnings and infos (default: True)
        @type  verbose: bool
        """
        self.model = PDBModel(fpdb)
        self.log = log or StdLog()
        self.verbose = verbose

    def logWrite(self, msg, force=1):
        if self.log:
            self.log.add(msg)
        else:
            if force:
                print msg

    def remove_multi_occupancies(self):
        """
        Keep only atoms with alternate A field (well, or no alternate).
        """
        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Removing multiple occupancies of atoms ...')

        i = 0
        to_be_removed = []

        for a in self.model:

            if a['alternate']:
                try:
                    str_id = "%i %s %s %i" % (a['serial_number'], a['name'],
                                              a['residue_name'],
                                              a['residue_number'])

                    if a['alternate'].upper() == 'A':
                        a['alternate'] = ''

                    else:
                        if float(a['occupancy']) < 1.0:
                            to_be_removed += [i]
                            if self.verbose:
                                self.logWrite(
                                    'removing %s (%s %s)' %
                                    (str_id, a['alternate'], a['occupancy']))
                        else:
                            if self.verbose:
                                self.logWrite((
                                    'keeping non-A duplicate %s because of 1.0 '
                                    + 'occupancy') % str_id)

                except:
                    self.logWrite("Error removing duplicate: " + t.lastError())
            i += 1

        try:
            self.model.remove(to_be_removed)
            if self.verbose:
                self.logWrite('Removed %i atoms' % len(to_be_removed))

        except:
            if self.verbose:
                self.logWrite('No atoms with multiple occupancies to remove')

    def replace_non_standard_AA(self, amber=0, keep=[]):
        """
        Replace amino acids with none standard names with standard
        amino acids according to L{MU.nonStandardAA}
        
        @param amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0]
        @type  amber: 1||0
        @param keep: names of additional residues to keep
        @type keep:  [ str ]
        """
        standard = MU.atomDic.keys() + keep

        if amber:
            standard.extend(['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'])

        replaced = 0

        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Looking for non-standard residue names...')

        resnames = self.model['residue_name']
        for i in self.model.atomRange():

            resname = resnames[i].upper()

            if resname not in standard:
                if resname in MU.nonStandardAA:
                    resnames[i] = MU.nonStandardAA[resname]

                    if self.verbose:
                        self.logWrite('renamed %s %i to %s' % \
                                     (resname, i, MU.nonStandardAA[ resname ]))
                else:
                    resnames[i] = 'ALA'

                    self.logWrite('Warning: unknown residue name %s %i: ' \
                                  % (resname, i ) )
                    if self.verbose:
                        self.logWrite('\t->renamed to ALA.')

                replaced += 1

        if self.verbose:
            self.logWrite('Found %i atoms with non-standard residue names.'% \
                          replaced )

    def __standard_res(self, resname, amber=0):
        """
        Check if resname is a standard residue (according to L{MU.atomDic})
        if not return the closest standard residue (according to
        L{MU.nonStandardAA}).
        
        @param resname: 3-letter residue name
        @type  resname: str
        
        @return: name of closest standard residue or resname itself
        @rtype: str
        """
        if resname in MU.atomDic:
            return resname

        if resname in MU.nonStandardAA:
            return MU.nonStandardAA[resname]

        return resname

    def remove_non_standard_atoms(self):
        """
        First missing standard atom triggers removal of standard atoms that
        follow in the standard order. All non-standard atoms are removed too.
        Data about standard atoms are taken from L{MU.atomDic} and symomym
        atom name is defined in L{MU.atomSynonyms}.
        
        @return: number of atoms removed
        @rtype: int
        """
        mask = []

        if self.verbose:
            self.logWrite("Checking content of standard amino-acids...")

        for res in self.model.resList():

            resname = self.__standard_res(res[0]['residue_name']).upper()
            if resname == 'DC5':
                pass

            ## bugfix: ignore non-standard residues that have no matching
            ## standard residue
            if resname in MU.atomDic:

                standard = copy.copy(MU.atomDic[resname])

                ## replace known synonyms by standard atom name
                for a in res:
                    n = a['name']
                    if not n in standard and MU.atomSynonyms.get(
                            n, 0) in standard:
                        a['name'] = MU.atomSynonyms[n]
                        if self.verbose:
                            self.logWrite('%s: renaming %s to %s in %s %i' %\
                                          ( self.model.pdbCode, n, a['name'],
                                           a['residue_name'], a['residue_number']))

                anames = [a['name'] for a in res]
                keep = 1

                ## kick out all standard atoms that follow a missing one
                rm = []
                for n in standard:
                    if (not n in anames) and not (n in self.TOLERATE_MISSING):
                        keep = 0

                    if not keep:
                        rm += [n]

                for n in rm:
                    standard.remove(n)

                ## keep only atoms that are standard (and not kicked out above)
                for a in res:

                    if a['name'] not in standard:
                        mask += [1]
                        if self.verbose:
                            self.logWrite('%s: removing atom %s in %s %i '%\
                                          ( self.model.pdbCode, a['name'],
                                           a['residue_name'], a['residue_number']))
                    else:
                        mask += [0]

        self.model.remove(mask)

        if self.verbose:
            self.logWrite('Removed ' + str(N0.sum(mask)) +
                          ' atoms because they were non-standard' +
                          ' or followed a missing atom.')

        return N0.sum(mask)

    def capACE(self, model, chain, breaks=True):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength. 
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False

        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        m_ace = PDBModel(self.F_ace_cap)

        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        m_term = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##        if 'HN' in m_term.atomNames():
        m_ace.remove(['CB'])  ## use backbone 'C' rather than CB for fitting

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] - 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit(m_term)

        cap = m_ace.resModels()[0]
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = cap.concat(m_chain, newChain=False)

        ## re-assemble whole model
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)

        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping ACE would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r

    def capNME(self, model, chain, breaks=True):
        """
        Cap C-terminal of given chain. 

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping residue
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain)
        m_nme = PDBModel(self.F_nme_cap)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        m_term = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] + 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove(['OXT'])

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit(m_term)

        cap = m_nme.resModels()[-1]
        serial = m_term['serial_number'][-1] + 1
        cap['serial_number'] = range(serial, serial + len(cap))

        ## concat cap on chain
        m_chain = m_chain.concat(cap, newChain=False)

        ## should be obsolete now
        if getattr(m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [len(m_chain) - 1]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)
        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError, 'Capping NME would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r

    def convertChainIdsNter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        """
        if len(chains) == 0:
            return chains
        i = N0.take(model.chainIndex(), chains)
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)

    def convertChainIdsCter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        """
        if len(chains) == 0:
            return chains
        ## fetch last atom of given chains
        index = N0.concatenate((model.chainIndex(), [len(model)]))
        i = N0.take(index, N0.array(chains) + 1) - 1
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)

    def unresolvedTerminals(self, model):
        """
        Autodetect (aka "guess") which N- and C-terminals are most likely not
        the real end of each chain. This guess work is based on residue 
        numbering:
        
        * unresolved N-terminal: a protein residue with a residue number > 1

        * unresolved C-terminal: a protein residue that does not contain either
                               OXT or OT or OT1 or OT2 atoms
                               
        @param model: PDBModel
        
        @return: chains with unresolved N-term, with unresolved C-term
        @rtype : ([int], [int])
        """
        c_first = model.chainIndex()
        c_last = model.chainEndIndex()

        capN = [ i for (i,pos) in enumerate(c_first)\
                 if model['residue_number'][pos] > 1 ]

        capN = [i for i in capN if model['residue_name'][c_first[i]] != 'ACE']

        capN = self.filterProteinChains(model, capN, c_first)

        capC = []
        for (i, pos) in enumerate(c_last):
            atoms = model.takeResidues(model.atom2resIndices([pos
                                                              ])).atomNames()

            if not( 'OXT' in atoms or 'OT' in atoms or 'OT1' in atoms or \
                    'OT2' in atoms ):
                capC += [i]

        capC = self.filterProteinChains(model, capC, c_last)

        return capN, capC

    #@todo filter for protein positions in breaks=1

    def filterProteinChains(self, model, chains, chainindex):
        maskProtein = model.maskProtein()
        chains = [i for i in chains if maskProtein[chainindex[i]]]
        return chains

    def capTerminals(self, auto=False, breaks=False, capN=[], capC=[]):
        """
        Add NME and ACE capping residues to chain breaks or normal N- and 
        C-terminals. Note: these capping residues contain hydrogen atoms.
        
        Chain indices for capN and capC arguments can be interpreted either
        with or without chain break detection enabled. For example, let's
        assume we have a two-chain protein with some missing residues (chain
        break) in the first chain:
        
        A:   MGSKVSK---FLNAGSK
        B:   FGHLAKSDAK

        Then:
          capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and 
          C-terminal caps to chain B.
        However:
          capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and 
          C-terminal caps to the second fragment of chain A.
          
        
        Note: this operation *replaces* the internal model.
        
        @param auto: put ACE and NME capping residue on chain breaks
                     and on suspected false N- and C-termini (default: False)
        @type  auto: bool
        @param breaks: switch on chain break detection before interpreting
                       capN and capC
        @type  breaks: False
        @param capN: indices of chains that should get ACE cap (default: [])
        @type  capN: [int]
        @param capC: indices of chains that should get NME cap (default: [])
        @type  capC: [int]
        """
        m = self.model
        c_len = m.lenChains()
        i_breaks = m.chainBreaks()

        if auto:
            if not breaks:
                capN = self.convertChainIdsNter(m, capN)
                capC = self.convertChainIdsCter(m, capC)

            breaks = True
            capN, capC = self.unresolvedTerminals(m)

            end_broken = m.atom2chainIndices(m.chainBreaks(), breaks=1)

            capC = M.union(capC, end_broken)
            capN = M.union(capN, N0.array(end_broken) + 1)

        capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
        capC = self.filterProteinChains(m, capC,
                                        m.chainEndIndex(breaks=breaks))

        for i in capN:
            m = self.capACE(m, i, breaks=breaks)
            assert m.lenChains() == c_len, '%i != %i' % \
                   (m.lenChains(), c_len)
            assert len(m.chainBreaks(force=True)) == len(i_breaks)
            assert m[
                'serial_number'].dtype == N0.Int32, 'serial_number not int'

        for i in capC:
            m = self.capNME(m, i, breaks=breaks)
            assert m.lenChains() == c_len
            assert len(m.chainBreaks(force=True)) == len(i_breaks)

        self.model = m
        return self.model

    def process(self, keep_hetatoms=0, amber=0, keep_xaa=[]):
        """
        Remove Hetatoms, waters. Replace non-standard names.
        Remove non-standard atoms.
        
        @param keep_hetatoms: option
        @type  keep_hetatoms: 0||1
        @param amber: don't rename amber residue names (HIE, HID, CYX,..)
        @type  amber: 0||1
        @param keep_xaa: names of non-standard residues to be kept
        @type  keep_xaa: [ str ]
        
        @return: PDBModel (reference to internal)
        @rtype: PDBModel
        
        @raise CleanerError: if something doesn't go as expected ...
        """
        try:
            if not keep_hetatoms:
                self.model.remove(self.model.maskHetatm())

            self.model.remove(self.model.maskH2O())

            self.model.remove(self.model.maskH())

            self.remove_multi_occupancies()

            self.replace_non_standard_AA(amber=amber, keep=keep_xaa)

            self.remove_non_standard_atoms()

        except KeyboardInterrupt, why:
            raise KeyboardInterrupt(why)
        except Exception, why:
            self.logWrite('Error: ' + t.lastErrorTrace())
            raise CleanerError('Error cleaning model: %r' % why)
コード例 #39
0
class Test(BT.BiskitTest):
    """Unit tests for PDBCleaner: general cleaning, DNA input and capping."""
    def prepare(self):
        ## NOTE(review): both imports appear unused in this class --
        ## presumably leftovers; confirm before removing
        from Biskit.LogFile import LogFile
        import tempfile

    def test_PDBCleaner(self):
        """PDBCleaner general test"""

        ## Loading PDB...
        self.c = PDBCleaner(t.testRoot() + '/rec/1A2P_rec_original.pdb',
                            log=self.log,
                            verbose=self.local)

        ## process() must return the cleaned model (not None)
        self.m = self.c.process()

        ## total mass of the cleaned model serves as a result fingerprint
        self.assertAlmostEqual(self.m.mass(), 34029.0115499993, 7)

    def test_DNACleaning(self):
        """PDBCleaner DNA test"""
        ## Loading PDB...
        self.c = PDBCleaner(t.testRoot() + 'amber/entropy/0_com.pdb',
                            log=self.log,
                            verbose=self.local)

        ## amber=True keeps amber-specific residue names untouched
        self.dna = self.c.process(amber=True)

        self.assertAlmostEqual(self.dna.mass(), 26953.26, 1)

    def test_Capping(self):
        """PDBCleaner.capTerminals test"""
        ## Loading PDB...
        self.model = PDBModel(t.testRoot() + '/rec/1A2P_rec_original.pdb')

        ## capping a model without breaks or free termini is a no-op
        self.c = PDBCleaner(self.model, log=self.log, verbose=self.local)
        self.m2 = self.c.capTerminals(breaks=True)
        self.assert_(self.m2.atomNames() == self.model.atomNames())

        ## create an artificial chain break by deleting residues 10-15
        self.m3 = self.model.clone()
        self.m3.removeRes([10, 11, 12, 13, 14, 15])
        self.m4 = self.m3.clone()

        ## explicit capping of selected chain fragments
        self.c = PDBCleaner(self.m3, log=self.log, verbose=self.local)
        self.m3 = self.c.capTerminals(breaks=True, capC=[0], capN=[0, 1])
        ## 'X' marks the inserted ACE/NME cap residues in the sequence
        self.assertEqual(
            self.m3.takeChains([0]).sequence()[:18], 'XVINTFDGVADXXKLPDN')

        if self.local:
            self.log.add('\nTesting automatic chain capping...\n')

        ## auto-detection should find the same break and cap it identically
        self.c = PDBCleaner(self.m4, log=self.log, verbose=self.local)
        self.m4 = self.c.capTerminals(auto=True)
        self.assertEqual(
            self.m4.takeChains([0]).sequence()[:18], 'XVINTFDGVADXXKLPDN')

    def test_capping_extra(self):
        """PDBCleaner.capTerminals extra challenge"""
        ## a tight gap must trigger CappingError rather than a clashing cap
        self.m2 = PDBModel(t.testRoot() + '/pdbclean/foldx_citche.pdb')
        self.c = PDBCleaner(self.m2, verbose=self.local, log=self.log)
        self.assertRaises(CappingError, self.c.capTerminals, auto=True)
        if self.local:
            self.log.add('OK: CappingError has been raised indicating clash.')

        self.assertEqual(len(self.m2.takeChains([1]).chainBreaks()), 1)
コード例 #40
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
    def capACE( self, model, chain, breaks=True ):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength. 
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added ACE capping
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain )

        c_start = model.chainIndex( breaks=breaks )
        c_end = model.chainEndIndex( breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False
        
        ## a terminal that only exists with break detection on is a break,
        ## not a true chain end -- remember this for re-assembly below
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()
            
        m_ace = PDBModel( self.F_ace_cap )

        ## split model into: chains before | target chain | chains after
        chains_before = model.takeChains( range(chain), breaks=breaks )
        m_chain       = model.takeChains( [chain], breaks=breaks )
        chains_after  = model.takeChains( range(chain+1, len(c_start)),
                                          breaks=breaks )

        m_term  = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##        if 'HN' in m_term.atomNames():
        m_ace.remove( ['CB'] )  ## use backbone 'C' rather than CB for fitting 

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0]-1
                a['chain_id']       = m_term.atoms['chain_id'][0]
                a['segment_id']     = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit( m_term )

        ## renumber cap atoms so serials stay monotonic before the terminal
        cap = m_ace.resModels()[0]
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = range( serial, serial + len(cap) )

        ## concat cap on chain
        m_chain = cap.concat( m_chain, newChain=False )

        ## re-assemble whole model; suppress new chain boundaries at breaks
        r = chains_before.concat( m_chain, newChain=not Nterm_is_break)
        r = r.concat( chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains( breaks=breaks ):
            raise CappingError, 'Capping ACE would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r
コード例 #41
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
class PDBCleaner:
    """
    PDBCleaner performs the following tasks:
    
      * remove HETAtoms from PDB
      * replace non-standard AA by its closest standard AA
      * remove non-standard atoms from standard AA residues
      * delete atoms that follow missing atoms (in a chain)
      * remove multiple occupancy atoms (except the one with highest occupancy)
      * add ACE and NME capping residues to C- and N-terminals or chain breaks
        (see capTerminals(), this is NOT done automatically in process())

    Usage:
    =======

      >>> c = PDBCleaner( model )
      >>> c.process()
      >>> c.capTerminals( auto=True )

    This will modify the model in-place and report changes to STDOUT.
    Alternatively, you can specify a log file instance for the output.
    PDBCleaner.process accepts several options to modify the processing.
    
    Capping
    =======
    
    Capping will add N-methyl groups to free C-terminal carboxy ends
    or Acetyl groups to free N-terminal Amines and will thus 'simulate' the
    continuation of the protein chain -- a common practice in order to 
    prevent fake terminal charges. The automatic discovery of missing residues
    is guess work at best. The more conservative approach is to use,
    for example:
    
      >>> c.capTerminals( breaks=1, capC=[0], capN=[2] )
      
    In this case, only the chain break detection is used for automatic capping
    -- the last residue before a chain break is capped with NME and the first
    residue after the chain break is capped with ACE. Chain break detection
    relies on PDBModel.chainBreaks() (via PDBModel.chainIndex( breaks=1 )).
    The normal terminals to be capped are now specified explicitely. The first
    chain (not counting chain breaks) will receive a NME C-terminal cap and the
    third chain of the PDB will receive a N-terminal ACE cap. 
    
    Note: Dictionaries with standard residues and atom content are defined
          in Biskit.molUtils. This is a duplicate effort with the new strategy
          to parse Amber prep files for very similar information
          (AmberResidueType, AmberResidueLibrary) and should change once we 
          implement a real framework for better residue handling. 
    """
    
    #: these atoms always occur at the tip of of a chain or within a ring
    #: and, if missing, will not trigger the removal of other atoms
    TOLERATE_MISSING = ['O', 'CG2', 'CD1', 'CD2', 'OG1', 'OE1', 'NH1',
                        'OD1', 'OE1',
                        'H5T',"O5'", ]

    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__( self, fpdb, log=None, verbose=True ):
        """
        @param fpdb: pdb file OR PDBModel instance
        @type  fpdb: str OR Biskit.PDBModel
        @param log: Biskit.LogFile object (default: STDOUT)
        @type  log: Biskit.LogFile
        @param verbose: log warnings and infos (default: True)
        @type  verbose: bool
        """
        self.model = PDBModel( fpdb )
        self.log = log or StdLog()
        self.verbose = verbose


    def logWrite( self, msg, force=1 ):
        if self.log:
            self.log.add( msg )
        else:
            if force:
                print msg

    def remove_multi_occupancies( self ):
        """
        Keep only atoms with alternate A field (well, or no alternate).
        """
        if self.verbose:
            self.logWrite( self.model.pdbCode +
                           ': Removing multiple occupancies of atoms ...')

        i = 0
        to_be_removed = []

        for a in self.model:

            if a['alternate']:
                try:
                    str_id = "%i %s %s %i" % (a['serial_number'], a['name'],
                                              a['residue_name'],
                                              a['residue_number'])

                    if a['alternate'].upper() == 'A':
                        a['alternate'] = ''

                    else:
                        if float( a['occupancy'] ) < 1.0:
                            to_be_removed += [ i ]
                            if self.verbose:
                                self.logWrite(
                                    'removing %s (%s %s)' %
                                    (str_id,a['alternate'], a['occupancy']))
                        else:
                            if self.verbose:
                                self.logWrite(
                                 ('keeping non-A duplicate %s because of 1.0 '+
                                  'occupancy') % str_id )

                except:
                    self.logWrite("Error removing duplicate: "+t.lastError() )
            i+=1

        try:
            self.model.remove( to_be_removed )
            if self.verbose:
                self.logWrite('Removed %i atoms' % len( to_be_removed ) )

        except:
            if self.verbose:
                self.logWrite('No atoms with multiple occupancies to remove' )


    def replace_non_standard_AA( self, amber=0, keep=[] ):
        """
        Replace amino acids with none standard names with standard
        amino acids according to L{MU.nonStandardAA}
        
        @param amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0]
        @type  amber: 1||0
        @param keep: names of additional residues to keep
        @type keep:  [ str ]
        """
        standard = MU.atomDic.keys() + keep

        if amber:
            standard.extend( ['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'] )

        replaced = 0

        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Looking for non-standard residue names...')

        resnames = self.model['residue_name']
        for i in self.model.atomRange():

            resname = resnames[i].upper()

            if resname not in standard:
                if resname in MU.nonStandardAA:
                    resnames[i] = MU.nonStandardAA[ resname ]

                    if self.verbose:
                        self.logWrite('renamed %s %i to %s' % \
                                     (resname, i, MU.nonStandardAA[ resname ]))
                else:
                    resnames[i] = 'ALA'

                    self.logWrite('Warning: unknown residue name %s %i: ' \
                                  % (resname, i ) )
                    if self.verbose:
                        self.logWrite('\t->renamed to ALA.')

                replaced += 1

        if self.verbose:
            self.logWrite('Found %i atoms with non-standard residue names.'% \
                          replaced )


    def __standard_res( self, resname, amber=0 ):
        """
        Check if resname is a standard residue (according to L{MU.atomDic})
        if not return the closest standard residue (according to
        L{MU.nonStandardAA}).
        
        @param resname: 3-letter residue name
        @type  resname: str
        
        @return: name of closest standard residue or resname itself
        @rtype: str
        """
        if resname in MU.atomDic:
            return resname

        if resname in MU.nonStandardAA:
            return MU.nonStandardAA[ resname ]

        return resname


    def remove_non_standard_atoms( self ):
        """
        First missing standard atom triggers removal of standard atoms that
        follow in the standard order. All non-standard atoms are removed too.
        Data about standard atoms are taken from L{MU.atomDic} and symomym
        atom name is defined in L{MU.atomSynonyms}.
        
        @return: number of atoms removed
        @rtype: int
        """
        mask = []
        
        if self.verbose:
            self.logWrite("Checking content of standard amino-acids...")

        for res in self.model.resList():

            resname  = self.__standard_res( res[0]['residue_name'] ).upper()
            if resname == 'DC5':
                pass
            
            ## bugfix: ignore non-standard residues that have no matching 
            ## standard residue
            if resname in MU.atomDic:
                
                standard = copy.copy( MU.atomDic[ resname ] )
    
                ## replace known synonyms by standard atom name
                for a in res:
                    n = a['name']
                    if not n in standard and MU.atomSynonyms.get(n,0) in standard:
                        a['name'] = MU.atomSynonyms[n]
                        if self.verbose:
                            self.logWrite('%s: renaming %s to %s in %s %i' %\
                                          ( self.model.pdbCode, n, a['name'],
                                           a['residue_name'], a['residue_number']))
    
                anames   = [ a['name'] for a in res ]
                keep = 1
    
                ## kick out all standard atoms that follow a missing one
                rm = []
                for n in standard:
                    if (not n in anames) and not (n in self.TOLERATE_MISSING):
                        keep = 0
    
                    if not keep:
                        rm += [ n ]
    
                for n in rm:
                    standard.remove( n )
    
                ## keep only atoms that are standard (and not kicked out above)
                for a in res:
    
                    if a['name'] not in standard:
                        mask += [1]
                        if self.verbose:
                            self.logWrite('%s: removing atom %s in %s %i '%\
                                          ( self.model.pdbCode, a['name'],
                                           a['residue_name'], a['residue_number']))
                    else:
                        mask += [0]

        self.model.remove( mask )
        
        if self.verbose:
            self.logWrite('Removed ' + str(N.sum(mask)) +
                          ' atoms because they were non-standard' +
                          ' or followed a missing atom.' )

        return N.sum( mask )

    def capACE( self, model, chain, breaks=True ):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength. 
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain )

        c_start = model.chainIndex( breaks=breaks )
        c_end = model.chainEndIndex( breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False
        
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()
            
        m_ace = PDBModel( self.F_ace_cap )

        chains_before = model.takeChains( range(chain), breaks=breaks )
        m_chain       = model.takeChains( [chain], breaks=breaks )
        chains_after  = model.takeChains( range(chain+1, len(c_start)),
                                          breaks=breaks )

        m_term  = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##        if 'HN' in m_term.atomNames():
        m_ace.remove( ['CB'] )  ## use backbone 'C' rather than CB for fitting 

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0]-1
                a['chain_id']       = m_term.atoms['chain_id'][0]
                a['segment_id']     = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit( m_term )

        cap = m_ace.resModels()[0]
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = range( serial, serial + len(cap) )

        ## concat cap on chain
        m_chain = cap.concat( m_chain, newChain=False )

        ## re-assemble whole model
        r = chains_before.concat( m_chain, newChain=not Nterm_is_break)
        r = r.concat( chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains( breaks=breaks ):
            raise CappingError, 'Capping ACE would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'

        return r


    def capNME( self, model, chain, breaks=True ):
        """
        Cap C-terminal of given chain. 

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping residue
        @rtype : PDBModel
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain )
        m_nme   = PDBModel( self.F_nme_cap )

        c_start = model.chainIndex( breaks=breaks )
        c_end = model.chainEndIndex( breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()
         
        chains_before = model.takeChains( range(chain), breaks=breaks )
        m_chain       = model.takeChains( [chain], breaks=breaks )
        chains_after  = model.takeChains( range(chain+1, len(c_start)),
                                          breaks=breaks )

        m_term  = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0]+1
                a['chain_id']       = m_term.atoms['chain_id'][0]
                a['segment_id']     = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove( ['OXT'] )            

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit( m_term )
        
        cap = m_nme.resModels()[-1]
        serial = m_term['serial_number'][-1]+1
        cap['serial_number'] = range( serial, serial + len(cap) )

        ## concat cap on chain
        m_chain = m_chain.concat( cap, newChain=False )

        ## should be obsolete now
        if getattr( m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [ len( m_chain ) - 1 ]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model
        r = chains_before.concat( m_chain, newChain=not Nterm_is_break)
        r = r.concat( chains_after, newChain=not Cterm_is_break)

        if len(c_start) != r.lenChains( breaks=breaks ):
            raise CappingError, 'Capping NME would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'
        
        return r


    def convertChainIdsNter( self, model, chains ):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        """
        if len(chains) == 0: 
            return chains
        i = N.take( model.chainIndex(), chains ) 
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices( i, breaks=1 )
        
    def convertChainIdsCter( self, model, chains ):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        """
        if len(chains) == 0: 
            return chains
        ## fetch last atom of given chains
        index = N.concatenate( (model.chainIndex(), [len(model)]) )
        i = N.take( index, N.array( chains ) + 1 ) - 1
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices( i, breaks=1 )
    

    def unresolvedTerminals( self, model ):
        """
        Autodetect (aka "guess") which N- and C-terminals are most likely not
        the real end of each chain. This guess work is based on residue 
        numbering:
        
        * unresolved N-terminal: a protein residue with a residue number > 1

        * unresolved C-terminal: a protein residue that does not contain either
                               OXT or OT or OT1 or OT2 atoms
                               
        @param model: PDBModel
        
        @return: chains with unresolved N-term, with unresolved C-term
        @rtype : ([int], [int])
        """
        c_first = model.chainIndex()
        c_last  = model.chainEndIndex()
        
        capN = [ i for (i,pos) in enumerate(c_first)\
                 if model['residue_number'][pos] > 1 ]
        
        capN = [i for i in capN if model['residue_name'][c_first[i]] != 'ACE']
        
        capN = self.filterProteinChains( model, capN, c_first )
        
        capC = []
        for (i,pos) in enumerate(c_last):
            atoms = model.takeResidues(model.atom2resIndices([pos])).atomNames()
            
            if not( 'OXT' in atoms or 'OT' in atoms or 'OT1' in atoms or \
                    'OT2' in atoms ):
                capC += [ i ]

        capC = self.filterProteinChains( model, capC, c_last )
                  
        return capN, capC
    
    #@todo filter for protein positions in breaks=1

    def filterProteinChains( self, model, chains, chainindex ):
        maskProtein = model.maskProtein()
        chains = [ i for i in chains if maskProtein[ chainindex[i] ] ]
        return chains

    def capTerminals( self, auto=False, breaks=False, capN=[], capC=[] ):
        """
        Add NME and ACE capping residues to chain breaks or normal N- and 
        C-terminals. Note: these capping residues contain hydrogen atoms.
        
        Chain indices for capN and capC arguments can be interpreted either
        with or without chain break detection enabled. For example, let's
        assume we have a two-chain protein with some missing residues (chain
        break) in the first chain:
        
        A:   MGSKVSK---FLNAGSK
        B:   FGHLAKSDAK

        Then:
          capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and 
          C-terminal caps to chain B.
        However:
          capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and 
          C-terminal caps to the second fragment of chain A.
          
        
        Note: this operation *replaces* the internal model.
        
        @param auto: put ACE and NME capping residue on chain breaks
                     and on suspected false N- and C-termini (default: False)
        @type  auto: bool
        @param breaks: switch on chain break detection before interpreting
                       capN and capC
        @type  breaks: False
        @param capN: indices of chains that should get ACE cap (default: [])
        @type  capN: [int]
        @param capC: indices of chains that should get NME cap (default: [])
        @type  capC: [int]
        """
        m = self.model
        c_len = m.lenChains()
        i_breaks = m.chainBreaks()
            
        if auto:
            if not breaks:
                capN = self.convertChainIdsNter( m, capN )
                capC = self.convertChainIdsCter( m, capC )

            breaks=True
            capN, capC = self.unresolvedTerminals( m )
        
            end_broken = m.atom2chainIndices( m.chainBreaks(), breaks=1 )
            
            capC = M.union( capC, end_broken )
            capN = M.union( capN, N.array( end_broken ) + 1 )
            
        capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
        capC = self.filterProteinChains(m, capC, m.chainEndIndex(breaks=breaks))

        for i in capN:
            m = self.capACE( m, i, breaks=breaks )
            assert m.lenChains() == c_len, '%i != %i' % \
                   (m.lenChains(), c_len)
            assert len(m.chainBreaks(force=True)) == len(i_breaks)

        for i in capC:
            m = self.capNME( m, i, breaks=breaks )
            assert m.lenChains() == c_len
            assert len(m.chainBreaks(force=True)) == len(i_breaks)
        
        self.model = m
        return self.model

    
    
    def process( self, keep_hetatoms=0, amber=0, keep_xaa=[] ):
        """
        Remove Hetatoms, waters. Replace non-standard names.
        Remove non-standard atoms.
        
        @param keep_hetatoms: option
        @type  keep_hetatoms: 0||1
        @param amber: don't rename amber residue names (HIE, HID, CYX,..)
        @type  amber: 0||1
        @param keep_xaa: names of non-standard residues to be kept
        @type  keep_xaa: [ str ]
        
        @return: PDBModel (reference to internal)
        @rtype: PDBModel
        
        @raise CleanerError: if something doesn't go as expected ...
        """
        try:
            if not keep_hetatoms:
                self.model.remove( self.model.maskHetatm() )

            self.model.remove( self.model.maskH2O() )

            self.model.remove( self.model.maskH() )

            self.remove_multi_occupancies()

            self.replace_non_standard_AA( amber=amber, keep=keep_xaa )

            self.remove_non_standard_atoms()


        except KeyboardInterrupt, why:
            raise KeyboardInterrupt( why )
        except Exception, why:
            self.logWrite('Error: '+t.lastErrorTrace())
            raise CleanerError( 'Error cleaning model: %r' % why )
コード例 #42
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
class AmberParmBuilder:
    """
    AmberParmBuilder
    ================
    Create Amber topology and coordinate file from PDB.

      - parmMirror():
         ...builds a fake parm that exactly mirrors a given PDB file.
         This parm can be used for ptraj but not for simulations.
         Currently, parmMirror only accepts amber-formatted PDBs as
         input. It should be possible to create topologies that have
         the same content and order of atoms as an xplor PDB but
         some atoms will have different names.

      - parmSolvated():
         ...builds a solvated system for PME simulations (incl. closing
         of S-S bonds, capping of chain breaks). parmSolvated accepts
         both xplor and amber-formatted PDBs as input.

    Requires the amber programs C{tleap} and C{ambpdb}.
    Requires leap template files in C{biskit/Biskit/data/amber/leap/}.
    
    Note on forcefields:

       The default forcefield used is specified in exe_tleap and currently
       is ff10. This translates to loading amber11/dat/leap/cmd/leaprc.ff10
       at the beginning of the leap run. As of 2011, ff10 is the recommended
       default forcefield for proteins and nucleic acids.
       Comment from Jason Swails on the Amber mailing list: 
       "
       Try using ff99SB (which is the protein force field part of ff10, which is
       the version I would actually suggest using).  Despite its label, it is
       actually a 2006 update of the ff99 force field which performs at least as
       well (if not better) as ff03."
       
       Unfortunately, ions are only "half" parameterized in ff10. Additional 
       parameters need to be loaded from a frcmod file, typically 
       frcmod.ionsjc_tip3p. There are additional versions of this file optimized
       for other water models than TIP3. frcmod.ionsjc_tip3p is set as the 
       default frcmod file to include by parmSolvated and parmMirror. Please
       include it if you provide your own list of frcmod files.

    @note: The design of AmberParmBuilder is less than elegant. It
           would make more sense to split it into two classes that
           are both derived from Executor.
    """

    ## tleap script template to create a parm that exactly mirrors a given
    ## PDB; %(...)s place holders are filled in by __runLeap / AmberLeap
    script_mirror_pdb = """
    logFile %(f_out)s
    source %(leaprc)s
    %(fmod)s
    %(fprep)s
    p = loadPdb %(in_pdb)s
    %(delete_atoms)s
    saveAmberParm p %(out_parm)s %(out_crd)s
    quit
    """

    ## tleap command to close a single S-S bond (filled with residue numbers)
    ss_bond = "bond p.%i.SG p.%i.SG\n"

    ## leap script for solvated topology
    F_leap_in = t.dataRoot() + '/amber/leap/solvate_box.leap'
    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__( self, model,
                  leap_template=F_leap_in,
                  leaprc=None,
                  leap_out=None, leap_in=None,
                  leap_pdb=None,
                  log=None,
                  debug=0,
                  verbose=0,
                  **kw ):
        """
        @param model: structure to build the topology for
        @type  model: PDBModel or str
        @param leap_template: path to template file for leap input
        @type  leap_template: str
        @param leaprc: forcefield parameter file or code (e.g. ff99)
        @type  leaprc: str
        @param leap_out: target file for leap.log (default: discard)
        @type  leap_out: str
        @param leap_in: target file for leap.in script (default: discard)
        @type  leap_in: str
        @param leap_pdb: target file for cleaned input PDB
                         (default: temporary file, removed after the run)
        @type  leap_pdb: str
        @param log: log target (default: StdLog)
        @type  log: Biskit.LogFile
        @param debug: keep temporary files (default: 0)
        @type  debug: 0|1
        @param verbose: print progress to log (default: 0)
        @type  verbose: int
        @param kw: kw=value pairs for additional options in the leap_template
        @type  kw: key=value
        """
        ## accept a path, a PDBModel, or anything PDBModel can digest
        self.m = PDBModel( model )

        ## leap input / output configuration
        self.leap_template = leap_template
        self.leaprc        = leaprc
        self.leap_in       = leap_in
        self.leap_out      = leap_out

        ## only keep the intermediate PDB if the caller named it explicitly
        self.leap_pdb      = leap_pdb or tempfile.mktemp( '_leap_pdb' )
        self.keep_leap_pdb = leap_pdb is not None

        self.log     = log or StdLog()
        self.debug   = debug
        self.verbose = verbose

        self.output = None   # last output of leap

        ## extra template place-holder values become instance attributes;
        ## applied last so they may override the defaults set above
        self.__dict__.update( kw )

    def __runLeap( self, in_script, in_pdb, norun=0, **kw ):
        """
        Create script file and run Leap.

        Delegates script generation and execution to AmberLeap; on a real
        run, tleap's console output is stored in self.output.

        @param in_script: content of leap script with place holders
        @type  in_script: str
        @param in_pdb: PDB file to load into tleap
        @type  in_pdb: str
        @param norun: 1 - only create leap script (default: 0)
        @type  norun: 1|0
        @param kw: key=value pairs for filling place holders in script
        @type  kw: key=value

        @raise AmberError: if missing option for leap input file or
                           if could not create leap input file
        """
        x = AmberLeap( in_script,
                       in_pdb=in_pdb,
                       log=self.log, verbose=self.verbose, debug=self.debug,
                       catch_out=True,
                       f_in=self.leap_in,
                       f_out=self.leap_out,
                       **kw )
        if norun:
            ## dry run: only generate the input script, don't execute tleap
            x.generateInp()
        else:
            x.run()
            self.output = x.output


    def parm2pdb( self, f_parm, f_crd, f_out, aatm=0 ):
        """
        Use ambpdb to build PDB from parm and crd.

        @param f_parm: existing parm file
        @type  f_parm: str
        @param f_crd: existing crd file
        @type  f_crd: str
        @param f_out: target file name for PDB
        @type  f_out: str
        @param aatm: pass -aatm flag to ambpdb (amber atom names) (default: 0)
        @type  aatm: 1|0

        @return: f_out, target file name for PDB
        @rtype: str

        @raise AmberError: if ambpdb fail
        """
        ## '-aatm' * 0 yields '' -> flag only included when requested
        flags = '-aatm' * aatm
        args = '-p %s %s' % ( f_parm, flags )

        ## ambpdb reads the crd from stdin and writes the PDB to stdout
        runner = Executor( 'ambpdb', args, f_in=f_crd, f_out=f_out,
                           log=self.log, verbose=1, catch_err=1 )
        runner.run()

        if not os.path.exists( f_out ):
            raise AmberError( 'ambpdb failed.' )

        return f_out


    def __ssBonds( self, model, cutoff=4. ):
        """
        Identify disulfide bonds from SG-SG distances.

        @param model: model
        @type  model: PDBModel
        @param cutoff: distance cutoff for S-S distance (default: 4.0)
        @type  cutoff: float

        @return: list with numbers of residue pairs forming S-S
        @rtype: [(int, int)]
        """
        ## reduce model to gamma-sulfur atoms only
        sg = model.compress( model.mask( ['SG'] ) )

        ## fewer than two SG atoms -> no bond possible
        if len( sg ) < 2:
            return []

        ## boolean matrix: which SG-SG distances fall below the cutoff
        close = N.less( MU.pairwiseDistances( sg.xyz, sg.xyz ), cutoff )

        resnr = sg.atoms['residue_number']
        pairs = []
        n = len( close )
        ## upper triangle only -- report each pair once, skip self-distances
        for i in range( n ):
            for j in range( i + 1, n ):
                if close[i, j]:
                    pairs.append( ( resnr[i], resnr[j] ) )

        return pairs


    def __cys2cyx( self, model, ss_residues ):
        """
        Rename all S-S bonded CYS into CYX (in place).

        @param model: model
        @type  model: PDBModel
        @param ss_residues: original residue numbers of S-S pairs
        @type  ss_residues: [(int, int)]
        """
        ## collect every residue number that takes part in an S-S bond
        bonded = set()
        for pair in ss_residues:
            bonded.update( pair )

        for atom in model:
            if atom['residue_number'] in bonded:
                atom['residue_name'] = 'CYX'


    def capACE( self, model, chain ):
        """
        Cap N-terminal of given chain with an ACE residue.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int

        @return: model with added ACE capping residue
        @rtype: PDBModel
        """
        ## delegate to PDBCleaner; chain index counts chain breaks as
        ## boundaries (breaks=True)
        return PDBCleaner( model, log=self.log ).capACE( model, chain,
                                                         breaks=True )


    def capNME( self, model, chain ):
        """
        Cap C-terminal of given chain with an NME residue.

        @param model: model
        @type  model: PDBModel
        @param chain: index of chain to be capped
        @type  chain: int

        @return: model with added NME capping residue
        @rtype: PDBModel
        """
        ## delegate to PDBCleaner; chain index counts chain breaks as
        ## boundaries (breaks=True)
        return PDBCleaner( model, log=self.log ).capNME( model, chain,
                                                         breaks=True )


    def centerModel( self, model ):
        """
        Translate model so that its geometric center lies at the origin
        (works in place).

        @param model: model
        @type  model: PDBModel
        """
        ## NOTE(review): N.average is called without an explicit axis;
        ## assumes the wrapped numeric library averages over axis 0 so that
        ## 'center' is an (x,y,z) vector -- confirm against Biskit's N module
        center = N.average( model.getXyz() )
        model.setXyz( model.xyz - center )


    def leapModel( self, hetatm=0, center=True ):
        """
        Get a clean PDBModel for input into leap.

        @param hetatm: keep HETATM records (default: 0)
        @type  hetatm: 1|0
        @param center: move geometric center to the origin (default: True)
        @type  center: bool

        @return: model
        @rtype: PDBModel
        """
        ## work on a copy; convert xplor to amber conventions first
        model = self.m.clone()
        model.xplor2amber()

        ## strip waters/hydrogens, fix names (amber=1 keeps HIE, HID, CYX..)
        cleaner = PDBCleaner( model, log=self.log, verbose=self.verbose )
        model = cleaner.process( keep_hetatoms=hetatm, amber=1 )

        model.renumberResidues( addChainId=1 )

        if center:
            self.centerModel( model )

        return model


    def __fLines( self, template, values ):
        """
        Expand a one-line template once per value and join the results.
        A non-list value (including a tuple of %-arguments) is treated
        as a single value.
        """
        ## keep the exact 'is list' test: a tuple must stay intact so it
        ## can fill multiple %-place holders in one template expansion
        if type( values ) is not list:
            values = [ values ]

        lines = []
        for v in values:
            lines.append( template % v )

        return ''.join( lines )


    def parmSolvated( self, f_out, f_out_crd=None, f_out_pdb=None,
                      hetatm=0, norun=0,
                      cap=0, capN=[], capC=[],
                      fmod=['frcmod.ionsjc_tip3p'], fprep=[],
                      box=10.0, center=True, **kw ):
        """
        Build a solvated Amber topology: clean the model, cap termini,
        close S-S bonds, then run tleap with the solvation template and
        convert the result back to PDB.

        @param f_out: target file for parm (topology)
        @type  f_out: str
        @param f_out_crd: target file for crd (coordinates)
                          (default:|f_out_base|.crd)
        @type  f_out_crd: str
        @param f_out_pdb: target file for pdb (default:|f_out_base|.pdb)
        @type  f_out_pdb: str
        @param hetatm: keep hetero atoms (default: 0)
        @type  hetatm: 1|0
        @param norun: only create the leap input script, don't run (default: 0)
        @type  norun: 1|0
        @param cap: put ACE and NME capping residue on chain breaks 
                    (default: 0)
        @type  cap: 1|0
        @param capN: indices of chains that should get ACE cap (default: [])
        @type  capN: [int]
        @param capC: indices of chains that should get NME cap (default: [])
        @type  capC: [int]
        @param box: minimal distance of solute from box edge (default: 10.0)
        @type  box: float
        @param center: re-center coordinates (default: True)
        @type  center: bool
        @param fmod: list of files with amber parameter modifications
                     to be loaded into leap with loadAmberParams
                    (default:['frcmod.ionsjc_tip3p'] ... mod file needed for 
                    default Amber ff10 ions -- topology saving will fail if this 
                    one is missing)
        @type  fmod: [str]
        @param fprep: list of files with amber residue definitions
                    (to be loaded into leap with loadAmberPrep) (default: [])
        @type  fprep: [str]
        @param kw: additional key=value pairs for leap input template
        @type  kw: key=value

        @raise IOError:
        """
        f_out = t.absfile( f_out )
        f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'
        f_out_pdb = t.absfile( f_out_pdb ) or t.stripSuffix( f_out ) +\
                    '_leap.pdb'

        ## removed: (bugfix 3434136)
        #fmod  = [ t.absfile( f ) for f in t.toList( fmod )  ]
        #fprep = [ t.absfile( f ) for f in t.toList( fprep ) ]

        try:
            if self.verbose: self.log.add( '\nCleaning PDB file for Amber:' )
            m = self.leapModel( hetatm=hetatm, center=center )

            ## auto-capping: each chain break gets an NME cap on the chain
            ## before the break and an ACE cap on the chain after it
            if cap:
                end_broken = m.atom2chainIndices( m.chainBreaks() )
                capC = MU.union( capC, end_broken )
                capN = MU.union( capN, N.array( end_broken ) + 1 )

            for i in capN:
                if self.verbose:
                    self.log.add( 'Adding ACE cap to chain %i' % i )
                m = self.capACE( m, i )

            for i in capC:
                if self.verbose:
                    self.log.add( 'Adding NME cap to chain %i' % i )
                m = self.capNME( m, i )

            m.renumberResidues( addChainId=1 )  ## again, to accomodate capping

            template = open( self.leap_template ).read()

            ## tleap commands for loading extra parameter / residue files
            leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
            leap_prep= self.__fLines( 'loadAmberPrep %s\n', fprep )

            ## detect disulfide bridges, rename CYS->CYX, emit bond commands
            ss = self.__ssBonds( m, cutoff=4. )
            self.__cys2cyx( m, ss )
            leap_ss  = self.__fLines( self.ss_bond, ss )
            if self.verbose:
                self.log.add('Found %i disulfide bonds: %s' % (len(ss),str(ss)))

            if self.verbose:
                self.log.add( 'writing cleaned PDB to %s'  % self.leap_pdb )
            m.writePdb( self.leap_pdb, ter=3 )

            self.__runLeap( template, in_pdb=self.leap_pdb,
                            out_parm=f_out, out_crd=f_out_crd,
                            ss_bonds=leap_ss, fmod=leap_mod,
                            fprep=leap_prep, norun=norun,
                            box=box, **kw )

            ## convert the leap-generated parm/crd back into a PDB file
            if not norun:
                parm_pdb = self.parm2pdb( f_out, f_out_crd, f_out_pdb )

            if not self.keep_leap_pdb and not self.debug:
                t.tryRemove( self.leap_pdb )

        except IOError, why:
            raise IOError, why
コード例 #43
0
def changeModel(inFile, prefix, sourceModel):
    """
    Load a PDB, link it to sourceModel as its source, mark changed
    profiles/coordinates, re-derive surface/density/foldX/delphi data if
    coordinates changed, and dump the result as |prefix||inFile|.model
    next to the input file.

    @param inFile: path to input PDB file
    @param prefix: prefix for the output .model file name
    @param sourceModel: model to be set as source of the new model
    """

    print '\nget ' + os.path.basename(inFile) + '..',

    model = PDBModel(inFile)

    model.update()

    model = model.sort()

    ## NOTE(review): raises only when eq[0] is false but eq[1] true --
    ## looks like it should perhaps reject any inequality; confirm the
    ## semantics of PDBModel.equals before changing
    eq = model.equals(sourceModel)
    if not eq[0] and eq[1]:
        raise ConvertError('source and other models are not equal: ' + str(eq))


#    model.validSource()
    model.setSource(sourceModel.validSource())

    ## flag per-profile whether content matches the source model
    #model.atomsChanged = 0
    for k in model.atoms:
        model.atoms[k, 'changed'] = N0.all(model[k] == sourceModel[k])

    ## any coordinate difference at all marks xyz as changed
    model.xyzChanged = (0 != N0.sum(N0.ravel(model.xyz - sourceModel.xyz)))

    model.update(updateMissing=1)

    if model.xyzChanged:

        doper = PDBDope(model)

        ## re-compute only the data the source model actually carried
        if 'MS' in sourceModel.atoms.keys():
            doper.addSurfaceRacer(probe=1.4)

        if 'density' in sourceModel.atoms.keys():
            doper.addDensity()

        if 'foldX' in sourceModel.info.keys():
            doper.addFoldX()

        if 'delphi' in sourceModel.info.keys():
            doper.addDelphi()

    outFile = os.path.dirname( inFile ) + '/' + prefix +\
            T.stripFilename( inFile ) + '.model'

    T.dump(model, outFile)

    print '-> ' + os.path.basename(outFile)
コード例 #44
0
ファイル: PDBCleaner.py プロジェクト: ostrokach/biskit
    def capNME( self, model, chain, breaks=True ):
        """
        Cap C-terminal of given chain. 

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on model.chainIndex(breaks=True), 
        that means with chain break detection activated! This is not the 
        default behaviour of PDBModel.chainIndex or takeChains or chainLength.
        Please use the wrapping method capTerminals() for more convenient 
        handling of the index.

        @param model: model
        @type  model: PDBMode
        @param chain: index of chain to be capped
        @type  chain: int
        @param breaks: consider chain breaks when identifying chain boundaries
        @type  breaks: bool
        
        @return: model with added NME capping residue
        @rtype : PDBModel

        @raise CappingError: if the capping would mask an existing chain break
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain )
        m_nme   = PDBModel( self.F_nme_cap )

        ## determine whether the target chain's termini are real chain ends
        ## or only break-detected boundaries (affects re-assembly below)
        c_start = model.chainIndex( breaks=breaks )
        c_end = model.chainEndIndex( breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False
        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()
         
        ## split model into the chain to cap and everything before/after
        chains_before = model.takeChains( range(chain), breaks=breaks )
        m_chain       = model.takeChains( [chain], breaks=breaks )
        chains_after  = model.takeChains( range(chain+1, len(c_start)),
                                          breaks=breaks )

        ## last residue of the chain -- the cap is fitted onto it
        m_term  = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0]+1
                a['chain_id']       = m_term.atoms['chain_id'][0]
                a['segment_id']     = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove( ['OXT'] )            

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit( m_term )
        
        ## keep only the NME residue of the fitted cap; continue numbering
        cap = m_nme.resModels()[-1]
        serial = m_term['serial_number'][-1]+1
        cap['serial_number'] = range( serial, serial + len(cap) )

        ## concat cap on chain
        m_chain = m_chain.concat( cap, newChain=False )

        ## should be obsolete now
        if getattr( m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [ len( m_chain ) - 1 ]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model; only start a new chain where the
        ## original boundary was a true chain end, not a break
        r = chains_before.concat( m_chain, newChain=not Nterm_is_break)
        r = r.concat( chains_after, newChain=not Cterm_is_break)

        ## capping must not change the number of (break-detected) chains
        if len(c_start) != r.lenChains( breaks=breaks ):
            raise CappingError, 'Capping NME would mask a chain break. '+\
                  'This typically indicates a tight gap with high risk of '+\
                  'clashes and other issues.'
        
        return r
コード例 #45
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
    def parmMirror( self, f_out, f_out_crd=None, fmod=['frcmod.ionsjc_tip3p'], 
                    fprep=[], **kw ):
        """
        Create a parm7 file whose atom content (and order) exactly mirrors
        the given PDBModel. This requires two leap runs. First we get a
        temporary topology, then we identify all atoms added by leap and
        build a final topology where these atoms are deleted.
        This parm is hence NOT suited for simulations but can be used to parse
        e.g. a trajectory or PDB into ptraj.

        @param f_out: target parm file
        @type  f_out: str
        @param f_out_crd: target crd file (default: f_out but ending .crd)
        @type  f_out_crd: str
        @param fmod : list of amber Mod files (loaded with loadAmberParams)
        @type  fmod : [str]
        @param fprep : list of amber Prep files (loaded with loadAmberPrep)
        @type  fprep : [str]

        @raise AmberError: if leap renamed or deleted original atoms
        """
        f_out = t.absfile( f_out )
        f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'

        ## if there are hydrogens, recast them to standard amber names
        aatm = 'HA' in self.m.atomNames() ## 'HB2' in self.m.atomNames()

        ## First leap round ##
        m_ref = self.m.clone()
        m_ref.xplor2amber( aatm=aatm, parm10=True )
        tmp_in = tempfile.mktemp( 'leap_in0.pdb' )
        m_ref.writePdb( tmp_in, ter=3 )

        tmp_parm = tempfile.mktemp( '_parm0' )
        tmp_crd  = tempfile.mktemp( '_crd0' )

        leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
        leap_prep= self.__fLines( 'loadAmberPrep %s\n', fprep )

        ## build a temporary topology without deleting anything
        self.__runLeap( self.script_mirror_pdb,
                        leaprc=self.leaprc, fmod=leap_mod, fprep=leap_prep,
                        in_pdb=tmp_in, out_parm=tmp_parm, out_crd=tmp_crd,
                        delete_atoms='' )

        tmp_pdb = self.parm2pdb( tmp_parm, tmp_crd,
                                 tempfile.mktemp( 'leap_out.pdb' ), aatm=aatm )

        if not self.debug:
            t.tryRemove( tmp_parm )
            t.tryRemove( tmp_crd )
            t.tryRemove( tmp_in )

        ## load model with missing atoms added by leap
        m_leap = PDBModel( tmp_pdb  )

        ## compare atom content
        iLeap, iRef = m_leap.compareAtoms( m_ref )

        ## check that ref model doesn't need any change
        if iRef != range( len( m_ref ) ):
            uLeap, uRef = m_leap.unequalAtoms( m_ref, iLeap, iRef )
            atms = m_ref.reportAtoms( uRef, n=6 )
            raise AmberError, "Cannot create exact mirror of %s.\n" % tmp_in +\
                  "Leap has renamed/deleted original atoms in %s:\n"% tmp_pdb+\
                  atms

        ## indices of atoms that were added by leap
        delStr = self.__deleteAtoms( m_leap,
                                     self.__inverseIndices( m_leap, iLeap ) )

        ## Second leap round ##
        ## rebuild the topology, this time deleting all leap-added atoms
        self.__runLeap( self.script_mirror_pdb, leaprc=self.leaprc,
                        in_pdb=tmp_pdb, fmod=leap_mod, fprep=leap_prep,
                        out_parm=f_out, out_crd=f_out_crd,
                        delete_atoms=delStr )

        if not self.debug:
            t.tryRemove( tmp_pdb )
コード例 #46
0
ファイル: AmberParmBuilder.py プロジェクト: tybiot/biskit
class Test(BT.BiskitTest):
    """Test AmberParmBuilder"""

    ## requires external Amber executables (tleap, ambpdb)
    TAGS = [BT.EXE]

    def prepare(self):
        ## reference structure and temporary output files
        root = T.testRoot() + '/amber/'
        self.ref = PDBModel(T.testRoot() + '/amber/1HPT_0.pdb')
        self.refdry = root + '1HPT_0dry.pdb'

        self.dryparm = tempfile.mktemp('.parm', 'dry_')
        self.drycrd = tempfile.mktemp('.crd', 'dry_')
        self.drypdb = tempfile.mktemp('.pdb', 'dry_')
        self.wetparm = tempfile.mktemp('.parm', 'wet_')
        self.wetcrd = tempfile.mktemp('.crd', 'wet_')
        self.wetpdb = tempfile.mktemp('.pdb', 'wet_')
        self.leapout = tempfile.mktemp('.out', 'leap_')

    def cleanUp(self):
        ## remove all temporary files unless debugging
        if not self.DEBUG:
            T.tryRemove(self.dryparm)
            T.tryRemove(self.drycrd)
            T.tryRemove(self.drypdb)
            T.tryRemove(self.wetparm)
            T.tryRemove(self.wetcrd)
            T.tryRemove(self.wetpdb)
            T.tryRemove(self.leapout)

    def test_AmberParmMirror(self):
        """AmberParmBuilder.parmMirror test"""
        ref = self.ref
        mask = N0.logical_not(ref.maskH2O())  ## keep protein and Na+ ion
        self.mdry = ref.compress(mask)

        self.a = AmberParmBuilder(self.mdry,
                                  verbose=self.local,
                                  leap_out=self.leapout,
                                  debug=self.DEBUG)

        self.a.parmMirror(f_out=self.dryparm, f_out_crd=self.drycrd)

        self.a.parm2pdb(self.dryparm, self.drycrd, self.drypdb)

        ## round-tripped coordinates must match the dry reference exactly
        self.m1 = PDBModel(self.drypdb)
        self.m2 = PDBModel(self.refdry)

        eq = N0.array(self.m1.xyz == self.m2.xyz)
        self.assert_(eq.all())

    def test_AmberParmSolvated(self):
        """AmberParmBuilder.parmSolvated test"""
        ## remove waters and hydrogens
        self.mdry = self.ref.compress(self.ref.maskProtein())
        self.mdry = self.mdry.compress(self.mdry.maskHeavy())

        self.a = AmberParmBuilder(self.mdry,
                                  leap_out=self.leapout,
                                  verbose=self.local,
                                  debug=self.DEBUG)

        self.a.parmSolvated(self.wetparm,
                            f_out_crd=self.wetcrd,
                            f_out_pdb=self.wetpdb,
                            box=2.5)

        self.m3 = PDBModel(self.wetpdb)

        ## solvation must preserve chain count and protein atom names
        m3prot = self.m3.compress(self.m3.maskProtein())
        refprot = self.ref.compress(self.ref.maskProtein())
        refprot.xplor2amber()

        self.assertEqual(self.ref.lenChains(), self.m3.lenChains())
        self.assertEqual(refprot.atomNames(), m3prot.atomNames())
コード例 #47
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
class Test(BT.BiskitTest):
    """Test AmberParmBuilder"""

    TAGS = [BT.EXE]

    def prepare(self):
        root = T.testRoot() + '/amber/'
        self.ref = PDBModel(T.testRoot() + '/amber/1HPT_0.pdb')
        self.refdry = root + '1HPT_0dry.pdb'

        self.dryparm = tempfile.mktemp('.parm', 'dry_')
        self.drycrd = tempfile.mktemp('.crd', 'dry_')
        self.drypdb = tempfile.mktemp('.pdb', 'dry_')
        self.wetparm = tempfile.mktemp('.parm', 'wet_')
        self.wetcrd = tempfile.mktemp('.crd', 'wet_')
        self.wetpdb = tempfile.mktemp('.pdb', 'wet_')
        self.leapout = tempfile.mktemp('.out', 'leap_')

    def cleanUp(self):
        if not self.DEBUG:
            T.tryRemove(self.dryparm)
            T.tryRemove(self.drycrd)
            T.tryRemove(self.drypdb)
            T.tryRemove(self.wetparm)
            T.tryRemove(self.wetcrd)
            T.tryRemove(self.wetpdb)
            T.tryRemove(self.leapout)

    def test_AmberParmMirror(self):
        """AmberParmBuilder.parmMirror test"""
        ref = self.ref
        mask = N.logical_not(ref.maskH2O())  ## keep protein and Na+ ion
        self.mdry = ref.compress(mask)

        self.a = AmberParmBuilder(self.mdry,
                                  verbose=self.local,
                                  leap_out=self.leapout,
                                  debug=self.DEBUG)

        self.a.parmMirror(f_out=self.dryparm, f_out_crd=self.drycrd)

        self.a.parm2pdb(self.dryparm, self.drycrd, self.drypdb)

        self.m1 = PDBModel(self.drypdb)
        self.m2 = PDBModel(self.refdry)

        eq = N.array(self.m1.xyz == self.m2.xyz)
        self.assert_(eq.all())

    def test_AmberParmSolvated(self):
        """AmberParmBuilder.parmSolvated test"""
        ## remove waters and hydrogens
        self.mdry = self.ref.compress(self.ref.maskProtein())
        self.mdry = self.mdry.compress(self.mdry.maskHeavy())

        self.a = AmberParmBuilder(self.mdry,
                                  leap_out=self.leapout,
                                  verbose=self.local,
                                  debug=self.DEBUG)

        self.a.parmSolvated(self.wetparm,
                            f_out_crd=self.wetcrd,
                            f_out_pdb=self.wetpdb,
                            box=2.5)

        self.m3 = PDBModel(self.wetpdb)

        m3prot = self.m3.compress(self.m3.maskProtein())
        refprot = self.ref.compress(self.ref.maskProtein())
        refprot.xplor2amber()

        self.assertEqual(self.ref.lenChains(), self.m3.lenChains())
        self.assertEqual(refprot.atomNames(), m3prot.atomNames())

    def test_capIrregular(self):
        """AmberParmBuilder.capNME & capACE test"""
        gfp = PDBModel('1GFL')
        normal = gfp.takeResidues([10, 11])
        chromo = gfp.takeResidues([64, 65])

        self.a = AmberParmBuilder(normal)
        self.m4 = self.a.capACE(normal, 0)

        self.assertEqual(len(self.m4), 17)

        ##        del chromo.residues['biomol']

        self.m5 = self.a.capACE(chromo, 0)
        self.m5 = self.a.capNME(self.m5, 0)

        self.assertEqual(self.m5.sequence(), 'XSYX')
コード例 #48
0
ファイル: AmberParmBuilder.py プロジェクト: ostrokach/biskit
class Test( BT.BiskitTest ):
    """Test AmberParmBuilder"""

    TAGS = [ BT.EXE ]

    def prepare(self):
        root = T.testRoot() + '/amber/'
        self.ref = PDBModel( T.testRoot() + '/amber/1HPT_0.pdb')
        self.refdry = root + '1HPT_0dry.pdb'

        self.dryparm = tempfile.mktemp('.parm', 'dry_')
        self.drycrd  = tempfile.mktemp('.crd', 'dry_')
        self.drypdb  = tempfile.mktemp('.pdb', 'dry_')
        self.wetparm = tempfile.mktemp('.parm', 'wet_')
        self.wetcrd  = tempfile.mktemp('.crd', 'wet_')
        self.wetpdb  = tempfile.mktemp('.pdb', 'wet_')
        self.leapout = tempfile.mktemp('.out', 'leap_')

    def cleanUp(self):
        if not self.DEBUG:
            T.tryRemove( self.dryparm )
            T.tryRemove( self.drycrd )
            T.tryRemove( self.drypdb )
            T.tryRemove( self.wetparm )
            T.tryRemove( self.wetcrd )
            T.tryRemove( self.wetpdb )
            T.tryRemove( self.leapout )
        

    def test_AmberParmMirror(self):
        """AmberParmBuilder.parmMirror test"""
        ref = self.ref
        mask = N.logical_not( ref.maskH2O() ) ## keep protein and Na+ ion
        self.mdry = ref.compress( mask )

        self.a = AmberParmBuilder( self.mdry, verbose=self.local,
                                   leap_out=self.leapout,
                                   debug=self.DEBUG )

        self.a.parmMirror(f_out=self.dryparm,
                          f_out_crd=self.drycrd )

        self.a.parm2pdb( self.dryparm, self.drycrd, self.drypdb )

        self.m1 = PDBModel(self.drypdb)
        self.m2 = PDBModel(self.refdry)

        eq = N.array( self.m1.xyz == self.m2.xyz )
        self.assert_( eq.all() )


    def test_AmberParmSolvated( self ):
        """AmberParmBuilder.parmSolvated test"""
        ## remove waters and hydrogens
        self.mdry = self.ref.compress( self.ref.maskProtein() )
        self.mdry = self.mdry.compress( self.mdry.maskHeavy() )

        self.a = AmberParmBuilder( self.mdry,
                                   leap_out=self.leapout,
                                   verbose=self.local, debug=self.DEBUG)

        self.a.parmSolvated( self.wetparm, f_out_crd=self.wetcrd,
                             f_out_pdb=self.wetpdb,
                             box=2.5 )

        self.m3 = PDBModel( self.wetpdb )

        m3prot = self.m3.compress( self.m3.maskProtein() )
        refprot= self.ref.compress( self.ref.maskProtein() )
        refprot.xplor2amber()
        
        self.assertEqual( self.ref.lenChains(), self.m3.lenChains() )
        self.assertEqual( refprot.atomNames(), m3prot.atomNames() )


    def test_capIrregular( self ):
        """AmberParmBuilder.capNME & capACE test"""
        gfp = PDBModel('1GFL')
        normal = gfp.takeResidues([10,11])
        chromo = gfp.takeResidues([64,65])

        self.a = AmberParmBuilder( normal )
        self.m4 = self.a.capACE( normal, 0 )

        self.assertEqual( len(self.m4), 17 )
        
##        del chromo.residues['biomol']

        self.m5 = self.a.capACE( chromo, 0 )
        self.m5 = self.a.capNME( self.m5, 0 )
        
        self.assertEqual( self.m5.sequence(), 'XSYX' )