Ejemplo n.º 1
0
    def byName(self, rescode, topo=None):
        """
        Look up a reference residue by its residue name.

        Residue names are not guaranteed to be unique once several topology
        files have been read in (the default set of Amber topologies uses
        unique names though). Without a topo argument the first topology
        (in iteration order of the topology index) that knows the name wins.

        Note: residue 3 letter names are all UPPERCASE.

        :param rescode: three-letter name of residue to look up
        :type  rescode: str
        :param topo: optional (file) name of the topology to search
                     (see also: `topokeys()` )
        :type  topo: str

        :return: matching reference residue
        :rtype: AmberResidueType

        :raise: KeyError if the topology or residue name are not found
        """
        ## explicit topology requested: index directly, KeyError propagates
        if topo:
            return self.topoindex[T.stripFilename(topo)][rescode]

        ## otherwise scan every registered topology for the residue name
        for residues in self.topoindex.values():
            if rescode in residues:
                return residues[rescode]

        raise KeyError('No residue type found for name '+str(rescode))
Ejemplo n.º 2
0
    def byName(self, rescode, topo=None):
        """
        Return the reference residue registered under a residue name.

        Several topology files may define the same residue name (the default
        Amber topologies do not). If topo is given, only that topology is
        consulted; otherwise the first topology containing the name is used.

        Note: residue 3 letter names are all UPPERCASE.

        :param rescode: three-letter name of residue to look up
        :type  rescode: str
        :param topo: optional (file) name of topology (see also: `topokeys()` )
        :type  topo: str

        :return: matching reference residue
        :rtype: AmberResidueType

        :raise: KeyError if the topology or residue name are not found
        """
        if topo:
            fbase = T.stripFilename(topo)
            return self.topoindex[fbase][rescode]

        ## private sentinel so that any stored value (even None) is a valid hit
        missing = object()
        hits = (known[rescode] for known in self.topoindex.values()
                if rescode in known)
        found = next(hits, missing)

        if found is missing:
            raise KeyError('No residue type found for name '+str(rescode))

        return found
Ejemplo n.º 3
0
    def failed(self):
        """
        Report a failed HEX job on stdout and notify the owner.

        Prints the host, the input file, the command line and the log /
        output file names followed by the text returned by t.lastError(),
        then hands this job to self.owner.failedHex().
        """
        print("FAILED: ", self.host, ' ', t.stripFilename(self.finp))
        print("\tJob details:")

        ## (label, attribute name) pairs; each attribute is only resolved
        ## right before it is printed
        details = (("\tCommand: ", 'cmd'),
                   ("\tinput:   ", 'finp'),
                   ("\tHex log: ", 'log'),
                   ("\tHex out: ", 'fout'))

        for label, attr in details:
            print(label, getattr(self, attr))

        print()
        print("\t", t.lastError())

        self.owner.failedHex(self)
Ejemplo n.º 4
0
    def idFromName(self, fname):
        """
        Derive a PDB code from a file name.

        :param fname: file name
        :type  fname: str
        :return: first 4 letters of the stripped file name, or '' if the
                 stripped name has fewer than 4 characters
        :rtype: str
        """
        base = T.stripFilename(fname)
        return base[:4] if len(base) >= 4 else ''
Ejemplo n.º 5
0
    def failed(self):
        """
        Called if the HEX job fails: print a job summary and the last error
        to stdout, then inform the owner through owner.failedHex().
        """
        print("FAILED: ", self.host, ' ', t.stripFilename(self.finp))
        print("\tJob details:")

        ## label / attribute table; attributes are read lazily, one per line
        for label, attr in (("\tCommand: ", 'cmd'),
                            ("\tinput:   ", 'finp'),
                            ("\tHex log: ", 'log'),
                            ("\tHex out: ", 'fout')):
            print(label, getattr(self, attr))

        print()
        print("\t", t.lastError())

        self.owner.failedHex(self)
Ejemplo n.º 6
0
    def idFromName(self, fname):
        """
        Extract the PDB code from a file name.

        :param fname: file name
        :type  fname: str
        :return: first 4 letters of the file name (as returned by
                 T.stripFilename) if it has at least 4 characters, else ''
        :rtype: str
        """
        stripped = T.stripFilename(fname)

        ## too short to carry a 4-letter PDB code
        if len(stripped) < 4:
            return ''

        return stripped[:4]
Ejemplo n.º 7
0
    def run(self):
        """
        Run the HEX job and parse its result.

        HEX is only executed if the output file (self.fout) does not exist
        yet. Afterwards the model numbers in the output file are patched, the
        file is parsed into self.result and done() is called.

        Note: every exception raised on the way -- including the DockerError
        that signals a non-zero HEX exit status -- is caught here and
        reported through failed(); nothing propagates to the caller.
        """
        try:
            if not os.path.exists(self.fout):

                if self.verbose:
                    print("Executing on ", self.host, ' with ',
                          t.stripFilename(self.finp))
                    print("Command: ", self.cmd)

                argv = self.cmd.split()

                ## stdout of HEX is discarded
                proc = subprocess.Popen(argv,
                                        executable=argv[0],
                                        universal_newlines=True,
                                        stdout=subprocess.DEVNULL)
                self.pid = proc.pid

                ## block until HEX has finished
                proc.communicate()
                self.status = proc.returncode

                if self.status != 0:
                    raise DockerError('Hex returned exit status %i' %
                                      self.status)

                ## poll up to 5 times (5 s apart) for the output file
                for _ in range(5):
                    if os.path.exists(self.fout):
                        break
                    sleep(5)

            ## replace model numbers in HEX output file
            self.__hackHexOut(self.nRec, self.nLig, self.fout)

            ## generate ComplexList from hex output
            self.result = HexParser(self.fout, self.owner.recDic,
                                    self.owner.ligDic).parseHex()

            self.done()

        except:
            ## deliberately catches everything: the owner must be told
            self.failed()
Ejemplo n.º 8
0
    def run(self):
        """
        Execute HEX (unless its output file already exists) and parse the
        output into self.result.

        A non-zero exit status of HEX raises DockerError internally; this and
        any other exception is caught and routed to failed(), so the method
        itself never raises. On success done() is called.
        """
        try:
            if not os.path.exists(self.fout):

                if self.verbose:
                    print("Executing on ", self.host, ' with ',
                          t.stripFilename(self.finp))
                    print("Command: ", self.cmd)

                argv = self.cmd.split()

                ## stdout of HEX is discarded
                proc = subprocess.Popen(argv,
                                        executable=argv[0],
                                        universal_newlines=True,
                                        stdout=subprocess.DEVNULL)
                self.pid = proc.pid

                ## block until HEX has finished
                proc.communicate()
                self.status = proc.returncode

                if self.status != 0:
                    raise DockerError('Hex returned exit status %i' % self.status)

                ## poll up to 5 times (5 s apart) for the output file
                for _ in range(5):
                    if os.path.exists(self.fout):
                        break
                    sleep(5)

            ## replace model numbers in HEX output file
            self.__hackHexOut(self.nRec, self.nLig, self.fout)

            ## generate ComplexList from hex output
            self.result = HexParser(self.fout, self.owner.recDic,
                                    self.owner.ligDic).parseHex()

            self.done()

        except:
            ## deliberately catches everything: the owner must be told
            self.failed()
Ejemplo n.º 9
0
    def addTopology(self, topofile, override=False):
        """
        Include an additional topology (off) library in the collection.

        The new residues are first checked against the atom-content index
        and only registered once the whole file has passed the check, so a
        rejected topology leaves the library unchanged.

        :param topofile: file name of topology; it is handed unchanged to
                         AmberPrepParser (the original documentation states
                         that a simple file name is looked up in
                         biskit/data/amber/residues)
        :type  topofile: str

        :param override: override topologies or residue entries with same name
                         (default False)
        :type  override: bool

        :return: dictionary of all residue types parsed from topofile indexed
                 by three-letter residue name
        :rtype : {str : AmberResidueType}

        :raise: AmberResidueLibraryError if override==False and a topology or
                a residue with identical atom content have already been
                registered.
        """
        fbase = T.stripFilename( topofile )

        if fbase in self.topoindex and not override:
            raise AmberResidueLibraryError('duplicate topology '+fbase)

        if self.verbose:
            self.log.add('parsing %s...' % topofile )

        resdic = AmberPrepParser( topofile ).residueDict()

        if self.verbose:
            self.log.add( 'Read %i residue definitions.\n' % len(resdic) )

        ## validation pass: collect the new atom-key entries without touching
        ## the library, so that a duplicate cannot leave it half-updated
        pending = {}

        for resname, restype in resdic.items():
            akey = restype.atomkey(compress=False)

            if not override:
                ## clash with an earlier residue of this file or of the library
                if akey in pending:
                    clash = pending[akey]
                elif akey in self.aindex:
                    clash = self.aindex[akey]
                else:
                    clash = None

                if clash is not None:
                    raise AmberResidueLibraryError(
                        'duplicate residue entry: %s -> %s' %
                        (resname, clash.code))

            pending[akey] = restype

        ## commit pass: nothing below can raise a library error any more
        self.topoindex[ fbase ] = resdic
        self.aindex.update( pending )

        return self.topoindex[ fbase ]
Ejemplo n.º 10
0
    def addTopology(self, topofile, override=False):
        """
        Include an additional topology (off) library in the collection.

        The new residues are first checked against the atom-content index
        and only registered once the whole file has passed the check, so a
        rejected topology leaves the library unchanged.

        :param topofile: file name of topology; it is handed unchanged to
                         AmberPrepParser (the original documentation states
                         that a simple file name is looked up in
                         Biskit/data/amber/residues)
        :type  topofile: str

        :param override: override topologies or residue entries with same name
                         (default False)
        :type  override: bool

        :return: dictionary of all residue types parsed from topofile indexed
                 by three-letter residue name
        :rtype : {str : AmberResidueType}

        :raise: AmberResidueLibraryError if override==False and a topology or
                a residue with identical atom content have already been
                registered.
        """
        fbase = T.stripFilename( topofile )

        if fbase in self.topoindex and not override:
            raise AmberResidueLibraryError('duplicate topology '+fbase)

        if self.verbose:
            self.log.add('parsing %s...' % topofile )

        resdic = AmberPrepParser( topofile ).residueDict()

        if self.verbose:
            self.log.add( 'Read %i residue definitions.\n' % len(resdic) )

        ## validation pass: collect the new atom-key entries without touching
        ## the library, so that a duplicate cannot leave it half-updated
        pending = {}

        for resname, restype in resdic.items():
            akey = restype.atomkey(compress=False)

            if not override:
                ## clash with an earlier residue of this file or of the library
                if akey in pending:
                    clash = pending[akey]
                elif akey in self.aindex:
                    clash = self.aindex[akey]
                else:
                    clash = None

                if clash is not None:
                    raise AmberResidueLibraryError(
                        'duplicate residue entry: %s -> %s' %
                        (resname, clash.code))

            pending[akey] = restype

        ## commit pass: nothing below can raise a library error any more
        self.topoindex[ fbase ] = resdic
        self.aindex.update( pending )

        return self.topoindex[ fbase ]
Ejemplo n.º 11
0
    def addMovie( self, pdb, modName=None ):
        """
        Add one or several existing pdb files or Structure objects
        to one model. Several files will hence end up as single movie
        (i.e. as frames of a model in PyMol).

        @param pdb: file name or a list of file names OR
                    PDBModel or list of PDBModels; anything that is not a
                    list (or list subclass) is treated as a single entry
        @type  pdb: str or [str] OR PDBModel or [PDBModel]
        @param modName: model name, will show up in PyMol. If 'None' a
                        model name will be created from the source file
                        name and a serial number.
        @type  modName: str OR None

        @return: the modName of the added model
        @rtype: str
        """
        ## isinstance (not an exact type match) so that list subclasses are
        ## not wrapped into a second list
        if not isinstance( pdb, list ):
            pdb = [pdb]

        ## dream up a nice model name
        if modName is None:

            ## str subclasses count as file names, too
            if isinstance( pdb[0], str ):
                modName = T.stripFilename( pdb[0] )
                modName = self._getFreeModName( modName, 0 )

            else:
                modName = self._getFreeModName( 'models', 0 )

        ## create new empty list
        if modName not in self.dic:
            self.dic[ modName ] = []

        ## create model object for each file and append it to dic
        for f in pdb:

            ## create model from Structure or file name
            model = PymolModel( f, modName )
            self.dic[ modName ].append( model )

            ## add load statement to Pymol script
            self.add( 'load '+ model.fname + ',' + modName )

        return modName
Ejemplo n.º 12
0
    def test_TrajParsePDBs(self):
        """TrajParsePDBs test"""
        import os
        folder = T.testRoot('amber/md_pdbs/')

        ## collect all *.pdb files (case-insensitive) of the test folder;
        ## os.path.join works with or without a trailing separator on folder
        pdbs = [os.path.join(folder, fn) for fn in os.listdir(folder)
                if fn.upper().endswith('.PDB')]

        ## fail with a clear message instead of an IndexError further down
        self.assertTrue(pdbs, 'no PDB files found in ' + folder)

        ref = pdbs[0]

        self.assertTrue(TrajParsePDBs.supports(pdbs))
        p = TrajParsePDBs(verbose=self.local, rmwat=True, analyzeEach=False)
        t = p.parse2new(pdbs, ref=ref)

        self.assertEqual(t.lenAtoms(), 876)
        self.assertEqual(len(t), 10)
        self.assertEqual(t.frameNames, [T.stripFilename(f) for f in pdbs])
Ejemplo n.º 13
0
def createHexInp(recPdb,
                 recModel,
                 ligPdb,
                 ligModel,
                 comPdb=None,
                 outFile=None,
                 macDock=None,
                 silent=0,
                 sol=512):
    """
    Prepare a Hex macro file for the docking of the receptor(s)
    against ligand(s).

    @param recPdb: file name of the hex-formatted receptor PDB
    @type  recPdb: str
    @param recModel: receptor model, surface distances are taken from it
    @type  recModel: PDBModel
    @param ligPdb: file name of the hex-formatted ligand PDB
    @type  ligPdb: str
    @param ligModel: ligand model, surface distances are taken from it
    @type  ligModel: PDBModel
    @param comPdb: reference PDB; only written to the macro if the name
                   ends with '.pdb'
    @type  comPdb: str
    @param outFile: base of file name for mac and out (default:
                    first 4 letters of receptor and ligand file name
                    joined by '-')
    @type  outFile: str

    @param macDock: None -> decide from the size of the receptor (macro
                    docking is used if its radius exceeds 35 A),
                    1 -> force macroDock, 0-> force off (default: None)
    @type  macDock: None|1|0
    @param silent: don't print distances and macro warnings (default: 0);
                   does not influence the docking setup
    @type  silent: 0|1
    @param sol: number of solutions that HEx should save (default: 512)
    @type  sol: int

    @return: HEX macro file name, HEX out generated by the macro,
             macro docking status
    @rtype: str, str, boolean
    """
    ## files and names
    recCode = t.stripFilename(recPdb)[0:4]
    ligCode = t.stripFilename(ligPdb)[0:4]

    outFile = outFile or recCode + '-' + ligCode

    ## hex macro name
    macName = t.absfile(outFile + '_hex.mac')

    ## hex rotation matrix output name
    outName_all = t.absfile(outFile + '_hex.out')
    outName_clust = t.absfile(outFile + '_hex_cluster.out')

    ## add surface profiles if not there
    if 'relAS' not in recModel.atoms:
        PDBDope(recModel).addSurfaceRacer()
    if 'relAS' not in ligModel.atoms:
        PDBDope(ligModel).addSurfaceRacer()

    ## surface masks, > 95% exposed
    rec_surf_mask = N0.greater(recModel.profile('relAS'), 95)
    lig_surf_mask = N0.greater(ligModel.profile('relAS'), 95)

    ## two distances from the centre of mass to the surface atoms, used
    ## below as largest and smallest radius of each molecule
    ## NOTE(review): the original comment called the second value 'median'
    ## -- confirm against centerSurfDist
    recMax, recMin = centerSurfDist(recModel, rec_surf_mask)
    ligMax, ligMin = centerSurfDist(ligModel, lig_surf_mask)

    ## approximate max and min centre to centre distance
    maxDist = recMax + ligMax
    minDist = recMin + ligMin

    ## molecular separation and search range to be used in the docking
    molSep = (maxDist + minDist) / 2
    molRange = 2 * (maxDist - molSep)

    if not silent:
        print(
            'Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n'
            % (outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist,
               molSep, molRange))

    if recMax > 30 and ligMax > 30 and not silent:
        print('\nWARNING! Both the receptor and ligand radius is ', end=' ')
        print('greater than 30A.\n')

    ## determine docking mode to use
    macroDocking = 0

    if macDock is None:
        ## the decision depends on the receptor size only; 'silent' merely
        ## suppresses the message (it used to suppress the decision as well)
        if recMax > 35:
            macroDocking = 1
            if not silent:
                print('\nReceptor has a radius that exceeds 35A ', end=' ')
                print('-> Macro docking will be used')
    else:
        macroDocking = macDock

    #####################
    ## assemble macro file sections

    head = """
# -------------- general settings ----------------
disc_cache 1                   # disc cache on (0 off)
docking_sort_mode 1            # Sort solutions by cluster (0 by energy)
docking_cluster_mode 1         # Display all clusters (0 display best)
docking_cluster_threshold 2.00
# docking_cluster_bumps  number

# ------------ molecule orientation --------------
molecule_separation %(separation)i
commit_view """ % {'separation': round(molSep)}

    macro = """
# -------------- macro docking -------------------
macro_min_coverage 25
macro_sphere_radius 15
macro_docking_separation 25
activate_macro_model"""

    tail = """
# -------------- docking setup -------------------
docking_search_mode 0          # full rotational search

receptor_range_angle  180      # 0, 15, 30, 45, 60, 75, 90, 180
docking_receptor_samples 720   # 362, 492, 642, 720, 980, 1280

ligand_range_angle  180
docking_ligand_samples 720

twist_range_angle 360          # 0, 15, 30, 60, 90, 180, 360
docking_alpha_samples 128      # 64, 128, 256

r12_step 0.500000              # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2
r12_range %(range)i

docking_radial_filter 0        # Radial Envelope Filter - None

grid_size 0.600                # 0.4, 0.5, 0.6, 0.75, 1.0
# docking_electrostatics 0       # use only surface complimentarity
docking_electrostatics 1      # use electrostatic term for scoring clusters

docking_main_scan 16     # 
docking_main_search 26

max_docking_solutions %(nr_sol)i # number of solutions to save

# -------------- post-processing ----------------
docking_refine 0    # None
#  docking_refine 1    # Backbone Bumps
#  docking_refine 2    # MM energies
#  docking_refine 3    # MM minimization

# ---------------- run docking ------------------
activate_docking
#  save_docking %(output_clust)s
#  save_range 1 512 ./ dock .pdb

# ------------ also save all solutions ----------
docking_sort_mode 0            # Sort solutions by energy (1 by cluster)
save_docking %(output_all)s""" \
         %({'range':round(molRange), 'output_all':outName_all,
            'nr_sol':int(sol), 'output_clust':outName_clust} )

    #####################
    ## write macro file; the with-block closes it even if a write fails

    with open(macName, 'w') as macOpen:

        macOpen.write('# -- ' + macName + ' --\n')
        macOpen.write(' \n')
        macOpen.write('open_receptor ' + t.absfile(recPdb) + '\n')
        macOpen.write('open_ligand ' + t.absfile(ligPdb) + '\n')

        if comPdb and comPdb[-4:] == '.pdb':
            macOpen.write('open_complex ' + comPdb + '\n')

        macOpen.write('\n')

        macOpen.write(head)

        ## macro docking will not work with multiple models, if both are
        ## added to the hex macro file - macrodocking will be skipped
        ## during the docking run
        if macroDocking:
            macOpen.write(macro)

        macOpen.write(tail)

    return macName, outName_all, macroDocking
Ejemplo n.º 14
0
    def __parseBiomt(self, pdbFile, firstLine):
        """
        Extract BIOMT (biological unit) information from REMARK 350 lines.
        Creates a 'BIOMT' dictionary.

        Lines are consumed from pdbFile until the first record that is not
        a REMARK 350 line; that record is handed back to the caller.

        :param pdbFile: PDB file object offering readLine(); the next call
                        must deliver the record following firstLine
        :param firstLine: the already parsed first REMARK 350 record as
                          (record type, content) pair

        :return: tuple of (1) {'BIOMT': {molecule number :
                 (list of chain ids, list of 3 x 4 rotation+translation
                 matrices)}} and (2) the first record that does not belong
                 to REMARK 350
        :rtype: ( dict, tuple )
        """
        line = firstLine
        biomtDict = {}
        moleculeNum = -1

        ## accumulators exist from the start so that chain / matrix records
        ## showing up before any BIOMOLECULE line cannot hit unbound names
        targetChains = []
        rotation = []
        translation = []
        rtList = []
        matrixLine = 0

        while line[0] == 'REMARK' and line[1].startswith(' 350'):
            # 5 = len(' 350 ')
            biomtLine = line[1][5:].lstrip()

            if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule

                if moleculeNum != -1:
                    # lets update the dictionary with what we've got
                    biomtDict[moleculeNum] = (targetChains, rtList)

                # 12 = len('BIOMOLECULE:')
                moleculeNum = int(biomtLine[12:].strip())
                targetChains = []
                rotation = []
                translation = []
                rtList = []
                matrixLine = 0

            if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
                # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[30:].split(','))

            if biomtLine.startswith('AND CHAINS:'):
                # 11 = len('AND CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[11:].split(','))

            if biomtLine.startswith('BIOMT'):
                # one row of a rotate-translate matrix
                matrixLine += 1
                # 6 = len('BIOMT#')
                rawCoords = biomtLine[6:].split()
                rotation.append([float(x) for x in rawCoords[1:4]])
                translation.append(float(rawCoords[4]))

                # every third row completes one matrix: 3 x 3 rotation
                # plus the translation as 4th column
                if matrixLine % 3 == 0:
                    rot = N0.array(rotation)
                    trans = N0.transpose([translation])
                    rtList.append(N0.concatenate((rot, trans), axis=1))
                    rotation = []
                    translation = []

            # fetch the next record; an unparsable record is reported and
            # skipped (instead of processing the current record once more)
            while True:
                try:
                    line = pdbFile.readLine()
                    break
                except ValueError as what:
                    self.log.add('Warning: Error parsing REMARK 350 record')
                    self.log.add('\tError: ' + str(what))

        # process last molecule group; data found without any BIOMOLECULE
        # header is kept under the placeholder number -1
        if moleculeNum != -1 or targetChains or rtList:
            biomtDict[moleculeNum] = (targetChains, rtList)

        # return (indexed transformation dictionary , last line which isn't ours)
        return {'BIOMT': biomtDict}, line
Ejemplo n.º 15
0
    def parse2new(self, source, ref=None, traj=None):
        """
        Create / Replace Trajectory from the source list of PDBModels or PDBs.

        Each frame is compared to the reference (always for the first frame,
        for every frame if self.analyzeEach is set) and, if needed, its
        atoms are re-ordered / reduced to match the reference.

        Args:
            source (list): file names or PDBModel instances, one per frame
            ref (str or PDBModel): reference structure instance or file
                (default: first entry of source)
            traj (Biskit.md.Trajectory): existing instance to be updated

        Returns:
           Biskit.Trajectory: new (or the updated) Trajectory instance

        Raises:
            TrajParserError: if a frame does not match the reference
        """
        r = traj
        if traj is None:
            import biskit.md
            r = biskit.md.Trajectory()

        r.setRef(B.PDBModel(ref or source[0]))
        n_frames = len(source)

        if self.rmwat:
            r.ref = r.ref.compress(N.logical_not(r.ref.maskSolvent()))

        r.resIndex = r.ref.resMap()
        refNames = r.ref.atomNames()  ## cache for atom checking

        if self.verbose: T.errWrite('reading %i pdbs...' % n_frames)

        r.frames = N.zeros(
            (n_frames, r.ref.lenAtoms(), 3))  ## target coordinate array

        ## zero-padded placeholder names ('#0000000', '#0000001', ...);
        ## replaced by the file name below where the source is a file
        r.frameNames = ['#%07i' % i for i in range(n_frames)]

        atomCast = None
        reportIntervall = 1 if n_frames < 100 else round(n_frames / 100)

        for i, f in enumerate(source):

            m = B.PDBModel(f)

            ## compare atom order & content of first frame to reference pdb
            if self.analyzeEach or i == 0:
                atomCast, castRef = m.compareAtoms(r.ref)

                if castRef != list(range(r.ref.lenAtoms())):
                    ## we can remove/reorder atoms from each frame but not from ref
                    raise P.TrajParserError("Reference PDB doesn't match %s." %
                                            m.fileName)

                if N.all(atomCast == list(range(len(m)))):
                    atomCast = None  ## no casting necessary
                else:
                    if self.verbose: T.errWrite(' casting ')

            ## assert that frame fits reference; explicit None test because
            ## the truth value of an index array would be ambiguous
            if atomCast is not None:
                m = m.take(atomCast)

            ## additional check on every reportIntervall-th frame
            if i % reportIntervall == 0 and m.atomNames() != refNames:
                raise P.TrajParserError("%s doesn't match reference pdb." %
                                        m.fileName)

            r.frames[i] = m.xyz

            if isinstance(f, str):  ## save original file name
                r.frameNames[i] = T.stripFilename(f)

            if i % reportIntervall == 0 and self.verbose:
                T.errWrite('#')

        if self.verbose: T.errWrite('done\n')
        return r
Ejemplo n.º 16
0
    def __collectAll(self, fname, skipRes=None, headPatterns=None):
        """
        Parse ATOM/HETATM lines from PDB. Collect coordinates plus
        dictionaries with the other pdb records of each atom.
        HEADER and REMARK records found before the first atom are collected
        into an info dictionary. Parsing stops at the first END or ENDMDL
        record.

        Some changes are made to the dictionary from PDBFile.readline()::
            - the 'position' entry (with the coordinates) is removed
            - leading and trailing spaces are removed from 'name' ..
            - .. but a 'name_original' entry keeps the old name with spaces
            - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
            - a 'after_ter' entry is added. Its value is 1, if atom is
              preceeded by a 'TER' line, otherwise 0
            - empty 'element' entries are filled with the first non-number
              letter from the atom 'name'

        :param fname: name of pdb file
        :type  fname: str
        :param skipRes: list with residue names that should be skipped
        :type  skipRes: list of str
        :param headPatterns: (key, regular expression) pairs applied to
                             REMARK records (default: self.RE_REMARKS)
        :type  headPatterns: [ (str, str) ]

        :return: tuple of (1) dictionary of atom profiles (one list per
                 PDB key), (2) xyz array N x 3 and (3) dictionary with the
                 information parsed from HEADER / REMARK records
        :rtype: ( dict, array, dict )

        :raise: PDBParserError if the file cannot be parsed or does not
                contain any atoms
        """
        xyz = []

        aProfs = {}

        info = {}

        in_header = True

        headPatterns = headPatterns or self.RE_REMARKS
        patterns = [(key, re.compile(ex)) for key, ex in headPatterns]

        for k in B.PDBModel.PDB_KEYS:
            aProfs[k] = list()

        f = IO.PDBFile(fname)

        ## True if 'line' already holds an unprocessed record
        skipLine = False

        try:
            line, i = ('', ''), 0

            while line[0] != 'END' and line[0] != 'ENDMDL':

                i += 1
                if not skipLine:
                    try:
                        line = f.readLine()
                    except ValueError as what:
                        self.log.add('Warning: Error parsing line %i of %s' %
                                     (i, T.stripFilename(fname)))
                        self.log.add('\tError: ' + str(what))
                        continue
                else:
                    skipLine = False

                ## header handling
                if in_header and line[0] == 'HEADER':
                    info.update(self.__parseHeader(line))

                if in_header and line[0] == 'REMARK':
                    if line[1].startswith(' 350'):
                        biomtDict, line = self.__parseBiomt(f, line)
                        info.update(biomtDict)
                        # we've hogged a line beyond REMARK 350 records in
                        # __parseBiomt(), now we need to process it here
                        skipLine = True
                        continue
                    else:
                        info.update(self.__parseRemark(line, patterns))

                ## preserve position of TER records
                newChain = line[0] == 'TER'
                if newChain:
                    line = f.readLine()

                if (line[0] in ['ATOM', 'HETATM']):

                    if in_header:
                        in_header = False  ## switch off HEADER parsing

                    a = line[1]

                    if skipRes and a['residue_name'] in skipRes:
                        continue

                    a['name_original'] = a['name']
                    a['name'] = a['name'].strip()

                    a['type'] = line[0]

                    if newChain:
                        a['after_ter'] = 1
                    else:
                        a['after_ter'] = 0

                    if a['element'] == '':
                        a['element'] = self.__firstLetter(a['name'])

                    xyz.append(a['position'])

                    del a['position']

                    for k, v in a.items():
                        aProfs[k].append(v)

        except Exception:
            ## KeyboardInterrupt / SystemExit are no parsing errors and
            ## are allowed to pass through
            raise PDBParserError("Error parsing file "+fname+": " \
                                 + T.lastError())
        finally:
            ## release the file handle on success and on failure alike
            try:
                f.close()
            except Exception:
                pass

        if len(xyz) == 0:
            raise PDBParserError("Error parsing file " + fname + ": " +
                                 "Couldn't find any atoms.")

        return aProfs, N0.array(xyz, N0.Float32), info
Ejemplo n.º 17
0
    def __parseBiomt(self, pdbFile, firstLine):
        """
        Extract BIOMT (biological unit) information from REMARK 350 lines.
        Creates a 'BIOMT' dictionary.

        Lines are consumed from pdbFile until the first record that is not
        a REMARK 350 line; that record is handed back to the caller.

        :param pdbFile: PDB file object offering readLine(); the next call
                        must deliver the record following firstLine
        :param firstLine: the already parsed first REMARK 350 record as
                          (record type, content) pair

        :return: tuple of (1) {'BIOMT': {molecule number :
                 (list of chain ids, list of 3 x 4 rotation+translation
                 matrices)}} and (2) the first record that does not belong
                 to REMARK 350
        :rtype: ( dict, tuple )
        """
        line = firstLine
        biomtDict = {}
        moleculeNum = -1

        ## accumulators exist from the start so that chain / matrix records
        ## showing up before any BIOMOLECULE line cannot hit unbound names
        targetChains = []
        rotation = []
        translation = []
        rtList = []
        matrixLine = 0

        while line[0] == 'REMARK' and line[1].startswith(' 350'):
            # 5 = len(' 350 ')
            biomtLine = line[1][5:].lstrip()

            if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule

                if moleculeNum != -1:
                    # lets update the dictionary with what we've got
                    biomtDict[moleculeNum] = (targetChains, rtList)

                # 12 = len('BIOMOLECULE:')
                moleculeNum = int(biomtLine[12:].strip())
                targetChains = []
                rotation = []
                translation = []
                rtList = []
                matrixLine = 0

            if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
                # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[30:].split(','))

            if biomtLine.startswith('AND CHAINS:'):
                # 11 = len('AND CHAINS:')
                targetChains.extend(c.strip()
                                    for c in biomtLine[11:].split(','))

            if biomtLine.startswith('BIOMT'):
                # one row of a rotate-translate matrix
                matrixLine += 1
                # 6 = len('BIOMT#')
                rawCoords = biomtLine[6:].split()
                rotation.append([float(x) for x in rawCoords[1:4]])
                translation.append(float(rawCoords[4]))

                # every third row completes one matrix: 3 x 3 rotation
                # plus the translation as 4th column
                if matrixLine % 3 == 0:
                    rot = N0.array(rotation)
                    trans = N0.transpose([translation])
                    rtList.append(N0.concatenate((rot, trans), axis=1))
                    rotation = []
                    translation = []

            # fetch the next record; an unparsable record is reported and
            # skipped (instead of processing the current record once more)
            while True:
                try:
                    line = pdbFile.readLine()
                    break
                except ValueError as what:
                    self.log.add('Warning: Error parsing REMARK 350 record')
                    self.log.add('\tError: ' + str(what))

        # process last molecule group; data found without any BIOMOLECULE
        # header is kept under the placeholder number -1
        if moleculeNum != -1 or targetChains or rtList:
            biomtDict[moleculeNum] = (targetChains, rtList)

        # return (indexed transformation dictionary , last line which isn't ours)
        return {'BIOMT': biomtDict}, line
Ejemplo n.º 18
0
    p.ylabel= 'RMSD [Å]'
    return p


########
## MAIN
########

## NOTE(review): syntax() is defined outside this excerpt; presumably it
## prints the usage message when the script is called without arguments
syntax()

## get and clean up options
## (options() is defined elsewhere; its result is used as a dictionary)
o = options()
o['step'] = int( o['step'] )
o['i'] = T.absfile( o['i'] )
## default plot file: '<input name without folder and ending>_rms.eps' in
## the folder of the input file
o['o'] = o.get('o',
              '%s/%s_rms.eps' % (osp.dirname(o['i']), T.stripFilename(o['i'])))
## turn the mere presence of the 'show' option into a boolean flag
o['show'] = 'show' in o

T.flushPrint( "Loading..." )
t = T.load( o['i'] )
T.flushPrint( "done loading trajectory with %i frames." % len(t) )

## a step other than 1 hands the trajectory to thin()
if o['step'] != 1:
    t = t.thin( o['step'] ) 

T.flushPrint( "Fitting ...")
calcRmsd( t )
T.flushPrint( "done." )

## plot title defaults to the input file name without folder and ending
p = plot( t, o.get( 'title', T.stripFilename(o['i']) ) )
Ejemplo n.º 19
0
    def __collectAll( self, fname, skipRes=None, headPatterns=None ):
        """
        Parse ATOM/HETATM lines from PDB. Collect coordinates plus
        dictionaries with the other pdb records of each atom.
        HEADER and REMARK lines that precede the first atom record are
        parsed into an additional info dictionary (REMARK 350 records are
        handed to the BIOMT parser); all other record types are ignored.

        Some changes are made to the dictionary from PDBFile.readLine()::
            - the 'position' entry (with the coordinates) is removed
            - leading and trailing spaces are removed from 'name' ..
            - .. but a 'name_original' entry keeps the old name with spaces
            - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
            - a 'after_ter' entry is added. Its value is 1, if atom is
              preceded by a 'TER' line, otherwise 0
            - empty 'element' entries are filled with the first non-number
              letter from the atom 'name'

        :param fname: name of pdb file
        :type  fname: str
        :param skipRes: list with residue names that should be skipped
        :type  skipRes: list of str
        :param headPatterns: (key, regular expression) pairs used to parse
                             REMARK lines (default: self.RE_REMARKS)
        :type  headPatterns: list of (str, str)

        :return: tuple of (1) dictionary of atom profiles (one list per
                 key in PDBModel.PDB_KEYS), (2) xyz array N x 3 and
                 (3) dictionary with the parsed HEADER / REMARK information
        :rtype: ( dict, array, dict )

        :raise PDBParserError: if the file cannot be parsed or does not
                               contain any atoms
        """
        xyz   = []

        info = {}

        in_header = True

        ## None or an empty list both fall back to the class-level patterns
        headPatterns = headPatterns or self.RE_REMARKS
        patterns = [ (key, re.compile(ex)) for key,ex in headPatterns ]

        ## one (initially empty) profile list per known atom record key
        aProfs = {}
        for k in B.PDBModel.PDB_KEYS:
            aProfs[k] = []

        f = IO.PDBFile( fname )

        ## True if `line` already holds an unprocessed record that was read
        ## ahead by the BIOMT parser
        skipLine = False

        try:
            ## i is only used for the line number in warning messages
            line, i = ('',''), 0

            while line[0] != 'END' and line[0] != 'ENDMDL':

                i += 1
                if not skipLine:
                    try:
                        line = f.readLine()
                    except ValueError as what:
                        ## skip unparsable lines but leave a trace in the log
                        self.log.add('Warning: Error parsing line %i of %s' %
                                     (i, T.stripFilename( fname )) )
                        self.log.add('\tError: '+str(what) )
                        continue
                else:
                    skipLine = False

                ## header handling
                if in_header and line[0] == 'HEADER':
                    info.update( self.__parseHeader( line ) )

                if in_header and line[0] == 'REMARK':
                    if line[1].startswith(' 350'):
                        biomtDict, line = self.__parseBiomt( f, line )
                        info.update( biomtDict )
                        # we've hogged a line beyond REMARK 350 records in 
                        # __parseBiomt(), now we need to process it here
                        skipLine = True
                        continue
                    else:
                        info.update( self.__parseRemark( line, patterns ) )

                ## preserve position of TER records
                newChain = line[0] == 'TER'
                if newChain:
                    line = f.readLine()

                if line[0] in ['ATOM','HETATM']:

                    in_header = False  ## switch off HEADER parsing

                    a = line[1]

                    if skipRes and a['residue_name'] in skipRes:
                        continue

                    a['name_original'] = a['name']
                    a['name'] = a['name'].strip()

                    a['type'] = line[0]

                    a['after_ter'] = 1 if newChain else 0

                    if a['element'] == '':
                        a['element'] = self.__firstLetter( a['name'] )

                    ## coordinates are collected separately from the profiles
                    xyz.append( a['position'] )

                    del a['position']

                    for k, v in a.items():
                        aProfs[k].append( v )

        except Exception:
            ## Exception rather than a bare except: do not convert
            ## KeyboardInterrupt / SystemExit into parser errors
            raise PDBParserError("Error parsing file "+fname+": " \
                                 + T.lastError())
        finally:
            ## always release the file handle, also if parsing failed;
            ## closing is best effort and must not mask a parsing error
            try:
                f.close()
            except Exception:
                pass

        if len( xyz ) == 0:
            raise PDBParserError("Error parsing file "+fname+": "+
                            "Couldn't find any atoms.")

        return aProfs, N0.array( xyz, N0.Float32 ), info