def buildPdb(map_dict, npts, name='DlgBuilt', ctr=0, outputfile='results.pdb', 
                    scale=1.0):
    if debug: print "in buildPdb: tolerance=", tolerance
    name = 'DlgBuilt'
    mol = Protein(name=name)
    mol.curChain = Chain()
    mol.chains = ChainSet([mol.curChain])
    mol.curRes = Residue()
    mol.curChain.adopt(mol.curRes)
    mol.allAtoms = AtomSet()
    mol.curRes.atoms = mol.allAtoms
    nzpts=nypts=nxpts = npts
    #nxpts, nypts, nzpts = npts
    ctr = 0
    for ADtype, m in map_dict.items():
        if debug: 
            print "PROCESSING ", ADtype, " array:", max(m.ravel()), ':', min(m.ravel())
        vals = []
        tctr = 0  #for number of each type
        for z in range(nzpts):
            for y in range(nypts):
                for x in range(nxpts):
                    val = scale * abs(m[x,y,z])
                    vals.append(val)
                    #if abs(val)>.005:
                    if val>tolerance*scale:
                        ctr += 1
                        name = ADtype + str(ctr)
                        #version3:
                        #info_lo = (xcen - numxcells*spacing,
                        #    ycen - numycells*spacing, 
                        #    zcen - numzcells *spacing)
                        #using lower back pt of cube, i think
                        #xcoord = (x-info_lo[0])/spacing
                        #ycoord = (y-info_lo[1])/spacing
                        #zcoord = (z-info_lo[2])/spacing
                        #version2:
                        xcoord = (x-numxcells)*spacing + xcen
                        ycoord = (y-numycells)*spacing + ycen
                        zcoord = (z-numzcells)*spacing + zcen
                        coords = (xcoord,ycoord,zcoord)
                        tctr += 1
                    #    #print "addAtom: name=",name,"ADtype=", ADtype," val=", val, "coords=", coords,"ctr=", ctr
                        addAtom(mol, name, ADtype, val, coords, ctr)
        print "added ",tctr, '<-', ADtype, " atoms"
        if debug:
            print ADtype, ':', tctr , ' ', ctr
    print "total atoms=", ctr
    writer = PdbWriter()
    writer.write(outputfile, mol.allAtoms, records=['ATOM'])
def test_secondaryStructure():
    from MolKit.pdbParser import PdbParser
    from MolKit.protein import Protein
    print 'create an object Protein crn'
    crn = Protein()
    print 'read the pdb file'
    crn.read('/tsri/pdb/struct/1crn.pdb', PdbParser())
    print 'create an object secondarystructureSet for each chain of crn'
    crn.getSS()
    print 'create the geometries for each structures of crn'
    extrudestructure = []
    for c in range(len(crn.chains)):
        for i in range(len(crn.chains[c].secondarystructureset)):
            extrudestructure.append(crn.chains[c].secondarystructureset[i].extrudeSS())
    def copy(self, newname=None):
        """Return a new Protein rebuilt atom by atom from self.allAtoms.

        The new instance is created with 'newname' (default: self.name +
        "_copy") and the protein-level parameters of self (parent,
        elementType, childrenName, setClass, childrenSetClass, top); the
        parser reference is shared with self.  Every atom of self.allAtoms
        is then passed to '_fit_atom_into_tree' together with the new
        molecule, and bonds are rebuilt by distance on the result.
        NOTE(review): residues, chains and the remaining atom attributes are
        presumably created/copied inside '_fit_atom_into_tree' (e.g. through
        '_copy_atom_attr'); that code is not visible here -- confirm.
        NB: subsequently the two copies can be visualized: 
        copy2=mv.Mols[0].copy()
        mv.addMolecule(copy2)
        mv.GUI.VIEWER.TransformRootOnly( yesno=0)
        mv.GUI.VIEWER.currentObject=copy2.geomContainer.geoms['master']
        then mouse movements would move only copy2, the new object """

        newname = newname or self.name + "_copy"
        duplicate = Protein(name=newname,
                            parent=self.parent,
                            elementType=self.elementType,
                            childrenName=self.childrenName,
                            setClass=self.setClass,
                            childrenSetClass=self.childrenSetClass,
                            top=self.top)
        # empty scaffolding for the atom-by-atom rebuild
        duplicate.curChain = Chain()
        duplicate.curRes = Residue()
        duplicate.allAtoms = AtomSet()
        duplicate.parser = self.parser
        for source_atom in self.allAtoms:
            self._fit_atom_into_tree(duplicate, source_atom)
        duplicate.buildBondsByDistance()
        return duplicate
def buildPdb(map_dict,
             npts,
             name='DlgBuilt',
             ctr=0,
             outputfile='results.pdb',
             scale=1.0):
    if debug: print "in buildPdb: tolerance=", tolerance
    name = 'DlgBuilt'
    mol = Protein(name=name)
    mol.curChain = Chain()
    mol.chains = ChainSet([mol.curChain])
    mol.curRes = Residue()
    mol.curChain.adopt(mol.curRes)
    mol.allAtoms = AtomSet()
    mol.curRes.atoms = mol.allAtoms
    nzpts = nypts = nxpts = npts
    #nxpts, nypts, nzpts = npts
    ctr = 0
    for ADtype, m in map_dict.items():
        if debug:
            print "PROCESSING ", ADtype, " array:", max(m.ravel()), ':', min(
                m.ravel())
        vals = []
        tctr = 0  #for number of each type
        for z in range(nzpts):
            for y in range(nypts):
                for x in range(nxpts):
                    val = scale * abs(m[x, y, z])
                    vals.append(val)
                    #if abs(val)>.005:
                    if val > tolerance * scale:
                        ctr += 1
                        name = ADtype + str(ctr)
                        #version3:
                        #info_lo = (xcen - numxcells*spacing,
                        #    ycen - numycells*spacing,
                        #    zcen - numzcells *spacing)
                        #using lower back pt of cube, i think
                        #xcoord = (x-info_lo[0])/spacing
                        #ycoord = (y-info_lo[1])/spacing
                        #zcoord = (z-info_lo[2])/spacing
                        #version2:
                        xcoord = (x - numxcells) * spacing + xcen
                        ycoord = (y - numycells) * spacing + ycen
                        zcoord = (z - numzcells) * spacing + zcen
                        coords = (xcoord, ycoord, zcoord)
                        tctr += 1
                        #    #print "addAtom: name=",name,"ADtype=", ADtype," val=", val, "coords=", coords,"ctr=", ctr
                        addAtom(mol, name, ADtype, val, coords, ctr)
        print "added ", tctr, '<-', ADtype, " atoms"
        if debug:
            print ADtype, ':', tctr, ' ', ctr
    print "total atoms=", ctr
    writer = PdbWriter()
    writer.write(outputfile, mol.allAtoms, records=['ATOM'])
Exemple #5
0
    def build2LevelsTree(self, atomlines):
        """
        Function to build a two level tree. 
        """
        print 'try to build a 2 level tree'
        self.mol = Molecule()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])

        self.mol.children = AtomSet([])
        self.mol.childrenName = 'atoms'
        self.mol.childrenSetClass = AtomSet
        self.mol.elementType = Atom
        self.mol.levels = [Molecule, Atom]
        ##1/18:self.mol.levels = [Protein, Atom]
        for atmline in atomlines:
            atom = Atom(atmline[1],
                        self.mol,
                        chemicalElement=string.split(atmline[5], '.')[0],
                        top=self.mol)
            #atom.element = atmline[5][0]
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[
                float(atmline[2]),
                float(atmline[3]),
                float(atmline[4])
            ]]
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
#            atom.conformation = 0
            atom.hetatm = 0
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        self.mol.atoms = self.mol.children
    def build2LevelsTree (self, atomlines):
        """
        Function to build a two level tree. 
        """
        print 'try to build a 2 level tree'
        self.mol= Molecule()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])

        self.mol.children = AtomSet([])
        self.mol.childrenName = 'atoms'
        self.mol.childrenSetClass = AtomSet
        self.mol.elementType = Atom
        self.mol.levels = [Molecule, Atom]
        ##1/18:self.mol.levels = [Protein, Atom]
        for atmline in atomlines:
            atom = Atom(atmline[1], self.mol,
                        chemicalElement = string.split(atmline[5], '.')[0],
            top = self.mol)
            #atom.element = atmline[5][0]
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                                  float(atmline[4]) ] ]
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
#            atom.conformation = 0
            atom.hetatm = 0
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        self.mol.atoms = self.mol.children
Exemple #7
0
    def parse(self, objClass=Protein):
        """Parse the lines of a GRO file into a set holding one Protein.

        Line 1 holds the atom count; atom records start at line 2 and are
        read by fixed columns: residue number [0:5], residue name [5:10],
        atom name [10:15], atom number [15:20], x [20:28], y [28:36],
        z [36:44].  All residues are placed in a single chain with id 'GRO'.

        Returns the molecule set, or None when the file could not be read or
        is empty.
        NOTE(review): objClass is accepted but not used; a Protein is always
        created.
        """
        if self.allLines is None and self.filename:
            self.readFile()
            if self.allLines is None or len(self.allLines) == 0:
                return

        mol = Protein()
        self.mol = mol
        molList = mol.setClass()
        molList.append(mol)
        current_residue_number = None
        current_residue = None
        # The count is a free-format integer, so take the whole first token;
        # slicing the first five characters truncated counts >= 100000.
        number_of_atoms = int(self.allLines[1].split()[0])

        self.configureProgressBar(init=1,
                                  mode='increment',
                                  authtext='parse atoms',
                                  max=number_of_atoms)

        current_chain = Chain(id='GRO', )
        # FIX this: The existence of allAtoms attribute (and the fact that it is an empty set rather than all atoms in
        # the chain) causes getNodesByMolecule() to return wrong values
        if hasattr(current_chain, "allAtoms"):
            del current_chain.allAtoms
        mol.adopt(current_chain, setChildrenTop=1)

        for index in range(2, number_of_atoms + 2):
            line = self.allLines[index]
            residue_number = int(line[:5])
            if residue_number != current_residue_number:
                # a change of residue number starts a new residue
                residue_type = line[5:10].split(' ')[0]
                current_residue = Residue(type=residue_type,
                                          number=residue_number)
                current_residue_number = residue_number
                current_chain.adopt(current_residue, setChildrenTop=1)

            name = line[10:15].split(' ')[-1]
            element = name
            if element not in babel_elements:
                # Atom name is not an element symbol: names starting with
                # 'Me' outside "System"/"SOL" residues are mapped to carbon,
                # everything else falls back to the first character.
                if (residue_type not in ("System", "SOL")
                        and element[:2] == 'Me'):
                    element = 'C'
                else:
                    element = element[0]

            atom = Atom(name, current_residue, element, top=mol)
            # coordinates are multiplied by 10 (GRO stores nm; presumably
            # converted to Angstrom here)
            atom._coords = [[
                float(line[20:28]) * 10,
                float(line[28:36]) * 10,
                float(line[36:44]) * 10
            ]]

            # charge sets are keyed by name elsewhere in this file
            # (e.g. _charges['mol2']), so start from an empty dict, not a list
            atom._charges = {}
            atom.segID = mol.name
            atom.normalname = name
            atom.number = int(line[15:20])
            atom.elementType = name[0]
            mol.atmNum[atom.number] = atom
            atom.altname = None
            atom.hetatm = 0
        mol.name = os.path.split(os.path.splitext(self.filename)[0])[-1]
        mol.allAtoms = mol.chains.residues.atoms
        mol.parser = self
        mol.levels = [Protein, Chain, Residue, Atom]
        # comma-separated list of all molecule names (the former loop kept
        # only the last name)
        name = ','.join(molList.name)
        molList.setStringRepr(name)
        strRpr = name + ':::'
        molList.allAtoms.setStringRepr(strRpr)
        for m in molList:
            mname = m.name
            strRpr = mname + ':::'
            m.allAtoms.setStringRepr(strRpr)
            strRpr = mname + ':'
            m.chains.setStringRepr(strRpr)
            for c in m.chains:
                cname = c.id
                strRpr = mname + ':' + cname + ':'
                c.residues.setStringRepr(strRpr)
                for r in c.residues:
                    rname = r.name
                    strRpr = mname + ':' + cname + ':' + rname + ':'
                    r.atoms.setStringRepr(strRpr)
        return molList
        #5/19:
        rec = rec + ' %-2.2s' % atm.autodock_element
        #rec = rec + ' %-2.2s'%atm.autodock_element.upper()
        ##         #NB: write 'A' in element slot for aromatic carbons
        ##         if atm.autodock_element=='A':
        ##             #in this case, columns 78+79 are blanks
        ##             rec = rec + 'A  '
        ##         else:
        ##             #rec = rec + '%2.2s'%atm.element
        ##             #5/19:
        ##             #columns 78+79: autodock_element
        ##             rec = rec + '%s '%atm.autodock_element
        ##             #if atm.element!=atm.autodock_element:
        ##             #    #eg HD or NA or SA or OA, always 2 chars
        ##             #    rec = rec + '%s '%atm.autodock_element[1]
        ##             #else:
        ##             #    rec = rec + '  '
        rec = rec + '\n'
        return rec


if __name__ == '__main__':
    # Ad-hoc manual check: read a PDB file into a Protein and write it back
    # out through PdbWriter with two user-defined records added.
    # Both file paths are hard-coded absolute paths.
    from MolKit.protein import Protein
    from MolKit.pdbParser import PdbParser
    mol = Protein()
    mol.read('/tsri/pdb/struct/4tpi.pdb', PdbParser())
    writer = PdbWriter()
    # only the record name is passed for REMARK
    writer.add_userRecord('REMARK', )
    # TITLE is passed a list with one (str, str) tuple carrying the title text
    writer.add_userRecord('TITLE ', [('', 'This is the title record\n')])
    writer.write('/home/ktchan/jumble.pdb', mol)
Exemple #9
0
    def getMolecule(self, molInd):
        """Build molecule number molInd of the file and return it in a set.

        The record starts at self.allLines[self.molIndex[molInd]] and is read
        as a V2000 connection table: three header lines (name, program
        info, comment), the counts line, the atom block and the bond block.
        All atoms are placed in one residue 'UNK' of one chain '1'.
        "M END" and the property block are not parsed.

        molInd -- index into self.molIndex
        Returns the molecule set containing the new Protein (also stored in
        self.mol).  Raises AssertionError when the counts line is not
        tagged V2000.
        """

        def _int_field(text):
            # fixed-width integer field; a blank (or missing) field reads as 0
            text = text.strip()
            if text:
                return int(text)
            return 0

        if molInd == len(self.molIndex) - 1:
            # NOTE(review): -1 drops the very last line of the file,
            # presumably the record terminator -- confirm
            lastLine = -1
        else:
            lastLine = self.molIndex[molInd + 1]
        # lines for that molecule
        lines = self.allLines[self.molIndex[molInd]:lastLine]

        # header block: line 0 = name, line 1 = program info, line 2 = comment
        molName = lines[0].strip()

        # create molecule
        mol = Protein(name=molName)
        mol.info = lines[1]
        mol.comment = lines[2]
        chain = Chain(id='1', parent=mol, top=mol)
        res = Residue(type='UNK', number='1', parent=chain, top=mol)
        mol.levels = [Protein, Chain, Residue, Atom]

        # parse count line
        lineIndex = 3
        line = lines[lineIndex]
        version = line[33:39]
        assert version == " V2000", \
            "Format error: only V2000 is suported, got %s" % version
        nba = int(line[0:3])  # number of atoms
        nbb = int(line[3:6])  # number of bonds
        lineIndex += 1

        # parse atoms
        # Atom line layout (0-based slices):
        #   x [0:10]  y [10:20]  z [20:30]  symbol [31:34]  mass diff [34:36]
        #   charge [36:39]  stereo [39:42]  H count [42:45]  valence [48:51]
        atomsSeen = {}  # dict of atom types and number of atoms seen
        for anum in range(nba):
            line = lines[lineIndex]
            lineIndex += 1
            element = line[31:34].strip()
            atomsSeen[element] = atomsSeen.get(element, 0) + 1
            atom = Atom(name='%s_%s' % (element, atomsSeen[element]),
                        parent=res,
                        chemicalElement=element,
                        top=mol)

            atom._coords = [[
                float(line[0:10]),
                float(line[10:20]),
                float(line[20:30])
            ]]
            # the raw charge field of the file is stored unchanged
            atom._charges['sdf'] = _int_field(line[36:39])
            atom.chargeSet = 'sdf'
            mol.allAtoms.append(atom)

            atom.massDiff = _int_field(line[34:36])
            atom.stereo = _int_field(line[39:42])
            atom.hcount = line[42:45]
            atom.valence = _int_field(line[48:51])
            atom.hetatm = 1
            atom.occupancy = 0.0
            atom.temperatureFactor = 0.0

        # parse bonds: one line per bond, nbb lines in total
        for bnum in range(nbb):
            line = lines[lineIndex]
            # advance before any 'continue' so every bond line is visited
            lineIndex += 1
            at1 = mol.allAtoms[int(line[0:3]) - 1]
            at2 = mol.allAtoms[int(line[3:6]) - 1]
            if at1.isBonded(at2): continue
            bond = Bond(at1, at2, check=0)

            bond.bondOrder = _int_field(line[6:9])
            #1 = Single, 2 = Double,
            #3 = Triple, 4 = Aromatic,
            #5 = Single or Double,
            #6 = Single or Aromatic,
            #7 = Double or Aromatic, 8 = Any

            bond.stereo = _int_field(line[9:12])
            #Single bonds: 0 = not stereo,
            #1 = Up, 4 = Either,
            #6 = Down, Double bonds: 0 = Use x-, y-, z-coords
            #from atom block to determine cis or trans,
            #3 = Cis or trans (either) double bond

            bond.topo = _int_field(line[15:18])
            # 0 = Either, 1 = Ring, 2 = Chain

            try:
                bond.ReactionCenter = _int_field(line[18:21])
            except ValueError:
                bond.ReactionCenter = 0
            #0 = unmarked, 1 = a center, -1 = not a center,
            #Additional: 2 = no change,
            #4 = bond made/broken,
            #8 = bond order changes
            #12 = 4+8 (both made/broken and changes);
            #5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1)

        # "M END" and properties are not parsed at this point
        self.mol = mol
        mname = mol.name
        strRpr = mname + ':::'
        mol.allAtoms.setStringRepr(strRpr)
        strRpr = mname + ':'
        mol.chains.setStringRepr(strRpr)
        for c in mol.chains:
            cname = c.id
            strRpr = mname + ':' + cname + ':'
            c.residues.setStringRepr(strRpr)
            for r in c.residues:
                rname = r.name
                strRpr = mname + ':' + cname + ':' + rname + ':'
                r.atoms.setStringRepr(strRpr)
        molList = mol.setClass()
        molList.append(mol)
        mol.parser = self
        # comma-separated molecule names (the former loop kept only the last)
        name = ','.join(molList.name)
        molList.setStringRepr(name)
        strRpr = name + ':::'
        molList.allAtoms.setStringRepr(strRpr)

        return molList
Exemple #10
0
    def parse(self, objClass=Protein):
        """Parses mmCIF dictionary (self.mmCIF_dict) into MolKit object"""
        if self.allLines is None and self.filename:
            self.readFile()
            if self.allLines is None or len(self.allLines)==0:
                return
            self.mmCIF2Dict()
        type_symbol = None
        B_iso_or_equiv = None
        mmCIF_dict = self.mmCIF_dict
        fileName, fileExtension = os.path.splitext(self.filename)
        molName = os.path.basename(fileName)
        if mmCIF_dict.has_key('_entry.id'):
            molName = mmCIF_dict['_entry.id']
        if mmCIF_dict.has_key('_atom_site.id'):
            #The description of the data names can be found in the following link
            #http://mmcif.pdb.org/dictionaries/mmcif_pdbx.dic/Items   
            ids = mmCIF_dict['_atom_site.id'] #1 number
            group_PDB = mmCIF_dict['_atom_site.group_PDB']          #2 atom/hetatm
            
            atom_id = mmCIF_dict['_atom_site.label_atom_id']  #3 name

            comp_id = mmCIF_dict['_atom_site.label_comp_id']  #4 residue type
            label_asym_id = mmCIF_dict['_atom_site.label_asym_id']  #5 chain 
            #Note: chain ID from mmCIF file might be different from PDB file
            seq_id = mmCIF_dict['_atom_site.label_seq_id']    #6 residue number
            x_coords = mmCIF_dict['_atom_site.Cartn_x']             #7 xcoord
            y_coords = mmCIF_dict['_atom_site.Cartn_y']             #8 ycoord
            z_coords = mmCIF_dict['_atom_site.Cartn_z']             #9 zcoord
            occupancy = mmCIF_dict['_atom_site.occupancy']          #10    
            B_iso_or_equiv = mmCIF_dict['_atom_site.B_iso_or_equiv']#11
            type_symbol = mmCIF_dict['_atom_site.type_symbol']
            
                
        elif mmCIF_dict.has_key('_atom_site_label'):
            #ftp://ftp.iucr.org/pub/cif_core.dic
            atom_id = mmCIF_dict['_atom_site_label']
            len_atoms = len(atom_id)
            ids = range(len_atoms)
            
            group_PDB = len_atoms*['HETATM']
            comp_id = len_atoms*["CIF"]
            label_asym_id = len_atoms*['1']
            seq_id = len_atoms*[1]
            
            from mglutil.math.crystal import Crystal
            a = mmCIF_dict['_cell.length_a'] = float(mmCIF_dict['_cell_length_a'].split('(')[0])
            b = mmCIF_dict['_cell.length_b'] = float(mmCIF_dict['_cell_length_b'].split('(')[0])
            c = mmCIF_dict['_cell.length_c'] = float(mmCIF_dict['_cell_length_c'].split('(')[0])
            alpha = mmCIF_dict['_cell.angle_alpha'] = float(mmCIF_dict['_cell_angle_alpha'].split('(')[0])
            beta = mmCIF_dict['_cell.angle_beta'] = float(mmCIF_dict['_cell_angle_beta'].split('(')[0])
            gamma = mmCIF_dict['_cell.angle_gamma'] = float(mmCIF_dict['_cell_angle_gamma'].split('(')[0])
            cryst = Crystal((a, b, c), (alpha, beta, gamma))
            x = []
            for item in mmCIF_dict['_atom_site_fract_x']:
                x.append(float(item.split('(')[0]))
            y = []
            for item in mmCIF_dict['_atom_site_fract_y']:
                y.append(float(item.split('(')[0]))
            z = []
            for item in mmCIF_dict['_atom_site_fract_z']:
                z.append(float(item.split('(')[0]))
                
            x_coords = []
            y_coords = []
            z_coords = []
            B_iso_or_equiv = []
            for i in ids:
                trans = cryst.toCartesian([x[i], y[i], z[i]])
                
                x_coords.append(trans[0]) 
                y_coords.append(trans[1])
                z_coords.append(trans[2])
                if mmCIF_dict.has_key('_atom_site_U_iso_or_equiv'):
                    B_iso_or_equiv.append(mmCIF_dict['_atom_site_U_iso_or_equiv'][i].split('(')[0])
            if mmCIF_dict.has_key('_atom_site_type_symbol'):
                type_symbol = mmCIF_dict['_atom_site_type_symbol']
            if mmCIF_dict.has_key('_atom_site_occupancy'):
                occupancy = mmCIF_dict['_atom_site_occupancy']
            if mmCIF_dict.has_key('_chemical_name_common'):   
                molName = mmCIF_dict['_chemical_name_common']
            elif mmCIF_dict.has_key('_chemical_name_mineral'):
                molName = mmCIF_dict['_chemical_name_mineral']
                                
            if mmCIF_dict.has_key('_symmetry_space_group_name_H-M'):   
                mmCIF_dict['_symmetry.space_group_name_H-M'] = mmCIF_dict['_symmetry_space_group_name_H-M']
        else:
            print 'No _atom_site.id or _atom_site_label record is available in %s' % self.filename
            return  None  
        
        mol = Protein()
        self.mol = mol
        self.mol.allAtoms = AtomSet([])
        molList = mol.setClass()
        molList.append( mol )
        current_chain_id = None
        current_residue_number = None
        current_chain = None
        current_residue = None
        
        number_of_atoms = len(ids)

        self.configureProgressBar(init=1, mode='increment', 
                                  authtext='parse atoms', max=number_of_atoms)
        for index in range(number_of_atoms):              
            #make a new atom for the current index
            chain_id = label_asym_id[index]
            if chain_id != current_chain_id:         #make a new chain
                #molecule should adopt the current chain if there is one
                current_chain = Chain(id=chain_id)
                # FIXME: current_chain should not have allAtoms attribute
                delattr(current_chain, "allAtoms")
                current_chain_id = chain_id
                
                if current_chain is not None:    #REMEMBER TO ADOPT THE LAST ONE!!!
                    mol.adopt(current_chain, setChildrenTop=1)                    
            residue_number = seq_id[index]   

            if residue_number != current_residue_number or chain_id != label_asym_id[index-1]:         #make a new chain:
                #current_chain should adopt the current residue if there is one
                #create new residue
                residue_type = comp_id[index]
                current_residue = Residue(type=residue_type, number=residue_number)
                current_residue_number = residue_number
                if current_residue is not None:    #REMEMBER TO ADOPT THE LAST ONE!!!
                    current_chain.adopt(current_residue, setChildrenTop=1)
                
            
            name = atom_id[index]
            if type_symbol:
                element = type_symbol[index]
            else:
                element = None
            atom = Atom( name, current_residue, element, top=mol )
            atom._coords = [[float(x_coords[index]), float(y_coords[index]), float(z_coords[index])]]
            atom._charges = {}
            atom.segID =  mol.name   
            atom.normalname = name
            atom.number = int(ids[index])
            mol.atmNum[atom.number] = atom
            atom.occupancy = float(occupancy[index])
            if B_iso_or_equiv:
                atom.temperatureFactor = float(B_iso_or_equiv[index])
            atom.altname = None    
            atom.hetatm = 0
            if group_PDB[index]=='HETATM':
                atom.hetatm = 1
            self.updateProgressBar()
                           
        self.parse_MMCIF_CELL()
        try:
            self.parse_MMCIF_HYDBND()       
        except:
             print >>sys.stderr,"Parsing Hydrogen Bond Record Failed in",self.filename
               
        mol.name = molName
        mol.allAtoms = mol.chains.residues.atoms
        
        mol.parser = self
        mol.levels = [Protein, Chain, Residue, Atom]
        name = ''
        for n in molList.name:
            name = n + ','
        name = name[:-1]
        molList.setStringRepr(name)
        strRpr = name + ':::'
        molList.allAtoms.setStringRepr(strRpr)
        for m in molList:
            mname = m.name
            strRpr = mname + ':::'
            m.allAtoms.setStringRepr(strRpr)
            strRpr = mname + ':'
            m.chains.setStringRepr(strRpr)
            for c in m.chains:
                cname = c.id
                strRpr = mname + ':' + cname + ':'
                c.residues.setStringRepr(strRpr)
                for r in c.residues:
                    rname = r.name
                    strRpr = mname + ':' + cname + ':' + rname + ':'
                    r.atoms.setStringRepr(strRpr)                            
        self.buildBonds()
        return molList
class FloodPlayer(Player):
    def __init__(self, command, file):
        master = command.vf.GUI.ROOT
        self.autoLigandCommand = command.vf.AutoLigandCommand
        self.autoLigandCommand.spheres.Set(visible=1)
        self.autoLigandCommand.halo.Set(visible=1)
        pkl_file = open(file, 'rb')
        self.floods = []
        try:
            data = cPickle.load(pkl_file)
        except Exception, inst:
            print "Error loading ", __file__, "\n", inst
        self.xcent = data[0]
        self.ycent = data[1]
        self.zcent = data[2]
        self.centerx = data[3]
        self.centery = data[4]
        self.centerz = data[5]
        self.spacing = data[6]
        self.centers = []
        data = cPickle.load(pkl_file)
        self.floods.append(data[1])
        try:
            while data:
                data = cPickle.load(pkl_file)
                flood = copy.copy(self.floods[-1])
                for item in data[0]:
                    flood.remove(item)
                for item in data[1]:
                    flood.append(item)
                self.floods.append(flood)
        except EOFError:
            pass
        pkl_file.close()
        fileName = os.path.splitext(os.path.split(file)[-1])[0]
        self.mol = Protein(fileName)
        self.mol.allAtoms = AtomSet([])
        chain = Chain()
        self.residue = Residue(type="UNK")
        chain.adopt(self.residue, setChildrenTop=1)
        self.mol.adopt(chain, setChildrenTop=1)
        self.mol.parser = None
        self.filename = file
        fl = self.floods[0][0]
        x = (fl[1] - self.xcent) * self.spacing + self.centerx
        y = (fl[2] - self.ycent) * self.spacing + self.centery
        z = (fl[3] - self.zcent) * self.spacing + self.centerz
        if fl[4] == 7:
            atomchr = 'P'
            # note, this will color the NA atom pink (the PDB color for Phosphorus)
            radius = AAradii[13][0]
        if fl[4] == 6:
            atomchr = 'S'
            radius = AAradii[13][0]
        if fl[4] == 5:
            atomchr = 'A'
            radius = AAradii[10][0]
        if fl[4] == 4:
            atomchr = 'O'
            radius = AAradii[1][0]
        if fl[4] == 3:
            atomchr = 'N'
            radius = AAradii[4][0]
        if fl[4] == 2:
            atomchr = 'C'
            radius = AAradii[10][0]
        if fl[4] == 1:
            atomchr = 'H'
            radius = AAradii[15][0]
        a = Atom(atomchr, self.residue, atomchr, top=self.mol)
        a._coords = [[x, y, z]]
        a._charges = {}
        a.hetatm = 1
        a.number = 0
        a.radius = radius
        self.mol.allAtoms = self.residue.atoms
        self.mol = self.autoLigandCommand.vf.addMolecule(self.mol, False)
        self.mol.levels = [Protein, Chain, Residue, Atom]
        self.autoLigandCommand.vf.displayCPK(self.mol, scaleFactor=0.4)
        self.autoLigandCommand.vf.colorByAtomType(self.mol, ['cpk'], log=0)
        self.autoLigandCommand.vf.displayLines(self.mol,
                                               negate=True,
                                               displayBO=False,
                                               lineWidth=2,
                                               log=0,
                                               only=False)
        self.colorKeys = a.colors.keys()
        maxLen = len(self.floods) - 1
        Player.__init__(self,
                        master=master,
                        endFrame=maxLen,
                        maxFrame=maxLen,
                        titleStr="AutoLigand Flood Player",
                        hasSlider=True)
        try:  # withdrew SetAnim button
            self.form.ifd.entryByName['setanimB']['widget'].grid_forget()
            self.form.autoSize()
        except:
            pass
        self.nextFrame(0)
        self.form.root.protocol('WM_DELETE_WINDOW', self.hide_cb)
    def build4LevelsTree(self, subst_chain, atomlines):
        """
        Function to build a 4 level hierarchy Protein-Chain-Residue-Atom.

        subst_chain: dictionary mapping a substructure name (field 7 of an
                     atom record, counting from 0) to a chain id string, or
                     to a list of chain ids when several chains hold a
                     substructure of that name.  An entry is added to it
                     for every water atom.
        atomlines:   list of atom records, each one already split into a
                     list of string fields.
        The tree is stored in self.mol, nothing is returned.
        """
        self.mol= Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        waterCount = 1
        # (substructure name, substructure id) -> chain id already chosen
        chainOfSubst = {}
        # substructure name -> number of chain ids already handed out
        chainsHandedOut = {}
        for atmline in atomlines:
            if len(atmline)>= 10:
                status = atmline[9].split('|')
            else:
                status = None
            if len(atmline) == 8:
                # an 8 field record gets field 5 cut after its fifth
                # character, the remainder becoming a new field 6
                # NOTE(review): presumably meant for records whose atom
                # type and substructure id were written without a blank
                # in between -- confirm
                tmp = [atmline[5][:5], atmline[5][5:]]
                atmline[5] = tmp[0]
                atmline.insert(6, tmp[1])

            if status and status[0]=='WATER':
                # every water becomes its own HOH<n> residue of chain W
                atmline[7] = 'HOH'+str(waterCount)
                subst_chain[atmline[7]] = 'W'
                waterCount = waterCount+1

            # records too short to carry a substructure name go to the
            # default chain instead of raising IndexError
            if len(atmline) > 7:
                substName = atmline[7]
            else:
                substName = None

            chainSpec = subst_chain.get(substName)
            if isinstance(chainSpec, list) and chainSpec:
                # That is to say that several chains have the same
                # substructure: each new substructure id met under that
                # name takes the next chain id of the list, and all the
                # atoms of one substructure stay in the same chain.
                # (list.remove() returns None, so storing its result back
                # used to wipe the list after the first atom)
                substKey = (substName, atmline[6])
                if substKey not in chainOfSubst:
                    handedOut = chainsHandedOut.get(substName, 0)
                    rank = min(handedOut, len(chainSpec) - 1)
                    chainOfSubst[substKey] = chainSpec[rank]
                    chainsHandedOut[substName] = handedOut + 1
                chainID = chainOfSubst[substKey]
            elif isinstance(chainSpec, str):
                # that is to say that only one chain has this substructure
                # name.
                chainID = chainSpec
            else:
                # empty dictionary or unknown substructure name
                chainID = 'default'

            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or not chainID in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol,
                                          top = self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            if substName is None:
                # the atmline has no res name and resseq
                resName = 'RES'
                resSeq = '1'
            else:
                resName = substName[:3]
                resSeq = substName[3:]

            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.curChain.get( na )
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol.curChain,
                                              top = self.mol)
            name = atmline[1]
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O' : self.mol.curRes.hasO = 2
            # the element is the part of the atom type before the '.'
            atom = Atom(name, self.mol.curRes, top = self.mol,
                        chemicalElement = atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                              float(atmline[4]) ] ]
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #Add a data member containing a list of string describing
            # the Sybyl status bits of the atoms.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        # the cursors are only needed while building
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')
    def build3LevelsTree(self,atomlines):
        """ Function to build a 3 levels hierarchy Molecule-substructure-atoms.

        atomlines: list of atom records, each one already split into a
                   list of string fields; field 7 (counting from 0) must
                   hold the substructure name.
        The tree is stored in self.mol, nothing is returned.
        """

        self.mol= Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])
        # the residues hang directly under the molecule
        self.mol.children = ResidueSet([])
        self.mol.childrenName = 'residues'
        self.mol.childrenSetClass = ResidueSet
        self.mol.elementType = Residue
        self.mol.curRes = Residue()
        self.mol.curRes.hasCA = 0
        self.mol.curRes.hasO = 0

        self.mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline)>= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol,
                                              top = self.mol)
            name = atmline[1]
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O' : self.mol.curRes.hasO = 2
            # the element is the part of the atom type before the '.'
            atom = Atom(name, self.mol.curRes, top = self.mol,
                        chemicalElement = atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                              float(atmline[4]) ] ]
            # the charge column is optional, same handling as in
            # build4LevelsTree; the charge set name is the string 'mol2'
            # (the bare name mol2 raised a NameError)
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #Add a data member containing a list of string describing
            # the Sybyl status bits of the atoms.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)

        self.mol.residues = self.mol.children
        # there is no chain level in this tree
        assert hasattr(self.mol, 'chains')
        delattr(self.mol, 'chains')
        delattr(self.mol, 'curRes')
class Mol2Parser(MoleculeParser):

    # Record type indicators of the mol2 format listed by this parser.
    # NOTE(review): "@<TRIPOS>AlT_TYPE" (lower case l) and
    # "@<tripos>LSPLANE" (lower case prefix) are spelled differently from
    # the other tags -- they look like typos, confirm against the format
    # specification before relying on them for matching.
    Mol2Tags = ["@<TRIPOS>AlT_TYPE","@<TRIPOS>ANCHOR_ATOM",
                "@<TRIPOS>ASSOCIATED_ANNOTATION", "@<TRIPOS>ATOM",
                "@<TRIPOS>BOND","@<TRIPOS>CENTER_OF_MASS",
                "@<TRIPOS>CENTROID", "@<TRIPOS>COMMENT",
                "@<TRIPOS>CRYSIN", "@<TRIPOS>CURR_POS", "@<TRIPOS>DICT",
                "@<TRIPOS>DATA_FILE", "@<TRIPOS>EXTENSION_POINT",
                "@<TRIPOS>FF_PBC", "@<TRIPOS>FFCON_ANGLE",
                "@<TRIPOS>FFCON_DIST","@<TRIPOS>FFCON_RANGE",
                "@<TRIPOS>FFCON_TORSION", "@<TRIPOS>LINE",
                "@<tripos>LSPLANE",
                "@<TRIPOS>MOLECULE", "@<TRIPOS>NORMAL",
                "@<TRIPOS>POLYBUILD_HIST", "@<TRIPOS>QSAR_ALIGN_RULE",
                "@<TRIPOS>RING_CLOSURE", "@<TRIPOS>ROTABLE_BOND",
                "@<TRIPOS>SEARCH_DIST", "@<TRIPOS>SEARCH_OPTIONS",
                "@<TRIPOS>SUBSTRUCTURE", "@<TRIPOS>U_FEAT"]
    
               
    def __init__(self, filename):
        """ Create a parser for the mol2 file `filename` with an empty
        parsing state. """
        MoleculeParser.__init__(self, filename)
        self.counter = 0
        # one list per set record, filled by parse_MOL2_Sets
        self.setsDatas = []
        # record name --> line indices, filled by getKeysAndLinesIndices
        self.keysAndLinesIndices = {}
        self.mol2RecordParser = {}
        self.defaultReadOptions = [
            '@<TRIPOS>ATOM',
            '@<TRIPOS>BOND',
            '@<TRIPOS>MOLECULE',
            '@<TRIPOS>SET',
            '@<TRIPOS>SUBSTRUCTURE',
            '@<TRIPOS>DICT',
            ]

    def getKeysAndLinesIndices(self):
        """ Function to build a dictionary where the keys will be the
        records name of the mol2 files (@<TRIPOS>ATOM, @<TRIPOS>BOND...) and
        the value will be the index of the starting line of that record.
        """
        #this removes all comment and blank lines to fix bug #846
        for i,line in enumerate(self.allLines):
            if not line:
                self.allLines.pop(i)
            elif line[0] == '#':
                self.allLines.pop(i)
                
        i = 0
        record = None
        while  i != len(self.allLines):
            if self.allLines[i][:9] == '@<TRIPOS>':
                if self.keysAndLinesIndices:
                    self.keysAndLinesIndices[record].append(i)
                record = string.strip(self.allLines[i])
                self.keysAndLinesIndices[record] = [i+1]
                i = i+1
            else:
                i = i+1
        if record:
            self.keysAndLinesIndices[record].append(i)
        else:
            print " the file %s doesn't contain any mol2 records"%self.filename
            

        
    def parse(self):
        """ This function read a file and create the corresponding
        data hierarchy. """
        self.readFile()
        #molList = []
        molList = ProteinSet()
        if self.allLines is None:
            return
        elif len(self.allLines)!=0:
            self.getKeysAndLinesIndices()
        else:
            print "The file %s is empty"%self.filename
            return molList

        if not self.keysAndLinesIndices.has_key("@<TRIPOS>ATOM"):
            print "The file %s doesn't have Atom records, molecules can't be built"%self.filename
            return molList
        if self.keysAndLinesIndices.has_key('@<TRIPOS>SUBSTRUCTURE'):
            
            self.parse_MOL2_Substructure(self.allLines
                                         [self.keysAndLinesIndices
                                          ['@<TRIPOS>SUBSTRUCTURE'][0]:
                                          self.keysAndLinesIndices
                                          ['@<TRIPOS>SUBSTRUCTURE'][1]])
            molList.append(self.mol)

        else:
            atmlines = map(string.split, self.allLines
                           [self.keysAndLinesIndices
                            ['@<TRIPOS>ATOM'][0]:
                            self.keysAndLinesIndices
                            ['@<TRIPOS>ATOM'][1]])
            self.build4LevelsTree({},atmlines)
##              self.build2LevelsTree(map(string.split, self.allLines
##                                        [self.keysAndLinesIndices
##                                         ['@<TRIPOS>ATOM'][0]:
##                                         self.keysAndLinesIndices
##                                         ['@<TRIPOS>ATOM'][1]]))
            molList.append(self.mol)

        if self.keysAndLinesIndices.has_key('@<TRIPOS>BOND'):
            self.parse_MOL2_Bonds(self.allLines
                                  [self.keysAndLinesIndices
                                   ['@<TRIPOS>BOND'][0]:
                                   self.keysAndLinesIndices['@<TRIPOS>BOND']
                                   [1]])

        if self.keysAndLinesIndices.has_key('@<TRIPOS>SET'):
            self.parse_MOL2_Sets(self.keysAndLinesIndices['@<TRIPOS>SET'])

        return molList

    def parse_MOL2_Substructure(self, substlines):
        """build a dictionary with the chain id as keys and the
        list of residues belonging to that chain as values. If
        the id of the chain is not here then the keys is '', if
        two residues with the same name but belonging to two different
        chains then the value corresponding to that key is a list of
        the chains ID."""
        atomlines = map(string.split, self.allLines
                                 [self.keysAndLinesIndices
                                  ['@<TRIPOS>ATOM'][0]:
                                  self.keysAndLinesIndices
                                  ['@<TRIPOS>ATOM'][1]])
        subst_chain = {}
        if len(substlines) == 0:
            # case 1: no substructures are defined --> 2 levels tree.
            #self.build2LevelsTree(atomlines)
            #subst_chain = {'t
            self.build4LevelsTree(subst_chain,atomlines)

##          else:
##              substlines = map(string.split, substlines)
##              lines = filter(lambda x: len(x)>5, substlines)
##              if lines == [] or (lines != [] and \
##                                 filter(lambda x: x[5] != '****', lines)==[]):
                
##                  #self.build3LevelsTree(atomlines)

##              else:
##                  # case 3: at least 1 substructure and 1 chain --> 4 levels tree.
##                  #subst_chain = {}
                
##                  for line in substlines:
##                      try:
##                          if line[1] in subst_chain.keys():
##                              subst_chain[line[1]] = [subst_chain[line[1]]]
##                              subst_chain[line[1]].append(line[5])
##                          else:
##                              subst_chain[line[1]] = line[5]
##                      except:
##                          if line[1] in subst_chain.keys():
##                              list(subst_chain[line[1]]).append('')
##                          else:
##                              subst_chain[line[1]] = ''
##                  self.subst_chain = subst_chain
##                  self.build4LevelsTree(subst_chain,atomlines)
        else:
            # case 3: at least 1 substructure and 1 chain --> 4 levels tree.
            #subst_chain = {}
            substlines = map(string.split, substlines)
            for line in substlines:
                if len(line)<6 or line[5] == '****':
                    continue
                else:
                    try:
                        if line[1] in subst_chain.keys():
                            subst_chain[line[1]] = [subst_chain[line[1]]]
                            subst_chain[line[1]].append(line[5])
                        else:
                            subst_chain[line[1]] = line[5]
                    except:
                        if line[1] in subst_chain.keys():
                            list(subst_chain[line[1]]).append('')
                        else:
                            subst_chain[line[1]] = ''
            self.subst_chain = subst_chain
            self.build4LevelsTree(subst_chain,atomlines)


    def build2LevelsTree (self, atomlines):
        """
        Function to build a two level tree. 
        """
        print 'try to build a 2 level tree'
        self.mol= Molecule()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])

        self.mol.children = AtomSet([])
        self.mol.childrenName = 'atoms'
        self.mol.childrenSetClass = AtomSet
        self.mol.elementType = Atom
        self.mol.levels = [Molecule, Atom]
        ##1/18:self.mol.levels = [Protein, Atom]
        for atmline in atomlines:
            atom = Atom(atmline[1], self.mol,
                        chemicalElement = string.split(atmline[5], '.')[0],
            top = self.mol)
            #atom.element = atmline[5][0]
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                                  float(atmline[4]) ] ]
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
#            atom.conformation = 0
            atom.hetatm = 0
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        self.mol.atoms = self.mol.children


    def build3LevelsTree(self,atomlines):
        """ Function to build a 3 levels hierarchy Molecule-substructure-atoms.

        atomlines: list of atom records, each one already split into a
                   list of string fields; field 7 (counting from 0) must
                   hold the substructure name.
        The tree is stored in self.mol, nothing is returned.
        """

        self.mol= Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])
        # the residues hang directly under the molecule
        self.mol.children = ResidueSet([])
        self.mol.childrenName = 'residues'
        self.mol.childrenSetClass = ResidueSet
        self.mol.elementType = Residue
        self.mol.curRes = Residue()
        self.mol.curRes.hasCA = 0
        self.mol.curRes.hasO = 0

        self.mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline)>= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol,
                                              top = self.mol)
            name = atmline[1]
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O' : self.mol.curRes.hasO = 2
            # the element is the part of the atom type before the '.'
            atom = Atom(name, self.mol.curRes, top = self.mol,
                        chemicalElement = atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                              float(atmline[4]) ] ]
            # the charge column is optional, same handling as in
            # build4LevelsTree; the charge set name is the string 'mol2'
            # (the bare name mol2 raised a NameError)
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #Add a data member containing a list of string describing
            # the Sybyl status bits of the atoms.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)

        self.mol.residues = self.mol.children
        # there is no chain level in this tree
        assert hasattr(self.mol, 'chains')
        delattr(self.mol, 'chains')
        delattr(self.mol, 'curRes')

    def build4LevelsTree(self, subst_chain, atomlines):
        """
        Function to build a 4 level hierarchy Protein-Chain-Residue-Atom.

        subst_chain: dictionary mapping a substructure name (field 7 of an
                     atom record, counting from 0) to a chain id string, or
                     to a list of chain ids when several chains hold a
                     substructure of that name.  An entry is added to it
                     for every water atom.
        atomlines:   list of atom records, each one already split into a
                     list of string fields.
        The tree is stored in self.mol, nothing is returned.
        """
        self.mol= Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(os.path.splitext
                                             (self.filename)[0])
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        waterCount = 1
        # (substructure name, substructure id) -> chain id already chosen
        chainOfSubst = {}
        # substructure name -> number of chain ids already handed out
        chainsHandedOut = {}
        for atmline in atomlines:
            if len(atmline)>= 10:
                status = atmline[9].split('|')
            else:
                status = None
            if len(atmline) == 8:
                # an 8 field record gets field 5 cut after its fifth
                # character, the remainder becoming a new field 6
                # NOTE(review): presumably meant for records whose atom
                # type and substructure id were written without a blank
                # in between -- confirm
                tmp = [atmline[5][:5], atmline[5][5:]]
                atmline[5] = tmp[0]
                atmline.insert(6, tmp[1])

            if status and status[0]=='WATER':
                # every water becomes its own HOH<n> residue of chain W
                atmline[7] = 'HOH'+str(waterCount)
                subst_chain[atmline[7]] = 'W'
                waterCount = waterCount+1

            # records too short to carry a substructure name go to the
            # default chain instead of raising IndexError
            if len(atmline) > 7:
                substName = atmline[7]
            else:
                substName = None

            chainSpec = subst_chain.get(substName)
            if isinstance(chainSpec, list) and chainSpec:
                # That is to say that several chains have the same
                # substructure: each new substructure id met under that
                # name takes the next chain id of the list, and all the
                # atoms of one substructure stay in the same chain.
                # (list.remove() returns None, so storing its result back
                # used to wipe the list after the first atom)
                substKey = (substName, atmline[6])
                if substKey not in chainOfSubst:
                    handedOut = chainsHandedOut.get(substName, 0)
                    rank = min(handedOut, len(chainSpec) - 1)
                    chainOfSubst[substKey] = chainSpec[rank]
                    chainsHandedOut[substName] = handedOut + 1
                chainID = chainOfSubst[substKey]
            elif isinstance(chainSpec, str):
                # that is to say that only one chain has this substructure
                # name.
                chainID = chainSpec
            else:
                # empty dictionary or unknown substructure name
                chainID = 'default'

            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or not chainID in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol,
                                          top = self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            if substName is None:
                # the atmline has no res name and resseq
                resName = 'RES'
                resSeq = '1'
            else:
                resName = substName[:3]
                resSeq = substName[3:]

            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.curChain.get( na )
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol.curChain,
                                              top = self.mol)
            name = atmline[1]
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O' : self.mol.curRes.hasO = 2
            # the element is the part of the atom type before the '.'
            atom = Atom(name, self.mol.curRes, top = self.mol,
                        chemicalElement = atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [ [float(atmline[2]), float(atmline[3]),
                              float(atmline[4]) ] ]
            if len(atmline)>=9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #Add a data member containing a list of string describing
            # the Sybyl status bits of the atoms.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        # the cursors are only needed while building
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')
        
    def parse_MOL2_Molecule(self, mollines):
        """Function to parse the Molecule records: returns the list of the
        given lines, each one split on white space"""
        return [line.split() for line in mollines]

    def parse_MOL2_Bonds(self, bondlines):
        """ Function to build the bonds object using the bond record of
        the mol2 file.

        bondlines: lines (strings) of the @<TRIPOS>BOND record; fields 1
                   and 2 are the numbers of the bonded atoms (looked up
                   in self.mol.atmNum) and field 3 is the bond type.
        The bond order is the type as an integer when it is a number,
        'aromatic' for 'ar', 'amide' for 'am' and the type itself
        otherwise."""
        namedOrders = {'ar': 'aromatic', 'am': 'amide'}
        for bondline in bondlines:
            bd = bondline.split()
            if not bd:
                # white space only line, nothing to build
                continue
            at1 = self.mol.atmNum[int(bd[1])]
            at2 = self.mol.atmNum[int(bd[2])]

            if at1.isBonded(at2): continue
            bond = Bond(at1, at2, check=0)
            bond.type = bd[3]
            try:
                bond.bondOrder = int(bd[3])
            except ValueError:
                # not a number: one of the symbolic mol2 bond types
                bond.bondOrder = namedOrders.get(bd[3], bd[3])
        self.mol.bondsflag = 1
        self.mol.hasBonds = 1
        
    def parse_MOL2_Sets(self, setRecords):
        """ Function to parse the Sets records"""
        setRecords = map(string.split, self.allLines[setRecords[0]:
                                                     setRecords[1]])
        i = 0
        while i!=len(setRecords):
            rec = []
            if len(setRecords[i]) <= 5:
                comments = None
                for j in xrange(len(setRecords[i])):
                    rec.append(setRecords[i][j])
                rec.append(comments)
            else :
                for j in xrange(len(setRecords[i][:5])):
                    rec.append(setRecords[i][j])
                
                comments = setRecords[i][5]
                for j in xrange(6, len(setRecords[i])):
                    comments = comments+' '+setRecords[i][j]
                rec.append(comments)
            number = []
            
            self.setsDatas.append(rec)

##              self.setsDatas.append([setRecords[i][0], setRecords[i][1],
##                                     setRecords[i][2], setRecords[i][3],
##                                     setRecords[i][4],comments])
            while len(setRecords[i+1])!=0 and setRecords[i+1][-1] == '\\':
                number = number+(map(lambda x: int(x), setRecords[i+1][:-1]))
                i = i+1

            number = number+map(lambda x: int(x),setRecords[i+1])
            self.setsDatas[-1].append(number)
            i = i+2

    def hasSsDataInFile(self):
        """ Function to extract the data on the secondarystructure and
        that replace the root atom number by the residue instance
        corresonding. """
        hData = filter(lambda x: x[0][:4] == 'HELI',self.setsDatas)
        sData = filter(lambda x: x[0][:4] == 'SHEE',self.setsDatas)
        tData = filter(lambda x: x[0][:4] == 'TURN',self.setsDatas)
        self.processSSEltData(sData, self.mol)
        self.processSSEltData(hData, self.mol )
        self.processSSEltData(tData, self.mol)
        self.ssData = [hData, sData, tData]

        if self.ssData == []:
            return 0
        else:
            return 1
        
    def parseSSData(self, mol):
        """
        Function returning, for each chain of mol, the secondary
        structure information found in the file.

        The result is a dictionary whose keys are the chain ids and whose
        values are 4 items lists:
        [ [Helix, (start, end), ...], [Strand, (start, end), ...],
          [Turn, (start, end), ...], None ]
        self.ssData is computed first when it does not exist yet.
        """
        if not hasattr(self, 'ssData'):
            self.hasSsDataInFile()

        # secondary structure class --> index of its records in ssData
        ssKinds = ((Helix, 0), (Strand, 1), (Turn, 2))
        ssDataForMol = {}
        for chain in mol.chains:
            dataForChain = []
            for ssClass, index in ssKinds:
                records = self.ssData[index]
                if ssClass is Strand:
                    # the sheets are first cut into their strands
                    records = self.findStrands(records)
                startEnd = self.processSSData(records, chain)
                startEnd.insert(0, ssClass)
                dataForChain.append(startEnd)
            dataForChain.append(None)
            ssDataForMol[chain.id] = dataForChain

        return ssDataForMol

    def findStrands(self, data):
        """ Function to separate each strands of a sheet.

        data: list of sheet records; item 6 of a record is the list of
              its members, which must have a number attribute.
        Returns a list of records holding the strands of all the sheets:
        a sheet is cut wherever the numbers of two consecutive members
        (the first member is not looked at) do not follow each other, and
        every piece gets a copy of the first 6 items of its sheet.  A
        sheet without such a cut is returned as it is.  An empty data is
        returned unchanged.
        """
        if len(data) == 0: return data
        # list.append() was called with 7 arguments (a TypeError) and the
        # result was reset for every sheet: build the records as lists
        # and collect them for all the sheets
        strandData = []
        for sheet in data:
            members = sheet[6]
            strandsBreak = []
            for i in range(2, len(members)):
                if int(members[i].number) - int(members[i-1].number)!=1:
                    strandsBreak.append(i)

            if len(strandsBreak) == 0:
                strandData.append(sheet)
                continue

            header = list(sheet[:6])
            bounds = [0] + strandsBreak + [len(members)]
            # NOTE(review): processSSData reads item 1 of the members as
            # the start of a structure; the pieces after the first one
            # start right at the cut, so their first member looks like it
            # gets skipped there -- confirm what item 0 of the members is
            for k in range(len(bounds) - 1):
                strandData.append(header + [members[bounds[k]:bounds[k+1]]])
        return strandData

    def processSSData(self, data, chain):
        """
        Return the secondary structure elements of data that belong to
        chain, as a list of (first residue, last residue) tuples.

        The last field of each record of data is a list whose item 1 is the
        first residue of the element and whose last item is its last
        residue.  A record is kept when the parent of its first residue has
        the same id as chain.
        This information is used by the class GetSecondarySTructureFromFile.
        """
        chainId = chain.id
        startEnd = []
        for record in data:
            members = record[-1]
            if members[1].parent.id == chainId:
                startEnd.append((members[1], members[-1]))

        return startEnd

    def processSSEltData(self, ssData, mol):
        """
        Function to get the residue corresponding to the root atom number.

        For every record of ssData, the items 1.. of record[6] that are
        integers are replaced, in place, by the parent of the atom found at
        position (number - 1) of mol.chains.residues.atoms.  Items that are
        not integers (for instance residues resolved by a previous call)
        are left as they are; they no longer stop the processing of the
        remaining items and records.
        """
        atoms = mol.chains.residues.atoms
        for record in ssData:
            members = record[6]
            # item 0 is not an atom number and is never touched
            for i in range(1, len(members)):
                number = members[i]
                if isinstance(number, int):
                    members[i] = atoms[number - 1].parent
            
    def getMoleculeInformation(self):
        """
        Function to get the information on a molecule.

        The lines of the @<TRIPOS>MOLECULE record are handed to
        parse_MOL2_Molecule.  The first item of its first entry is the
        molecule name; the first item of its last entry is appended to the
        name unless it is missing or is one of the charge keywords
        "USER_CHARGES" / "NO_CHARGES".

        Returns the resulting string, '' when parse_MOL2_Molecule returns
        an empty list.
        """
        bounds = self.keysAndLinesIndices['@<TRIPOS>MOLECULE']
        molStr = self.parse_MOL2_Molecule(self.allLines[bounds[0]:bounds[1]])
        if molStr == []:
            return ''

        try:
            chemical_formula = molStr[-1][0]
        except (IndexError, KeyError, TypeError):
            # the last entry is empty or cannot be indexed: no extra field
            chemical_formula = None

        name = molStr[0][0]
        if chemical_formula is None or \
           chemical_formula in ["USER_CHARGES", "NO_CHARGES"]:
            return name
        return "%s %s" % (name, chemical_formula)
Exemple #15
0
    def parse(self, objClass=Protein):
        """Parses mmCIF dictionary (self.mmCIF_dict) into MolKit object.

        Two layouts are handled:
        - records keyed by '_atom_site.id': the cartesian coordinates
          stored in the file are used as they are;
        - records keyed by '_atom_site_label': the fractional coordinates
          are converted to cartesian ones with the cell read from the
          '_cell_length_*' and '_cell_angle_*' items.  In this layout the
          occupancy is optional and defaults to 1.0.

        objClass is part of the signature but is not used here.

        Returns the set created by mol.setClass() holding the new molecule,
        or None when the file holds no line or no atom record.
        """
        if self.allLines is None and self.filename:
            self.readFile()
            if self.allLines is None or len(self.allLines) == 0:
                return
            self.mmCIF2Dict()
        type_symbol = None
        B_iso_or_equiv = None
        # stays None when the file gives no occupancy at all
        occupancy = None
        mmCIF_dict = self.mmCIF_dict
        fileName, fileExtension = os.path.splitext(self.filename)
        molName = os.path.basename(fileName)
        if mmCIF_dict.has_key('_entry.id'):
            molName = mmCIF_dict['_entry.id']
        if mmCIF_dict.has_key('_atom_site.id'):
            #The description of the data names can be found in the following link
            #http://mmcif.pdb.org/dictionaries/mmcif_pdbx.dic/Items
            ids = mmCIF_dict['_atom_site.id']  #1 number
            group_PDB = mmCIF_dict['_atom_site.group_PDB']  #2 atom/hetatm

            atom_id = mmCIF_dict['_atom_site.label_atom_id']  #3 name

            comp_id = mmCIF_dict['_atom_site.label_comp_id']  #4 residue type
            label_asym_id = mmCIF_dict['_atom_site.label_asym_id']  #5 chain
            #Note: chain ID from mmCIF file might be different from PDB file
            seq_id = mmCIF_dict['_atom_site.label_seq_id']  #6 residue number
            x_coords = mmCIF_dict['_atom_site.Cartn_x']  #7 xcoord
            y_coords = mmCIF_dict['_atom_site.Cartn_y']  #8 ycoord
            z_coords = mmCIF_dict['_atom_site.Cartn_z']  #9 zcoord
            occupancy = mmCIF_dict['_atom_site.occupancy']  #10
            B_iso_or_equiv = mmCIF_dict['_atom_site.B_iso_or_equiv']  #11
            type_symbol = mmCIF_dict['_atom_site.type_symbol']

        elif mmCIF_dict.has_key('_atom_site_label'):
            #ftp://ftp.iucr.org/pub/cif_core.dic
            atom_id = mmCIF_dict['_atom_site_label']
            len_atoms = len(atom_id)
            ids = range(len_atoms)

            # this layout carries no chain/residue information: every atom
            # goes in one hetero residue of one chain
            group_PDB = len_atoms * ['HETATM']
            comp_id = len_atoms * ["CIF"]
            label_asym_id = len_atoms * ['1']
            seq_id = len_atoms * [1]

            from mglutil.math.crystal import Crystal
            # numeric values may be followed by "(su)": keep what precedes
            # the parenthesis
            a = mmCIF_dict['_cell.length_a'] = float(
                mmCIF_dict['_cell_length_a'].split('(')[0])
            b = mmCIF_dict['_cell.length_b'] = float(
                mmCIF_dict['_cell_length_b'].split('(')[0])
            c = mmCIF_dict['_cell.length_c'] = float(
                mmCIF_dict['_cell_length_c'].split('(')[0])
            alpha = mmCIF_dict['_cell.angle_alpha'] = float(
                mmCIF_dict['_cell_angle_alpha'].split('(')[0])
            beta = mmCIF_dict['_cell.angle_beta'] = float(
                mmCIF_dict['_cell_angle_beta'].split('(')[0])
            gamma = mmCIF_dict['_cell.angle_gamma'] = float(
                mmCIF_dict['_cell_angle_gamma'].split('(')[0])
            cryst = Crystal((a, b, c), (alpha, beta, gamma))
            x = []
            for item in mmCIF_dict['_atom_site_fract_x']:
                x.append(float(item.split('(')[0]))
            y = []
            for item in mmCIF_dict['_atom_site_fract_y']:
                y.append(float(item.split('(')[0]))
            z = []
            for item in mmCIF_dict['_atom_site_fract_z']:
                z.append(float(item.split('(')[0]))

            x_coords = []
            y_coords = []
            z_coords = []
            B_iso_or_equiv = []
            for i in ids:
                trans = cryst.toCartesian([x[i], y[i], z[i]])

                x_coords.append(trans[0])
                y_coords.append(trans[1])
                z_coords.append(trans[2])
                if mmCIF_dict.has_key('_atom_site_U_iso_or_equiv'):
                    B_iso_or_equiv.append(
                        mmCIF_dict['_atom_site_U_iso_or_equiv'][i].split(
                            '(')[0])
            if mmCIF_dict.has_key('_atom_site_type_symbol'):
                type_symbol = mmCIF_dict['_atom_site_type_symbol']
            if mmCIF_dict.has_key('_atom_site_occupancy'):
                # same "(su)" stripping as for the coordinates
                occupancy = [
                    str(item).split('(')[0]
                    for item in mmCIF_dict['_atom_site_occupancy']
                ]
            if mmCIF_dict.has_key('_chemical_name_common'):
                molName = mmCIF_dict['_chemical_name_common']
            elif mmCIF_dict.has_key('_chemical_name_mineral'):
                molName = mmCIF_dict['_chemical_name_mineral']

            if mmCIF_dict.has_key('_symmetry_space_group_name_H-M'):
                mmCIF_dict['_symmetry.space_group_name_H-M'] = mmCIF_dict[
                    '_symmetry_space_group_name_H-M']
        else:
            print('No _atom_site.id or _atom_site_label record is available in %s' % self.filename)
            return None

        mol = Protein()
        self.mol = mol
        self.mol.allAtoms = AtomSet([])
        molList = mol.setClass()
        molList.append(mol)
        current_chain_id = None
        current_residue_number = None
        current_chain = None
        current_residue = None

        number_of_atoms = len(ids)

        self.configureProgressBar(init=1,
                                  mode='increment',
                                  authtext='parse atoms',
                                  max=number_of_atoms)
        for index in range(number_of_atoms):
            #make a new atom for the current index
            chain_id = label_asym_id[index]
            new_chain = chain_id != current_chain_id
            if new_chain:
                current_chain = Chain(id=chain_id)
                # FIXME: current_chain should not have allAtoms attribute
                if hasattr(current_chain, "allAtoms"):
                    delattr(current_chain, "allAtoms")
                current_chain_id = chain_id
                mol.adopt(current_chain, setChildrenTop=1)

            residue_number = seq_id[index]
            # a residue number may be repeated from one chain to the next,
            # so a chain change always opens a new residue
            if residue_number != current_residue_number or new_chain:
                residue_type = comp_id[index]
                current_residue = Residue(type=residue_type,
                                          number=residue_number)
                current_residue_number = residue_number
                current_chain.adopt(current_residue, setChildrenTop=1)

            name = atom_id[index]
            if type_symbol:
                element = type_symbol[index]
            else:
                element = None
            atom = Atom(name, current_residue, element, top=mol)
            atom._coords = [[
                float(x_coords[index]),
                float(y_coords[index]),
                float(z_coords[index])
            ]]
            atom._charges = {}
            atom.segID = mol.name
            atom.normalname = name
            atom.number = int(ids[index])
            mol.atmNum[atom.number] = atom
            if occupancy:
                atom.occupancy = float(occupancy[index])
            else:
                # no occupancy in the file: treat the site as fully occupied
                atom.occupancy = 1.0
            if B_iso_or_equiv:
                atom.temperatureFactor = float(B_iso_or_equiv[index])
            atom.altname = None
            atom.hetatm = 0
            if group_PDB[index] == 'HETATM':
                atom.hetatm = 1
            self.updateProgressBar()

        self.parse_MMCIF_CELL()
        try:
            self.parse_MMCIF_HYDBND()
        except Exception:
            # hydrogen bond records are optional: report and carry on
            sys.stderr.write("Parsing Hydrogen Bond Record Failed in %s\n" %
                             self.filename)

        mol.name = molName
        mol.allAtoms = mol.chains.residues.atoms

        mol.parser = self
        mol.levels = [Protein, Chain, Residue, Atom]
        # string representations used to name the sets at every level
        name = ','.join(molList.name)
        molList.setStringRepr(name)
        strRpr = name + ':::'
        molList.allAtoms.setStringRepr(strRpr)
        for m in molList:
            mname = m.name
            strRpr = mname + ':::'
            m.allAtoms.setStringRepr(strRpr)
            strRpr = mname + ':'
            m.chains.setStringRepr(strRpr)
            for c in m.chains:
                cname = c.id
                strRpr = mname + ':' + cname + ':'
                c.residues.setStringRepr(strRpr)
                for r in c.residues:
                    rname = r.name
                    strRpr = mname + ':' + cname + ':' + rname + ':'
                    r.atoms.setStringRepr(strRpr)
        self.buildBonds()
        return molList
Exemple #16
0
def makeMoleculeFromAtoms(molname, atomSet):
    """
    Build a new Protein named molname holding copies of the atoms of atomSet.

    mol <- makeMoleculeFromAtoms(molname, atomSet)

    The chains and residues the atoms belong to are re-created under the
    new molecule; the atoms are numbered by their position in atomSet.
    """
    from MolKit.molecule import Atom, AtomSet
    from MolKit.protein import Protein, Chain, Residue

    # attributes copied as they are, in this order
    plainAttrs = ('conformation', 'chemElem', 'atomicNumber',
                  'bondOrderRadius', 'covalentRadius', 'vdwRadius',
                  'maxBonds', 'organic')
    # dictionary-like attributes, duplicated with their copy() method
    copiedAttrs = ('colors', 'opacities', '_charges')
    # attributes that only some parsers (PQR, F2D, PDBQ) create
    optionalAttrs = ('pqrRadius', 'hbstatus', 'autodock_element')

    mol = Protein(name=molname)

    srcResidues = atomSet.parent.uniq()
    srcChains = srcResidues.parent.uniq()

    # source chain -> new chain
    chainFor = {}
    for srcChain in srcChains:
        chainFor[srcChain] = Chain(srcChain.id, mol, top=mol)

    # source residue -> new residue
    residueFor = {}
    for srcRes in srcResidues:
        clone = Residue(srcRes.name[:3],
                        srcRes.name[3:],
                        srcRes.icode,
                        chainFor[srcRes.parent],
                        top=mol)
        residueFor[srcRes] = clone
        clone.hasCA = 0
        clone.hasO = 0

    clones = []
    for serial, src in enumerate(atomSet):
        fullName = src.name
        target = residueFor[src.parent]
        shortName = fullName
        if hasattr(src, "altname") and src.altname is not None:
            # alternate locations are named "name@altname"
            shortName = src.name.split("@")[0]
        if shortName == 'CA':
            target.hasCA = 1
        if shortName in ('O', 'OXT') or (len(shortName) > 3
                                         and shortName[:3] == 'OCT'):
            target.hasO = 2

        dup = Atom(shortName, target, src.element, top=mol)
        if shortName != fullName:
            dup.name = fullName
            dup.altname = src.altname
        clones.append(dup)

        # attributes set by the Atom constructor
        dup._coords = [xyz[:] for xyz in src._coords]
        for attr in plainAttrs:
            setattr(dup, attr, getattr(src, attr))
        for attr in copiedAttrs:
            setattr(dup, attr, getattr(src, attr).copy())
        dup.chargeSet = src.chargeSet

        # attributes set by the PDB parser
        try:  # pdbqs do not have this
            dup.segID = src.segID
        except AttributeError:
            pass
        dup.hetatm = src.hetatm
        try:  # pdbqs do not have this
            dup.normalname = src.normalname
        except AttributeError:
            pass
        dup.number = serial
        dup.occupancy = src.occupancy
        dup.temperatureFactor = src.temperatureFactor
        dup.altname = src.altname

        for attr in optionalAttrs:
            if hasattr(src, attr):
                setattr(dup, attr, getattr(src, attr))

        # attributes created by the PDBQS parser
        if hasattr(src, 'AtVol'):
            dup.AtVol = src.AtVol
            dup.AtSolPar = src.AtSolPar

    mol.allAtoms = AtomSet(clones)
    return mol
Exemple #17
0
    def build3LevelsTree(self, atomlines):
        """ Function to build a 3 levels hierarchy Molecule-substructure-atoms.

        atomlines is a list of atom records already split in fields:
        0 atom number, 1 atom name, 2-4 coordinates, 5 atom type (the text
        before the first '.' is used as chemical element), 7 substructure
        name (first 3 characters: residue type, rest: residue number),
        8 charge (optional), 9 status bits separated by '|' (optional).

        The molecule is stored in self.mol; its residues are its direct
        children and its 'chains' attribute is removed.
        """
        mol = self.mol = Protein()
        mol.allAtoms = AtomSet()
        mol.atmNum = {}
        mol.parser = self
        if mol.name == 'NoName':
            mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        mol.children = ResidueSet([])
        mol.childrenName = 'residues'
        mol.childrenSetClass = ResidueSet
        mol.elementType = Residue
        # placeholder compared with the first atom record
        mol.curRes = Residue()
        mol.curRes.hasCA = 0
        mol.curRes.hasO = 0

        mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != mol.curRes.number or \
               resName != mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = mol.get(na)
                if res:
                    mol.curRes = res[0]
                else:
                    mol.curRes = Residue(resName,
                                         resSeq,
                                         '',
                                         mol,
                                         top=mol)
            name = atmline[1]
            if name == 'CA': mol.curRes.hasCA = 1
            if name == 'O': mol.curRes.hasO = 2
            atom = Atom(name,
                        mol.curRes,
                        top=mol,
                        chemicalElement=atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            mol.atmNum[atom.number] = atom
            atom._coords = [[
                float(atmline[2]),
                float(atmline[3]),
                float(atmline[4])
            ]]
            # the charge column is optional; the charge set is named by the
            # string 'mol2', the key used in atom._charges
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #Add a data member containing a list of string describing
            # the Sybyl status bits of the atoms.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            mol.allAtoms.append(atom)

        mol.residues = mol.children
        # there is no chain level in this hierarchy
        assert hasattr(mol, 'chains')
        delattr(mol, 'chains')
        delattr(mol, 'curRes')
Exemple #18
0
    def parse(self, objClass=Protein):
        """Parse the lines of a gro file into a MolKit object.

        Line 1 holds the number of atoms; the atom lines follow and are
        read by fixed columns: residue number [0:5], residue name [5:10],
        atom name [10:15], atom number [15:20] and the coordinates
        [20:28], [28:36], [36:44], which are multiplied by 10.
        All the residues are put in a single chain whose id is 'GRO'.

        objClass is part of the signature but is not used here.

        Returns the set created by mol.setClass() holding the new molecule,
        or None when the file holds no line.
        """
        if self.allLines is None and self.filename:
            self.readFile()
            if self.allLines is None or len(self.allLines) == 0:
                return

        mol = Protein()
        self.mol = mol
        molList = mol.setClass()
        molList.append(mol)
        current_residue_number = None
        current_residue = None
        # the atom count is a free-format integer: read the whole number,
        # not only its first 5 characters
        number_of_atoms = int(self.allLines[1].split()[0])

        self.configureProgressBar(init=1, mode='increment',
                                  authtext='parse atoms', max=number_of_atoms)

        current_chain = Chain(id='GRO',)
        #FIX this: The existence of allAtoms attribute (and the fact that it is an empty set rather than all atoms in the chain) causes getNodesByMolecule() to return wrong values
        if hasattr(current_chain, "allAtoms"):
            del(current_chain.allAtoms)
        mol.adopt(current_chain, setChildrenTop=1)

        # looked up once instead of once per atom
        known_elements = babel_elements.keys()
        for index in range(2, number_of_atoms + 2):
            line = self.allLines[index]
            residue_number = int(line[:5])
            if residue_number != current_residue_number:
                # create the new residue and attach it to the chain
                residue_type = line[5:10].split(' ')[0]
                current_residue = Residue(type=residue_type,
                                          number=residue_number)
                current_residue_number = residue_number
                current_chain.adopt(current_residue, setChildrenTop=1)

            name = line[10:15].split(' ')[-1]
            element = name
            if element not in known_elements:
                # the atom name is not an element symbol: 'Me...' names
                # outside of "System"/"SOL" residues are carbons, any other
                # name gives its first letter
                if element[:2] == 'Me' and \
                   residue_type not in ("System", "SOL"):
                    element = 'C'
                else:
                    element = element[0]

            atom = Atom(name, current_residue, element, top=mol)
            # NOTE(review): the factor 10 presumably converts nm to Angstrom
            atom._coords = [[
                float(line[20:28]) * 10,
                float(line[28:36]) * 10,
                float(line[36:44]) * 10
            ]]
            # charges are stored by charge set name, hence a dictionary
            atom._charges = {}
            atom.segID = mol.name
            atom.normalname = name
            atom.number = int(line[15:20])
            atom.elementType = name[0]
            mol.atmNum[atom.number] = atom
            atom.altname = None
            atom.hetatm = 0
        mol.name = os.path.split(os.path.splitext(self.filename)[0])[-1]
        mol.allAtoms = mol.chains.residues.atoms
        mol.parser = self
        mol.levels = [Protein, Chain, Residue, Atom]
        # string representations used to name the sets at every level
        name = ','.join(molList.name)
        molList.setStringRepr(name)
        strRpr = name + ':::'
        molList.allAtoms.setStringRepr(strRpr)
        for m in molList:
            mname = m.name
            strRpr = mname + ':::'
            m.allAtoms.setStringRepr(strRpr)
            strRpr = mname + ':'
            m.chains.setStringRepr(strRpr)
            for c in m.chains:
                cname = c.id
                strRpr = mname + ':' + cname + ':'
                c.residues.setStringRepr(strRpr)
                for r in c.residues:
                    rname = r.name
                    strRpr = mname + ':' + cname + ':' + rname + ':'
                    r.atoms.setStringRepr(strRpr)
        return molList
Exemple #19
0
class Mol2Parser(MoleculeParser):
    """MoleculeParser subclass reading mol2 files."""

    # Record names, each starting with the '@<TRIPOS>' marker.
    # NOTE(review): "AlT_TYPE" (lower case l) and "@<tripos>LSPLANE"
    # (lower case marker) are spelled differently from the other
    # entries -- confirm whether this is intended.
    Mol2Tags = [
        "@<TRIPOS>AlT_TYPE", "@<TRIPOS>ANCHOR_ATOM",
        "@<TRIPOS>ASSOCIATED_ANNOTATION", "@<TRIPOS>ATOM", "@<TRIPOS>BOND",
        "@<TRIPOS>CENTER_OF_MASS", "@<TRIPOS>CENTROID", "@<TRIPOS>COMMENT",
        "@<TRIPOS>CRYSIN", "@<TRIPOS>CURR_POS", "@<TRIPOS>DICT",
        "@<TRIPOS>DATA_FILE", "@<TRIPOS>EXTENSION_POINT", "@<TRIPOS>FF_PBC",
        "@<TRIPOS>FFCON_ANGLE", "@<TRIPOS>FFCON_DIST", "@<TRIPOS>FFCON_RANGE",
        "@<TRIPOS>FFCON_TORSION", "@<TRIPOS>LINE", "@<tripos>LSPLANE",
        "@<TRIPOS>MOLECULE", "@<TRIPOS>NORMAL", "@<TRIPOS>POLYBUILD_HIST",
        "@<TRIPOS>QSAR_ALIGN_RULE", "@<TRIPOS>RING_CLOSURE",
        "@<TRIPOS>ROTABLE_BOND", "@<TRIPOS>SEARCH_DIST",
        "@<TRIPOS>SEARCH_OPTIONS", "@<TRIPOS>SUBSTRUCTURE", "@<TRIPOS>U_FEAT"
    ]

    def __init__(self, filename):
        """Initialise the parser state for the mol2 file filename."""
        MoleculeParser.__init__(self, filename)
        # bookkeeping, all empty until the file is parsed
        self.counter = 0
        self.setsDatas = []
        self.mol2RecordParser = {}
        # record name -> line indices, filled by getKeysAndLinesIndices
        self.keysAndLinesIndices = {}
        self.defaultReadOptions = [
            '@<TRIPOS>ATOM',
            '@<TRIPOS>BOND',
            '@<TRIPOS>MOLECULE',
            '@<TRIPOS>SET',
            '@<TRIPOS>SUBSTRUCTURE',
            '@<TRIPOS>DICT',
        ]

    def getKeysAndLinesIndices(self):
        """ Function to build a dictionary where the keys will be the
        records name of the mol2 files (@<TRIPOS>ATOM, @<TRIPOS>BOND...) and
        the value will be the index of the starting line of that record.
        """
        #this removes all comment and blank lines to fix bug #846
        for i, line in enumerate(self.allLines):
            if not line:
                self.allLines.pop(i)
            elif line[0] == '#':
                self.allLines.pop(i)

        i = 0
        record = None
        while i != len(self.allLines):
            if self.allLines[i][:9] == '@<TRIPOS>':
                if self.keysAndLinesIndices:
                    self.keysAndLinesIndices[record].append(i)
                record = string.strip(self.allLines[i])
                self.keysAndLinesIndices[record] = [i + 1]
                i = i + 1
            else:
                i = i + 1
        if record:
            self.keysAndLinesIndices[record].append(i)
        else:
            print " the file %s doesn't contain any mol2 records" % self.filename

    def parse(self):
        """ Read the file and create the corresponding data hierarchy.

        Returns a ProteinSet holding the molecule built from the atom
        records; the set is empty when the file is empty or has no
        @<TRIPOS>ATOM record.  Returns None when no line could be read.
        """
        self.readFile()
        molList = ProteinSet()
        if self.allLines is None:
            return
        if len(self.allLines) == 0:
            print("The file %s is empty" % self.filename)
            return molList
        self.getKeysAndLinesIndices()

        def linesOf(tag):
            # lines found between the record name and the next record
            bounds = self.keysAndLinesIndices[tag]
            return self.allLines[bounds[0]:bounds[1]]

        if "@<TRIPOS>ATOM" not in self.keysAndLinesIndices:
            print("The file %s doesn't have Atom records, molecules can't be built" % self.filename)
            return molList

        if '@<TRIPOS>SUBSTRUCTURE' in self.keysAndLinesIndices:
            self.parse_MOL2_Substructure(linesOf('@<TRIPOS>SUBSTRUCTURE'))
        else:
            # no substructure record: build the tree from the atoms alone
            atmlines = [line.split() for line in linesOf('@<TRIPOS>ATOM')]
            self.build4LevelsTree({}, atmlines)
        molList.append(self.mol)

        if '@<TRIPOS>BOND' in self.keysAndLinesIndices:
            self.parse_MOL2_Bonds(linesOf('@<TRIPOS>BOND'))

        if '@<TRIPOS>SET' in self.keysAndLinesIndices:
            self.parse_MOL2_Sets(self.keysAndLinesIndices['@<TRIPOS>SET'])

        return molList

    def parse_MOL2_Substructure(self, substlines):
        """build a dictionary with the chain id as keys and the
        list of residues belonging to that chain as values. If
        the id of the chain is not here then the keys is '', if
        two residues with the same name but belonging to two different
        chains then the value corresponding to that key is a list of
        the chains ID."""
        atomlines = map(
            string.split,
            self.allLines[self.keysAndLinesIndices['@<TRIPOS>ATOM'][0]:self.
                          keysAndLinesIndices['@<TRIPOS>ATOM'][1]])
        subst_chain = {}
        if len(substlines) == 0:
            # case 1: no substructures are defined --> 2 levels tree.
            #self.build2LevelsTree(atomlines)
            #subst_chain = {'t
            self.build4LevelsTree(subst_chain, atomlines)

##          else:
##              substlines = map(string.split, substlines)
##              lines = filter(lambda x: len(x)>5, substlines)
##              if lines == [] or (lines != [] and \
##                                 filter(lambda x: x[5] != '****', lines)==[]):

##                  #self.build3LevelsTree(atomlines)

##              else:
##                  # case 3: at least 1 substructure and 1 chain --> 4 levels tree.
##                  #subst_chain = {}

##                  for line in substlines:
##                      try:
##                          if line[1] in subst_chain.keys():
##                              subst_chain[line[1]] = [subst_chain[line[1]]]
##                              subst_chain[line[1]].append(line[5])
##                          else:
##                              subst_chain[line[1]] = line[5]
##                      except:
##                          if line[1] in subst_chain.keys():
##                              list(subst_chain[line[1]]).append('')
##                          else:
##                              subst_chain[line[1]] = ''
##                  self.subst_chain = subst_chain
##                  self.build4LevelsTree(subst_chain,atomlines)
        else:
            # case 3: at least 1 substructure and 1 chain --> 4 levels tree.
            #subst_chain = {}
            substlines = map(string.split, substlines)
            for line in substlines:
                if len(line) < 6 or line[5] == '****':
                    continue
                else:
                    try:
                        if line[1] in subst_chain.keys():
                            subst_chain[line[1]] = [subst_chain[line[1]]]
                            subst_chain[line[1]].append(line[5])
                        else:
                            subst_chain[line[1]] = line[5]
                    except:
                        if line[1] in subst_chain.keys():
                            list(subst_chain[line[1]]).append('')
                        else:
                            subst_chain[line[1]] = ''
            self.subst_chain = subst_chain
            self.build4LevelsTree(subst_chain, atomlines)

    def build2LevelsTree(self, atomlines):
        """
        Build a two levels hierarchy Molecule-atoms, stored in self.mol.

        atomlines is a list of atom records already split in fields:
        0 atom number, 1 atom name, 2-4 coordinates, 5 atom type (the text
        before the first '.' is used as chemical element), 8 charge
        (optional).
        """
        print('try to build a 2 level tree')
        mol = Molecule()
        self.mol = mol
        mol.allAtoms = AtomSet()
        mol.atmNum = {}
        mol.parser = self
        if mol.name == 'NoName':
            # unnamed molecule: use the file name without its extension
            base = os.path.splitext(self.filename)[0]
            mol.name = os.path.basename(base)

        # the atoms are the direct children of the molecule
        mol.children = AtomSet([])
        mol.childrenName = 'atoms'
        mol.childrenSetClass = AtomSet
        mol.elementType = Atom
        mol.levels = [Molecule, Atom]
        for fields in atomlines:
            elementSymbol = fields[5].split('.')[0]
            atom = Atom(fields[1],
                        mol,
                        chemicalElement=elementSymbol,
                        top=mol)
            atom.element = atom.chemElem
            atom.number = int(fields[0])
            mol.atmNum[atom.number] = atom
            atom._coords = [[float(value) for value in fields[2:5]]]
            if len(fields) >= 9:
                # the charge column is present
                atom._charges['mol2'] = float(fields[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            mol.allAtoms.append(atom)
        mol.atoms = mol.children

    def build3LevelsTree(self, atomlines):
        """
        Build a 3 levels hierarchy Molecule-substructure-atoms
        (Protein-Residue-Atom).

        atomlines is a sequence of tokenized atom records.  The fields read
        here are: 0 atom number, 1 atom name, 2-4 the x, y, z coordinates,
        5 the atom type (the part before the first '.' is the chemical
        element), 7 the substructure name (first 3 characters: residue
        type, remainder: residue number), 8 the partial charge when
        present and 9 the '|'-separated status bits when present.

        The new molecule is stored in self.mol; nothing is returned.
        """
        mol = self.mol = Protein()
        mol.allAtoms = AtomSet()
        mol.atmNum = {}
        mol.parser = self
        if mol.name == 'NoName':
            mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        mol.children = ResidueSet([])
        mol.childrenName = 'residues'
        mol.childrenSetClass = ResidueSet
        mol.elementType = Residue
        # placeholder residue, replaced as soon as the first atom is read
        mol.curRes = Residue()
        mol.curRes.hasCA = 0
        mol.curRes.hasO = 0

        mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != mol.curRes.number or resName != mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = mol.get(na)
                if res:
                    mol.curRes = res[0]
                else:
                    mol.curRes = Residue(resName, resSeq, '', mol, top=mol)
            name = atmline[1]
            if name == 'CA': mol.curRes.hasCA = 1
            if name == 'O': mol.curRes.hasO = 2
            atom = Atom(name,
                        mol.curRes,
                        top=mol,
                        chemicalElement=atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            # index the atom by its number from the file
            mol.atmNum[atom.number] = atom
            atom._coords = [[
                float(atmline[2]),
                float(atmline[3]),
                float(atmline[4])
            ]]
            # The charge column is optional; the 2 and 4 level builders
            # guard it the same way.
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                # the name of the charge set is the string 'mol2' (the
                # previous bare name mol2 raised a NameError)
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            # Add a data member containing a list of strings describing
            # the Sybyl status bits of the atom.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            mol.allAtoms.append(atom)

        mol.residues = mol.children
        # this tree has no chain level: drop the attributes that only make
        # sense for the 4 level hierarchy
        assert hasattr(mol, 'chains')
        delattr(mol, 'chains')
        delattr(mol, 'curRes')

    def build4LevelsTree(self, subst_chain, atomlines):
        """
        Function to build a 4 level hierarchy Protein-Chain-Residue-Atom.

        subst_chain -- dictionary mapping a substructure name (field 7 of
                       an atom line) to a chain id.  The values are tested
                       below for being either a string or a list of chain
                       ids.  The dictionary is modified in place.
        atomlines   -- sequence of tokenized atom records (lists of
                       strings).  Records of exactly 8 fields and records
                       flagged WATER are modified in place.

        The new molecule is stored in self.mol; nothing is returned.
        """
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            # name the molecule after the file, without directory/extension
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        # placeholder chain/residue, replaced while reading the atoms and
        # removed again at the end of this method
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        # running counter used to number the renamed water substructures
        i = 1
        for atmline in atomlines:
            # field 9, when present, holds the '|'-separated status bits
            if len(atmline) >= 10:
                status = string.split(atmline[9], '|')
            else:
                status = None
            if len(atmline) == 8:
                # field 5 is cut after its 5th character and the remainder
                # is inserted as a new field 6 -- presumably the atom type
                # was fused with the next column; TODO confirm
                tmp = [atmline[5][:5], atmline[5][5:]]
                atmline[5] = tmp[0]
                atmline.insert(6, tmp[1])

            # atoms flagged WATER are renamed HOH<n> and put in chain 'W'
            if status and status[0] == 'WATER':
                chainID = 'W'
                atmline[7] = 'HOH' + str(i)
                subst_chain[atmline[7]] = chainID
                i = i + 1

            # find the chain id of the substructure this atom belongs to
            if subst_chain == {}:
                chainID = 'default'

            elif not subst_chain.has_key(atmline[7]):
                if subst_chain.has_key('****'):
                    # NOTE(review): this lookup is only reached when
                    # atmline[7] is not a key, so for a plain dict it
                    # always raises and chainID ends up being 'default'.
                    try:
                        chainID = subst_chain[atmline[7]]
                    except:
                        chainID = 'default'
                else:
                    chainID = 'default'

            elif type(subst_chain[atmline[7]]) is types.StringType:
                # that is to say that only chains has this substructure name.
                chainID = subst_chain[atmline[7]]

            elif type(subst_chain[atmline[7]]) is types.ListType:
                # That is to say that several chains have the same substructure.
                chainID = subst_chain[atmline[7]][0]
                # NOTE(review): list.remove() returns None, so None is
                # stored under this key.  Later atom lines with the same
                # substructure name then match none of the branches above
                # and keep the chainID of the previous iteration --
                # confirm that this is the intended behaviour.
                subst_chain[atmline[7]] = subst_chain[atmline[7]].remove(
                    chainID)

            # switch to (or create) the chain with that id
            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or not chainID in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol, top=self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            # NOTE(review): atmline[7] has already been indexed above
            # unless subst_chain is empty and the atom is not a water, and
            # the else branch needs at least 8 fields although the test is
            # "< 7".
            if len(atmline) < 7:
                # test if the atmline has a res name and resseq:
                resName = 'RES'
                resSeq = '1'
            else:
                resName = atmline[7][:3]
                resSeq = atmline[7][3:]

            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = string.strip(resName) + string.strip(resSeq)
                res = self.mol.curChain.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName,
                                              resSeq,
                                              '',
                                              self.mol.curChain,
                                              top=self.mol)
            name = atmline[1]
            # flag the residues that have a CA and an O atom
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O': self.mol.curRes.hasO = 2
            # the chemical element is the part of the atom type (field 5)
            # that precedes the first '.'
            atom = Atom(name,
                        self.mol.curRes,
                        top=self.mol,
                        chemicalElement=string.split(atmline[5], '.')[0])
            #atom.element = atmline[5][0]
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            # index the atom by its number from the file
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[
                float(atmline[2]),
                float(atmline[3]),
                float(atmline[4])
            ]]
            # the charge (field 8) is optional
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
#            atom.conformation = 0
            atom.hetatm = 0
            # Add a data member containing a list of strings describing
            # the Sybyl status bits of the atom.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        # the construction helpers are not part of the finished molecule
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')

    def parse_MOL2_Molecule(self, mollines):
        """
        Function to parse the Molecule records.

        mollines is a sequence of strings.  Returns a list with, for every
        line, the list of its whitespace separated tokens (an empty list
        for a blank line).
        """
        # str.split replaces the deprecated string.split function, and the
        # comprehension always yields a real list that callers can index.
        return [line.split() for line in mollines]

    def parse_MOL2_Bonds(self, bondlines):
        """
        Function to build the bonds object using the bond record of
        the mol2 file.

        bondlines is a sequence of strings.  In each record field 1 and 2
        are the numbers of the two bonded atoms (looked up in
        self.mol.atmNum) and field 3 is the bond type.  Blank lines are
        skipped.  A numeric bond type is stored as an int in
        bond.bondOrder, 'ar' and 'am' are stored as 'aromatic' and
        'amide', and any other type is stored unchanged.

        Raises KeyError when a bond refers to an atom number that is not
        in self.mol.atmNum.
        """
        orderNames = {'ar': 'aromatic', 'am': 'amide'}
        for line in bondlines:
            bd = line.split()
            if not bd:
                # e.g. a blank line at the end of the bond section
                continue
            at1 = self.mol.atmNum[int(bd[1])]
            at2 = self.mol.atmNum[int(bd[2])]

            # do not create the same bond twice
            if at1.isBonded(at2): continue
            bond = Bond(at1, at2, check=0)
            bond.type = bd[3]
            try:
                bond.bondOrder = int(bd[3])
            except ValueError:
                # non numeric bond type
                bond.bondOrder = orderNames.get(bd[3], bd[3])
        self.mol.bondsflag = 1
        self.mol.hasBonds = 1

    def parse_MOL2_Sets(self, setRecords):
        """
        Function to parse the Sets records.

        setRecords -- pair (first, last) of indices delimiting the lines of
                      the set section in self.allLines.

        Every set is made of a header line followed by its member lines; a
        member line whose last token is a backslash is continued on the
        next line.  For each set one list is appended to self.setsDatas:
        the header tokens (at most the first 5), then the comment (the
        remaining header tokens joined with a blank, or None when the
        header has 5 tokens or less), then the list of all the integers
        found on the member lines.

        A header that is not followed by a member line gets an empty
        member list.
        """
        first, last = setRecords[0], setRecords[1]
        records = [line.split() for line in self.allLines[first:last]]
        nbRecords = len(records)
        i = 0
        while i < nbRecords:
            header = records[i]
            if len(header) <= 5:
                rec = list(header)
                rec.append(None)
            else:
                rec = header[:5]
                rec.append(' '.join(header[5:]))
            self.setsDatas.append(rec)
            i = i + 1

            number = []
            # continuation lines: everything but the trailing backslash
            while i < nbRecords and records[i] and records[i][-1] == '\\':
                number.extend([int(x) for x in records[i][:-1]])
                i = i + 1
            # last (or only) member line of the set
            if i < nbRecords:
                number.extend([int(x) for x in records[i]])
                i = i + 1
            rec.append(number)

    def hasSsDataInFile(self):
        """
        Function to extract the data on the secondary structure and
        to replace the root atom numbers by the corresponding residue
        instances.

        The sets of self.setsDatas whose name starts with 'HELI', 'SHEE'
        or 'TURN' are handed to self.processSSEltData and stored in
        self.ssData as [helices, sheets, turns].

        Returns 1 when at least one such set was found, else 0.
        """
        hData = [rec for rec in self.setsDatas if rec[0][:4] == 'HELI']
        sData = [rec for rec in self.setsDatas if rec[0][:4] == 'SHEE']
        tData = [rec for rec in self.setsDatas if rec[0][:4] == 'TURN']
        self.processSSEltData(sData, self.mol)
        self.processSSEltData(hData, self.mol)
        self.processSSEltData(tData, self.mol)
        self.ssData = [hData, sData, tData]

        # self.ssData always holds three lists, so comparing it with []
        # could never report "no data"; look at the lists themselves.
        if hData or sData or tData:
            return 1
        return 0

    def parseSSData(self, mol):
        """
        Collect, for every chain of mol, the first and last residue of
        each secondary structure element described in the file.

        Returns a dictionary keyed by chain id.  Each value is the list
        [helices, strands, turns, None] where every one of the first three
        entries is a list starting with the class of the element (Helix,
        Strand, Turn) followed by the (start, end) pairs returned by
        self.processSSData for that chain.
        """
        # make sure the secondary structure sets have been extracted
        if not hasattr(self, 'ssData'):
            self.hasSsDataInFile()

        perChain = {}
        for chain in mol.chains:
            helices = self.processSSData(self.ssData[0], chain)
            helices.insert(0, Helix)

            # the sheets are first cut into their individual strands
            strandRecords = self.findStrands(self.ssData[1])
            strands = self.processSSData(strandRecords, chain)
            strands.insert(0, Strand)

            turns = self.processSSData(self.ssData[2], chain)
            turns.insert(0, Turn)

            perChain[chain.id] = [helices, strands, turns, None]

        return perChain

    def findStrands(self, data):
        """
        Function to separate each strand of a sheet.

        data is a list of sheet records; entry 6 of a record is the list
        [count, residue, residue, ...] and a strand ends wherever two
        consecutive residues do not have consecutive numbers.

        Returns data itself when it is empty, else a new list of records.
        A sheet without any break is returned unchanged.  A sheet with
        breaks is replaced by one record per strand: entries 0-5 are
        copied from the sheet and entry 6 is [number of residues of the
        strand, residue, ...].  The leading count keeps the first residue
        at index 1, which is where processSSData looks for it.
        """
        if len(data) == 0:
            return data

        strandData = []
        for sheet in data:
            members = sheet[6]
            # members[0] is the count, so the first pair of residues that
            # can be compared is (members[1], members[2])
            strandsBreak = [
                i for i in range(2, len(members))
                if int(members[i].number) - int(members[i - 1].number) != 1
            ]
            if not strandsBreak:
                strandData.append(sheet)
                continue

            header = list(sheet[:6])
            bounds = [1] + strandsBreak + [len(members)]
            for start, stop in zip(bounds[:-1], bounds[1:]):
                residues = list(members[start:stop])
                strandData.append(header + [[len(residues)] + residues])
        return strandData

    def processSSData(self, data, chain):
        """
        Return the secondary structure elements of data that belong to the
        given chain.

        Each record of data ends with a list whose entry 1 is the first
        residue of the element and whose last entry is its last residue.
        A record belongs to the chain when the parent of its first residue
        has the same id as chain.  The result holds one tuple
        (first residue, last residue) per selected record, in the order of
        data.
        This information is used by the class GetSecondarySTructureFromFile.
        """
        chainId = chain.id
        startEnd = []
        for record in data:
            residues = record[-1]
            if residues[1].parent.id == chainId:
                startEnd.append((residues[1], residues[-1]))

        return startEnd

    def processSSEltData(self, ssData, mol):
        """
        Function to get the residue corresponding to the root atom number.

        In every record of ssData the integers found in entry 6, from
        index 1 on, are replaced in place by the parent of the atom at
        position (number - 1) in mol.chains.residues.atoms.  Index 0 is
        left untouched.

        The whole function returns as soon as a value that is not an
        integer is met -- presumably because the records have already
        been converted; TODO confirm.  Nothing is returned.
        """
        atoms = mol.chains.residues.atoms
        for data in ssData:
            members = data[6]
            # range and int are the portable spellings of the former
            # xrange and types.IntType
            for i in range(1, len(members)):
                if not isinstance(members[i], int):
                    return
                members[i] = atoms[members[i] - 1].parent

    def getMoleculeInformation(self):
        """
        Function to get the information on a molecule.

        The lines of the '@<TRIPOS>MOLECULE' section are tokenized with
        self.parse_MOL2_Molecule.  The result is the first token of the
        first line, followed by a blank and the first token of the last
        line unless that token is missing, "USER_CHARGES" or
        "NO_CHARGES".  An empty section gives '', and a blank first line
        gives an empty name.
        """
        bounds = self.keysAndLinesIndices['@<TRIPOS>MOLECULE']
        records = list(self.parse_MOL2_Molecule(
            self.allLines[bounds[0]:bounds[1]]))
        if not records:
            return ''

        # a blank line is tokenized into an empty list
        firstTokens = records[0]
        if firstTokens:
            molStr = firstTokens[0]
        else:
            molStr = ''

        try:
            chemical_formula = records[-1][0]
        except IndexError:
            chemical_formula = None

        if chemical_formula is None or \
           chemical_formula in ["USER_CHARGES", "NO_CHARGES"]:
            return molStr
        return "%s %s" % (molStr, chemical_formula)
Exemple #20
0
def makeMoleculeFromAtoms(molname, atomSet):
    """
    create a new molecule from a list of atoms

    mol <- makeMoleculeFromAtoms(molname, atomSet)

    A new Protein named molname is built with one new chain per chain and
    one new residue per residue found among the parents of the atoms of
    atomSet.  Every atom is re-created below its new residue and its
    attributes are copied over; the new atoms are numbered by their
    position in atomSet.
"""
    from MolKit.molecule import Atom, AtomSet
    from MolKit.protein import Protein, Chain, Residue

    # attributes copied as they are, in this order, right after _coords
    constructorAttrs = ('conformation', 'chemElem', 'atomicNumber',
                        'bondOrderRadius', 'covalentRadius', 'vdwRadius',
                        'maxBonds', 'organic')
    # dictionary-like attributes which must not be shared with the source
    copiedAttrs = ('colors', 'opacities', '_charges')
    # attributes only created by some parsers (PQR, F2D, PDBQ)
    optionalAttrs = ('pqrRadius', 'hbstatus', 'autodock_element')

    # the top object
    mol = Protein(name=molname)

    # the residues and chains the atoms come from
    residues = atomSet.parent.uniq()
    chains = residues.parent.uniq()

    # old chain -> new chain
    chainsd = {}
    for oldChain in chains:
        chainsd[oldChain] = Chain(oldChain.id, mol, top=mol)

    # old residue -> new residue
    resd = {}
    for oldRes in residues:
        newRes = Residue(oldRes.name[:3], oldRes.name[3:], oldRes.icode,
                         chainsd[oldRes.parent], top=mol)
        resd[oldRes] = newRes
        newRes.hasCA = 0
        newRes.hasO = 0

    # the atoms
    newats = []
    for num, at in enumerate(atomSet):
        name = at.name
        res = resd[at.parent]
        if name == 'CA':
            res.hasCA = 1
        if name in ('O', 'OXT') or (len(name) > 3 and name.startswith('OCT')):
            res.hasO = 2

        newat = Atom(name, res, at.element, top=mol)
        newats.append(newat)

        # attributes set by the constructor
        newat._coords = [coords[:] for coords in at._coords]
        for attr in constructorAttrs:
            setattr(newat, attr, getattr(at, attr))
        for attr in copiedAttrs:
            setattr(newat, attr, getattr(at, attr).copy())
        newat.chargeSet = at.chargeSet

        # attributes set by the PDB parser
        newat.segID = at.segID
        newat.hetatm = at.hetatm
        newat.normalname = at.normalname
        newat.number = num  # not at.number
        newat.occupancy = at.occupancy
        newat.temperatureFactor = at.temperatureFactor
        newat.altname = at.altname

        for attr in optionalAttrs:
            if hasattr(at, attr):
                setattr(newat, attr, getattr(at, attr))

        # attributes created by the PDBQS parser
        if hasattr(at, 'AtVol'):
            newat.AtVol = at.AtVol
            newat.AtSolPar = at.AtSolPar

    mol.allAtoms = AtomSet(newats)
    return mol
Exemple #21
0
    def build4LevelsTree(self, subst_chain, atomlines):
        """
        Function to build a 4 level hierarchy Protein-Chain-Residue-Atom.

        subst_chain -- dictionary mapping a substructure name (field 7 of
                       an atom line) to a chain id.  The values are tested
                       below for being either a string or a list of chain
                       ids.  The dictionary is modified in place.
        atomlines   -- sequence of tokenized atom records (lists of
                       strings).  Records of exactly 8 fields and records
                       flagged WATER are modified in place.

        The new molecule is stored in self.mol; nothing is returned.
        """
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            # name the molecule after the file, without directory/extension
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        # placeholder chain/residue, replaced while reading the atoms and
        # removed again at the end of this method
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        # running counter used to number the renamed water substructures
        i = 1
        for atmline in atomlines:
            # field 9, when present, holds the '|'-separated status bits
            if len(atmline) >= 10:
                status = string.split(atmline[9], '|')
            else:
                status = None
            if len(atmline) == 8:
                # field 5 is cut after its 5th character and the remainder
                # is inserted as a new field 6 -- presumably the atom type
                # was fused with the next column; TODO confirm
                tmp = [atmline[5][:5], atmline[5][5:]]
                atmline[5] = tmp[0]
                atmline.insert(6, tmp[1])

            # atoms flagged WATER are renamed HOH<n> and put in chain 'W'
            if status and status[0] == 'WATER':
                chainID = 'W'
                atmline[7] = 'HOH' + str(i)
                subst_chain[atmline[7]] = chainID
                i = i + 1

            # find the chain id of the substructure this atom belongs to
            if subst_chain == {}:
                chainID = 'default'

            elif not subst_chain.has_key(atmline[7]):
                if subst_chain.has_key('****'):
                    # NOTE(review): this lookup is only reached when
                    # atmline[7] is not a key, so for a plain dict it
                    # always raises and chainID ends up being 'default'.
                    try:
                        chainID = subst_chain[atmline[7]]
                    except:
                        chainID = 'default'
                else:
                    chainID = 'default'

            elif type(subst_chain[atmline[7]]) is types.StringType:
                # that is to say that only chains has this substructure name.
                chainID = subst_chain[atmline[7]]

            elif type(subst_chain[atmline[7]]) is types.ListType:
                # That is to say that several chains have the same substructure.
                chainID = subst_chain[atmline[7]][0]
                # NOTE(review): list.remove() returns None, so None is
                # stored under this key.  Later atom lines with the same
                # substructure name then match none of the branches above
                # and keep the chainID of the previous iteration --
                # confirm that this is the intended behaviour.
                subst_chain[atmline[7]] = subst_chain[atmline[7]].remove(
                    chainID)

            # switch to (or create) the chain with that id
            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or not chainID in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol, top=self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            # NOTE(review): atmline[7] has already been indexed above
            # unless subst_chain is empty and the atom is not a water, and
            # the else branch needs at least 8 fields although the test is
            # "< 7".
            if len(atmline) < 7:
                # test if the atmline has a res name and resseq:
                resName = 'RES'
                resSeq = '1'
            else:
                resName = atmline[7][:3]
                resSeq = atmline[7][3:]

            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = string.strip(resName) + string.strip(resSeq)
                res = self.mol.curChain.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName,
                                              resSeq,
                                              '',
                                              self.mol.curChain,
                                              top=self.mol)
            name = atmline[1]
            # flag the residues that have a CA and an O atom
            if name == 'CA': self.mol.curRes.hasCA = 1
            if name == 'O': self.mol.curRes.hasO = 2
            # the chemical element is the part of the atom type (field 5)
            # that precedes the first '.'
            atom = Atom(name,
                        self.mol.curRes,
                        top=self.mol,
                        chemicalElement=string.split(atmline[5], '.')[0])
            #atom.element = atmline[5][0]
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            # index the atom by its number from the file
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[
                float(atmline[2]),
                float(atmline[3]),
                float(atmline[4])
            ]]
            # the charge (field 8) is optional
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
#            atom.conformation = 0
            atom.hetatm = 0
            # Add a data member containing a list of strings describing
            # the Sybyl status bits of the atom.
            atom.status = status
            #add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        # the construction helpers are not part of the finished molecule
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')
        #rec = rec + ' %-2.2s'%atm.autodock_element.upper()
##         #NB: write 'A' in element slot for aromatic carbons
##         if atm.autodock_element=='A':
##             #in this case, columns 78+79 are blanks
##             rec = rec + 'A  '
##         else:
##             #rec = rec + '%2.2s'%atm.element
##             #5/19:
##             #columns 78+79: autodock_element
##             rec = rec + '%s '%atm.autodock_element
##             #if atm.element!=atm.autodock_element:
##             #    #eg HD or NA or SA or OA, always 2 chars
##             #    rec = rec + '%s '%atm.autodock_element[1]
##             #else:
##             #    rec = rec + '  '
        rec = rec + '\n'
        return rec




if __name__ == '__main__':
    # Manual check: read a PDB file, add two user records to a writer and
    # write the molecule back out.  The paths are site specific.
    from MolKit.protein import Protein
    from MolKit.pdbParser import PdbParser

    inputPath = '/tsri/pdb/struct/4tpi.pdb'
    outputPath = '/home/ktchan/jumble.pdb'
    titleRecord = [('', 'This is the title record\n')]

    mol = Protein()
    mol.read(inputPath, PdbParser())

    writer = PdbWriter()
    writer.add_userRecord('REMARK')
    writer.add_userRecord('TITLE ', titleRecord)
    writer.write(outputPath, mol)
Exemple #23
0
class FloodPlayer(Player):
    def __init__(self, command, file):
        """
        Build a player stepping through the floods stored in a pickle file.

        command -- object giving access to the viewer through command.vf;
                   command.vf.GUI.ROOT is used as master of the player and
                   command.vf.AutoLigandCommand provides the spheres and
                   halo geometries.
        file    -- path of the pickle file.  It is read as a sequence of
                   pickled objects:
                   1. a sequence whose first 7 values are xcent, ycent,
                      zcent, centerx, centery, centerz and spacing;
                   2. an object whose entry 1 is the first flood (a list
                      of points);
                   3. any number of objects whose entry 0 lists the points
                      to remove from the previous flood and whose entry 1
                      lists the points to add to it.
                   In a point, entries 1-3 are the grid indices and entry
                   4 is the atom type code (1 to 7).

        A molecule holding one atom for the first point of the first flood
        is added to the viewer, then the first frame is displayed.

        Any error raised while loading the first object is printed and
        re-raised.  A KeyError is raised for an unknown atom type code.
        """
        # atom type code -> (atom name, index of its radius in AAradii)
        # note, code 7 will color the NA atom pink (the PDB color for
        # Phosphorus)
        atomTypes = {
            7: ('P', 13),
            6: ('S', 13),
            5: ('A', 10),
            4: ('O', 1),
            3: ('N', 4),
            2: ('C', 10),
            1: ('H', 15),
        }
        master = command.vf.GUI.ROOT
        self.autoLigandCommand = command.vf.AutoLigandCommand
        self.autoLigandCommand.spheres.Set(visible=1)
        self.autoLigandCommand.halo.Set(visible=1)
        self.floods = []
        # NOTE: unpickling can execute arbitrary code; only open flood
        # files coming from a trusted source.
        # The with block closes the file even when loading fails.
        with open(file, 'rb') as pkl_file:
            try:
                data = pickle.load(pkl_file)
            except Exception as inst:
                # report the pickle that failed, then give up: nothing
                # below can work without the grid description
                print("Error loading ", file, "\n", inst)
                raise
            self.xcent = data[0]
            self.ycent = data[1]
            self.zcent = data[2]
            self.centerx = data[3]
            self.centery = data[4]
            self.centerz = data[5]
            self.spacing = data[6]
            self.centers = []
            data = pickle.load(pkl_file)
            self.floods.append(data[1])
            try:
                # every further object is a difference with the previous
                # flood; the end of the file ends the loop
                while data:
                    data = pickle.load(pkl_file)
                    flood = copy.copy(self.floods[-1])
                    for item in data[0]:
                        flood.remove(item)
                    for item in data[1]:
                        flood.append(item)
                    self.floods.append(flood)
            except EOFError:
                pass

        fileName = os.path.splitext(os.path.split(file)[-1])[0]
        self.mol = Protein(fileName)
        self.mol.allAtoms = AtomSet([])
        chain = Chain()
        self.residue = Residue(type="UNK")
        chain.adopt(self.residue, setChildrenTop=1)
        self.mol.adopt(chain, setChildrenTop=1)
        self.mol.parser = None
        self.filename = file

        # seed the molecule with the first point of the first flood
        fl = self.floods[0][0]
        x = (fl[1] - self.xcent) * self.spacing + self.centerx
        y = (fl[2] - self.ycent) * self.spacing + self.centery
        z = (fl[3] - self.zcent) * self.spacing + self.centerz
        atomchr, radiusIndex = atomTypes[fl[4]]
        radius = AAradii[radiusIndex][0]
        a = Atom(atomchr, self.residue, atomchr, top=self.mol)
        a._coords = [[x, y, z]]
        a._charges = {}
        a.hetatm = 1
        a.number = 0
        a.radius = radius
        self.mol.allAtoms = self.residue.atoms
        self.mol = self.autoLigandCommand.vf.addMolecule(self.mol, False)
        self.mol.levels = [Protein, Chain, Residue, Atom]
        self.autoLigandCommand.vf.displayCPK(self.mol, scaleFactor=0.4)
        self.autoLigandCommand.vf.colorByAtomType(self.mol, ['cpk'], log=0)
        self.autoLigandCommand.vf.displayLines(self.mol,
                                               negate=True,
                                               displayBO=False,
                                               lineWidth=2,
                                               log=0,
                                               only=False)
        # the color keys of the first atom are reused for the atoms
        # created by nextFrame
        self.colorKeys = list(a.colors.keys())
        maxLen = len(self.floods) - 1
        Player.__init__(self,
                        master=master,
                        endFrame=maxLen,
                        maxFrame=maxLen,
                        titleStr="AutoLigand Flood Player",
                        hasSlider=True)
        try:  # withdrew SetAnim button
            self.form.ifd.entryByName['setanimB']['widget'].grid_forget()
            self.form.autoSize()
        except Exception:
            # best effort only: the player works with the button in place
            pass
        self.nextFrame(0)
        self.form.root.protocol('WM_DELETE_WINDOW', self.hide_cb)

    def nextFrame(self, id):
        """
        Display the flood number id.

        Nothing is done when id already is the current frame.  Otherwise
        the counter entry and the slider are updated, an atom is created
        for every point of the flood whose coordinates are not among the
        coordinates of the current atoms, the atoms whose coordinates are
        not in the flood are removed, and the CPK geometry is updated.
        The halo is hidden on the last frame, else it is set on the newly
        added atoms.
        """
        #Player.nextFrame(self, id)
        id = int(id)
        if id == self.currentFrameIndex:
            return
        if self.hasCounter and self.gui:
            counter = self.form.ent2
            counter.delete(0, 'end')
            counter.insert(0, str(id))
            if self.hasSlider:
                self.form.ifd.entryByName['slider']['widget'].set(id)
        self.currentFrameIndex = id

        removeAtoms = AtomSet([])
        addAtoms = AtomSet([])
        # coordinates of the flood points which already have an atom
        centers = []
        prev_coords = self.mol.allAtoms.coords

        for fl in self.floods[id]:
            # grid indices -> cartesian coordinates
            x = (fl[1] - self.xcent) * self.spacing + self.centerx
            y = (fl[2] - self.ycent) * self.spacing + self.centery
            z = (fl[3] - self.zcent) * self.spacing + self.centerz
            typeCode = fl[4]
            if typeCode == 7:
                # note, this will color the NA atom pink (the PDB color for Phosphorus)
                atomchr = 'P'
                radius = AAradii[13][0]
            elif typeCode == 6:
                atomchr = 'S'
                radius = AAradii[13][0]
            elif typeCode == 5:
                atomchr = 'A'
                radius = AAradii[10][0]
            elif typeCode == 4:
                atomchr = 'O'
                radius = AAradii[1][0]
            elif typeCode == 3:
                atomchr = 'N'
                radius = AAradii[4][0]
            elif typeCode == 2:
                atomchr = 'C'
                radius = AAradii[10][0]
            elif typeCode == 1:
                atomchr = 'H'
                radius = AAradii[15][0]

            point = [x, y, z]
            if point in prev_coords:
                centers.append(point)
                continue
            a = Atom(atomchr, self.residue, atomchr, top=self.mol)
            a._coords = [point]
            a._charges = {}
            a.hetatm = 1
            a.radius = radius
            addAtoms.append(a)
            for key in self.colorKeys:
                a.colors[key] = AtomElements[atomchr]
                a.opacities[key] = 1.0

        # the atoms which are not part of this flood have to go
        for coord in prev_coords:
            if coord not in centers:
                removeAtoms.append(
                    self.mol.allAtoms[prev_coords.index(coord)])

        #this is needed to avoid Traceback later on
        self.residue.assignUniqIndex()
        #stringRepr can be very large causing memory errors
        self.mol.allAtoms.stringRepr = None

        cpk = self.autoLigandCommand.vf.displayCPK
        cpk.updateGeom(AddAtomsEvent(objects=addAtoms))
        cpk.updateGeom(DeleteAtomsEvent(objects=removeAtoms))
        for atom in removeAtoms:
            self.residue.atoms.remove(atom)

        halo = self.autoLigandCommand.halo
        if id == self.maxFrame:
            halo.Set(visible=0)
        else:
            halo.Set(centers=addAtoms.coords,
                     materials=((1, 1, 0, 0.5), ),
                     radii=0.4)

    def hide_cb(self):
        """
        Call hideGeoms() on the AutoLigand command and destroy the form of
        the player.  Registered in __init__ as the WM_DELETE_WINDOW
        handler of the form.
        """
        self.autoLigandCommand.hideGeoms()
        self.form.destroy()