def buildPdb(map_dict, npts, name='DlgBuilt', ctr=0, outputfile='results.pdb', scale=1.0): if debug: print "in buildPdb: tolerance=", tolerance name = 'DlgBuilt' mol = Protein(name=name) mol.curChain = Chain() mol.chains = ChainSet([mol.curChain]) mol.curRes = Residue() mol.curChain.adopt(mol.curRes) mol.allAtoms = AtomSet() mol.curRes.atoms = mol.allAtoms nzpts=nypts=nxpts = npts #nxpts, nypts, nzpts = npts ctr = 0 for ADtype, m in map_dict.items(): if debug: print "PROCESSING ", ADtype, " array:", max(m.ravel()), ':', min(m.ravel()) vals = [] tctr = 0 #for number of each type for z in range(nzpts): for y in range(nypts): for x in range(nxpts): val = scale * abs(m[x,y,z]) vals.append(val) #if abs(val)>.005: if val>tolerance*scale: ctr += 1 name = ADtype + str(ctr) #version3: #info_lo = (xcen - numxcells*spacing, # ycen - numycells*spacing, # zcen - numzcells *spacing) #using lower back pt of cube, i think #xcoord = (x-info_lo[0])/spacing #ycoord = (y-info_lo[1])/spacing #zcoord = (z-info_lo[2])/spacing #version2: xcoord = (x-numxcells)*spacing + xcen ycoord = (y-numycells)*spacing + ycen zcoord = (z-numzcells)*spacing + zcen coords = (xcoord,ycoord,zcoord) tctr += 1 # #print "addAtom: name=",name,"ADtype=", ADtype," val=", val, "coords=", coords,"ctr=", ctr addAtom(mol, name, ADtype, val, coords, ctr) print "added ",tctr, '<-', ADtype, " atoms" if debug: print ADtype, ':', tctr , ' ', ctr print "total atoms=", ctr writer = PdbWriter() writer.write(outputfile, mol.allAtoms, records=['ATOM'])
def test_secondaryStructure():
    """Smoke test: read 1crn, compute its secondary structure and extrude it.

    Reads the hard-coded file /tsri/pdb/struct/1crn.pdb, calls getSS() on the
    molecule and then extrudeSS() on every secondary structure element of
    every chain.  Progress is reported on stdout; nothing is returned.
    """
    from MolKit.pdbParser import PdbParser
    from MolKit.protein import Protein

    print('create an object Protein crn')
    crn = Protein()

    print('read the pdb file')
    crn.read('/tsri/pdb/struct/1crn.pdb', PdbParser())

    print('create an object secondarystructureSet for each chain of crn')
    crn.getSS()

    print('create the geometries for each structures of crn')
    extrudestructure = []
    for chain_index in range(len(crn.chains)):
        structures = crn.chains[chain_index].secondarystructureset
        for ss_index in range(len(structures)):
            geometry = structures[ss_index].extrudeSS()
            extrudestructure.append(geometry)
def copy(self, newname=None):
    """Return a new Protein built from the atoms of this one.

    newname -- name of the new molecule; when empty or None the name is
               self.name + "_copy".

    The new Protein receives this molecule's parent, elementType,
    childrenName, setClass, childrenSetClass, top and parser.  Every atom of
    self.allAtoms is then handed to self._fit_atom_into_tree(), and finally
    buildBondsByDistance() is called on the new molecule.

    Usage note carried over from the original documentation -- the copy can
    be displayed and moved independently of the source molecule:
        copy2 = mv.Mols[0].copy()
        mv.addMolecule(copy2)
        mv.GUI.VIEWER.TransformRootOnly(yesno=0)
        mv.GUI.VIEWER.currentObject = copy2.geomContainer.geoms['master']
    """
    clone_name = newname or (self.name + "_copy")
    clone = Protein(
        name=clone_name,
        parent=self.parent,
        elementType=self.elementType,
        childrenName=self.childrenName,
        setClass=self.setClass,
        childrenSetClass=self.childrenSetClass,
        top=self.top,
    )
    # scratch state used while atoms are inserted into the new tree
    clone.curChain = Chain()
    clone.curRes = Residue()
    clone.allAtoms = AtomSet()
    clone.parser = self.parser

    for source_atom in self.allAtoms:
        self._fit_atom_into_tree(clone, source_atom)

    clone.buildBondsByDistance()
    return clone
def buildPdb(map_dict, npts, name='DlgBuilt', ctr=0, outputfile='results.pdb', scale=1.0): if debug: print "in buildPdb: tolerance=", tolerance name = 'DlgBuilt' mol = Protein(name=name) mol.curChain = Chain() mol.chains = ChainSet([mol.curChain]) mol.curRes = Residue() mol.curChain.adopt(mol.curRes) mol.allAtoms = AtomSet() mol.curRes.atoms = mol.allAtoms nzpts = nypts = nxpts = npts #nxpts, nypts, nzpts = npts ctr = 0 for ADtype, m in map_dict.items(): if debug: print "PROCESSING ", ADtype, " array:", max(m.ravel()), ':', min( m.ravel()) vals = [] tctr = 0 #for number of each type for z in range(nzpts): for y in range(nypts): for x in range(nxpts): val = scale * abs(m[x, y, z]) vals.append(val) #if abs(val)>.005: if val > tolerance * scale: ctr += 1 name = ADtype + str(ctr) #version3: #info_lo = (xcen - numxcells*spacing, # ycen - numycells*spacing, # zcen - numzcells *spacing) #using lower back pt of cube, i think #xcoord = (x-info_lo[0])/spacing #ycoord = (y-info_lo[1])/spacing #zcoord = (z-info_lo[2])/spacing #version2: xcoord = (x - numxcells) * spacing + xcen ycoord = (y - numycells) * spacing + ycen zcoord = (z - numzcells) * spacing + zcen coords = (xcoord, ycoord, zcoord) tctr += 1 # #print "addAtom: name=",name,"ADtype=", ADtype," val=", val, "coords=", coords,"ctr=", ctr addAtom(mol, name, ADtype, val, coords, ctr) print "added ", tctr, '<-', ADtype, " atoms" if debug: print ADtype, ':', tctr, ' ', ctr print "total atoms=", ctr writer = PdbWriter() writer.write(outputfile, mol.allAtoms, records=['ATOM'])
def build2LevelsTree(self, atomlines):
    """Build a two level Molecule -> Atom tree and store it in self.mol.

    atomlines -- sequence of already split atom records.  Fields used:
                 [0] atom number, [1] atom name, [2:5] x, y, z,
                 [5] atom type (the text before the first '.' is used as the
                 chemical element) and, when at least 9 fields are present,
                 [8] the charge stored under the 'mol2' charge set.
    """
    print('try to build a 2 level tree')
    mol = Molecule()
    self.mol = mol
    mol.allAtoms = AtomSet()
    mol.atmNum = {}
    mol.parser = self
    if mol.name == 'NoName':
        # fall back to the file name without directory and extension
        root = os.path.splitext(self.filename)[0]
        mol.name = os.path.basename(root)
    mol.children = AtomSet([])
    mol.childrenName = 'atoms'
    mol.childrenSetClass = AtomSet
    mol.elementType = Atom
    mol.levels = [Molecule, Atom]

    for fields in atomlines:
        element = fields[5].split('.')[0]
        atom = Atom(fields[1], mol, chemicalElement=element, top=mol)
        atom.element = atom.chemElem
        atom.number = int(fields[0])
        mol.atmNum[atom.number] = atom
        x, y, z = float(fields[2]), float(fields[3]), float(fields[4])
        atom._coords = [[x, y, z]]
        if len(fields) >= 9:
            atom._charges['mol2'] = float(fields[8])
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None
        mol.allAtoms.append(atom)

    mol.atoms = mol.children
def build2LevelsTree(self, atomlines):
    """Create self.mol as a flat Molecule whose direct children are atoms.

    atomlines -- list of atom records, each one a list of string fields:
                 number, name, x, y, z, type[, ..., ..., charge].  The part
                 of the type field before the first '.' becomes the chemical
                 element; the charge (field 8) is optional.
    """
    print('try to build a 2 level tree')
    molecule = self.mol = Molecule()
    molecule.allAtoms = AtomSet()
    molecule.atmNum = {}
    molecule.parser = self
    if molecule.name == 'NoName':
        # unnamed molecule: use the base name of the parsed file
        molecule.name = os.path.basename(os.path.splitext(self.filename)[0])
    molecule.children = AtomSet([])
    molecule.childrenName = 'atoms'
    molecule.childrenSetClass = AtomSet
    molecule.elementType = Atom
    molecule.levels = [Molecule, Atom]

    for record in atomlines:
        has_charge = len(record) >= 9
        atom = Atom(record[1], molecule,
                    chemicalElement=record[5].split('.')[0],
                    top=molecule)
        atom.element = atom.chemElem
        atom.number = int(record[0])
        molecule.atmNum[atom.number] = atom
        atom._coords = [[float(value) for value in record[2:5]]]
        if has_charge:
            atom._charges['mol2'] = float(record[8])
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # buildBondsByDist requires the altname attribute to exist
        atom.altname = None
        molecule.allAtoms.append(atom)

    molecule.atoms = molecule.children
def parse(self, objClass=Protein):
    """Parse the lines of a GRO file into a Protein/Chain/Residue/Atom tree.

    objClass -- accepted for interface compatibility; not used by this parser.

    Line 1 of the file holds the atom count; atom records start at line 2
    and are read by fixed columns: residue number [0:5], residue name [5:10],
    atom name [10:15], atom number [15:20], x [20:28], y [28:36], z [36:44].
    Coordinates are multiplied by 10 (GRO positions are in nm; presumably
    this converts them to Angstrom).  All atoms go into one chain with id
    'GRO'.

    Returns the molecule set containing the parsed molecule, or None when
    there are no lines to parse.
    """
    if self.allLines is None and self.filename:
        self.readFile()
    if self.allLines is None or len(self.allLines) == 0:
        return
    mol = Protein()
    self.mol = mol
    molList = mol.setClass()
    molList.append(mol)
    current_residue_number = None
    current_residue = None
    number_of_atoms = int(self.allLines[1][:5])
    self.configureProgressBar(init=1, mode='increment',
                              authtext='parse atoms', max=number_of_atoms)
    current_chain = Chain(id='GRO')
    # FIX this: The existence of allAtoms attribute (and the fact that it is
    # an empty set rather than all atoms in the chain) causes
    # getNodesByMolecule() to return wrong values
    if hasattr(current_chain, "allAtoms"):
        del current_chain.allAtoms
    mol.adopt(current_chain, setChildrenTop=1)

    for index in range(2, number_of_atoms + 2):
        line = self.allLines[index]
        residue_number = int(line[:5])
        if residue_number != current_residue_number:
            # residue number changed: start a new residue in the chain
            residue_type = line[5:10].split(' ')[0]
            current_residue = Residue(type=residue_type,
                                      number=residue_number)
            current_residue_number = residue_number
            current_chain.adopt(current_residue, setChildrenTop=1)

        name = line[10:15].split(' ')[-1]
        # Element guess: a name that is a known element symbol is used as
        # is; names starting with 'Me' (outside "System"/"SOL" residues)
        # become carbon; everything else uses the first character.
        if name in babel_elements:
            element = name
        elif name[:2] == 'Me' and residue_type not in ("System", "SOL"):
            element = 'C'
        else:
            element = name[0]

        atom = Atom(name, current_residue, element, top=mol)
        atom._coords = [[float(line[20:28]) * 10,
                         float(line[28:36]) * 10,
                         float(line[36:44]) * 10]]
        # was a list; the charge container is keyed by charge-set name
        # everywhere else (e.g. atom._charges['mol2']), so it must be a dict
        atom._charges = {}
        # NOTE(review): mol.name is only assigned after this loop, so segID
        # receives the molecule's initial name -- confirm this is intended.
        atom.segID = mol.name
        atom.normalname = name
        atom.number = int(line[15:20])
        atom.elementType = name[0]
        mol.atmNum[atom.number] = atom
        atom.altname = None
        atom.hetatm = 0

    mol.name = os.path.split(os.path.splitext(self.filename)[0])[-1]
    mol.allAtoms = mol.chains.residues.atoms
    mol.parser = self
    mol.levels = [Protein, Chain, Residue, Atom]

    # the loop here used to overwrite 'name' on every pass, keeping only the
    # last molecule; join all names instead (identical for a single molecule)
    name = ','.join(molList.name)
    molList.setStringRepr(name)
    molList.allAtoms.setStringRepr(name + ':::')
    for m in molList:
        mname = m.name
        m.allAtoms.setStringRepr(mname + ':::')
        m.chains.setStringRepr(mname + ':')
        for c in m.chains:
            cname = c.id
            c.residues.setStringRepr(mname + ':' + cname + ':')
            for r in c.residues:
                rname = r.name
                r.atoms.setStringRepr(
                    mname + ':' + cname + ':' + rname + ':')
    return molList
#5/19: rec = rec + ' %-2.2s' % atm.autodock_element #rec = rec + ' %-2.2s'%atm.autodock_element.upper() ## #NB: write 'A' in element slot for aromatic carbons ## if atm.autodock_element=='A': ## #in this case, columns 78+79 are blanks ## rec = rec + 'A ' ## else: ## #rec = rec + '%2.2s'%atm.element ## #5/19: ## #columns 78+79: autodock_element ## rec = rec + '%s '%atm.autodock_element ## #if atm.element!=atm.autodock_element: ## # #eg HD or NA or SA or OA, always 2 chars ## # rec = rec + '%s '%atm.autodock_element[1] ## #else: ## # rec = rec + ' ' rec = rec + '\n' return rec if __name__ == '__main__': from MolKit.protein import Protein from MolKit.pdbParser import PdbParser mol = Protein() mol.read('/tsri/pdb/struct/4tpi.pdb', PdbParser()) writer = PdbWriter() writer.add_userRecord('REMARK', ) writer.add_userRecord('TITLE ', [('', 'This is the title record\n')]) writer.write('/home/ktchan/jumble.pdb', mol)
def getMolecule(self, molInd):
    """Build the molecule stored at position molInd of an SD/MOL (V2000) file.

    molInd -- index into self.molIndex, which holds the first line number of
              every molecule found in self.allLines.

    The three header lines give the molecule name (line 0), mol.info
    (line 1) and mol.comment (line 2).  The counts line (line 3) gives the
    number of atoms and bonds, followed by the atom block and the bond
    block.  "M  ..." property lines and data items are not parsed, so
    charges given through "M  CHG" are ignored.

    Returns a molecule set holding one Protein made of a single chain '1'
    and a single residue 'UNK'.  Raises AssertionError when the counts line
    is not tagged V2000.
    """
    def _int_field(text, start, stop, default=0):
        """Return int(text[start:stop]); default if blank, short or invalid."""
        try:
            return int(text[start:stop])
        except ValueError:
            return default

    # charge codes of the V2000 atom block -> formal charge
    # (4 is "doublet radical", which carries no charge)
    charge_codes = {0: 0, 1: 3, 2: 2, 3: 1, 4: 0, 5: -1, 6: -2, 7: -3}

    if molInd == len(self.molIndex) - 1:
        # last molecule: take everything up to the end of the file (the
        # former -1 bound silently dropped the final line)
        lastLine = None
    else:
        lastLine = self.molIndex[molInd + 1]
    # lines for that molecule
    lines = self.allLines[self.molIndex[molInd]:lastLine]
    atomsSeen = {}  # dict of atom types and number of atoms seen

    # parse header
    molName = lines[0].strip()
    mol = Protein(name=molName)
    # header line 1 is the program/info line and line 2 the comment; both
    # attributes used to be read from line 4, i.e. the first atom record
    mol.info = lines[1]
    mol.comment = lines[2]
    chain = Chain(id='1', parent=mol, top=mol)
    res = Residue(type='UNK', number='1', parent=chain, top=mol)
    mol.levels = [Protein, Chain, Residue, Atom]

    # parse counts line
    lineIndex = 3
    line = lines[lineIndex]
    assert line[33:39] == " V2000", \
        "Format error: only V2000 is suported, got %s" % line[33:39]
    nba = int(line[0:3])  # number of atoms
    nbb = int(line[3:6])  # number of bonds
    lineIndex += 1

    # parse atoms; columns follow the V2000 atom block layout
    #   xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvv...
    for line in lines[lineIndex:lineIndex + nba]:
        element = line[31:34].strip()
        atomsSeen[element] = atomsSeen.get(element, 0) + 1
        atom = Atom(name='%s_%s' % (element, atomsSeen[element]),
                    parent=res, chemicalElement=element, top=mol)
        atom._coords = [[float(line[0:10]),
                         float(line[10:20]),
                         float(line[20:30])]]
        # 'ccc' occupies columns 36-38 and holds a code, not the charge
        atom._charges['sdf'] = charge_codes.get(_int_field(line, 36, 39), 0)
        atom.chargeSet = 'sdf'
        mol.allAtoms.append(atom)
        atom.massDiff = _int_field(line, 34, 36)
        atom.stereo = _int_field(line, 39, 42)
        atom.hcount = line[42:45]
        atom.valence = _int_field(line, 48, 51)
        atom.hetatm = 1
        atom.occupancy = 0.0
        atom.temperatureFactor = 0.0
    lineIndex += nba

    # parse bonds: one line per bond, nbb lines (the loop used to run over
    # the atom count and never advanced past the first bond line)
    for line in lines[lineIndex:lineIndex + nbb]:
        at1 = mol.allAtoms[int(line[0:3]) - 1]
        at2 = mol.allAtoms[int(line[3:6]) - 1]
        if at1.isBonded(at2):
            continue
        bond = Bond(at1, at2, check=0)
        # 1 = Single, 2 = Double, 3 = Triple, 4 = Aromatic,
        # 5 = Single or Double, 6 = Single or Aromatic,
        # 7 = Double or Aromatic, 8 = Any
        bond.bondOrder = int(line[6:9])
        # Single bonds: 0 = not stereo, 1 = Up, 4 = Either, 6 = Down
        # Double bonds: 0 = use x-, y-, z-coords from the atom block to
        # determine cis or trans, 3 = Cis or trans (either) double bond
        bond.stereo = _int_field(line, 9, 12)
        # 0 = Either, 1 = Ring, 2 = Chain
        bond.topo = _int_field(line, 15, 18)
        # 0 = unmarked, 1 = a center, -1 = not a center,
        # additional: 2 = no change, 4 = bond made/broken,
        # 8 = bond order changes, 12 = 4+8 (both made/broken and changes),
        # 5 = (4 + 1), 9 = (8 + 1), and 13 = (12 + 1)
        bond.ReactionCenter = _int_field(line, 18, 21)

    # "M END" and properties are not parsed at this point
    self.mol = mol
    mname = mol.name
    mol.allAtoms.setStringRepr(mname + ':::')
    mol.chains.setStringRepr(mname + ':')
    for c in mol.chains:
        cname = c.id
        c.residues.setStringRepr(mname + ':' + cname + ':')
        for r in c.residues:
            rname = r.name
            r.atoms.setStringRepr(mname + ':' + cname + ':' + rname + ':')

    molList = mol.setClass()
    molList.append(mol)
    mol.parser = self
    name = ','.join(molList.name)
    molList.setStringRepr(name)
    molList.allAtoms.setStringRepr(name + ':::')
    return molList
def parse(self, objClass=Protein):
    """Parse the mmCIF dictionary (self.mmCIF_dict) into a MolKit molecule set.

    objClass -- accepted for interface compatibility; not used here.

    Two layouts are understood:
      * macromolecular mmCIF, recognised by the '_atom_site.id' item;
      * small-molecule (core) CIF, recognised by '_atom_site_label'.  Its
        fractional coordinates are converted with Crystal.toCartesian(), all
        atoms are put in chain '1' / residue 'CIF' 1 as HETATM, and a
        missing '_atom_site_occupancy' item defaults every occupancy to 1.0.

    Returns the molecule set holding the parsed Protein, or None when there
    is nothing to parse or no atom records are present.
    """
    def _strip_esd(text):
        """Drop a trailing "(n)" standard-uncertainty suffix from a CIF number."""
        return text.split('(')[0]

    if self.allLines is None and self.filename:
        self.readFile()
    if self.allLines is None or len(self.allLines) == 0:
        return
    self.mmCIF2Dict()
    mmCIF_dict = self.mmCIF_dict
    type_symbol = None
    B_iso_or_equiv = None
    # stays None when the file gives no occupancies (this used to raise a
    # NameError in the small-molecule branch)
    occupancy = None
    fileName, fileExtension = os.path.splitext(self.filename)
    molName = os.path.basename(fileName)
    if '_entry.id' in mmCIF_dict:
        molName = mmCIF_dict['_entry.id']

    if '_atom_site.id' in mmCIF_dict:
        # The description of the data names can be found in the following link
        # http://mmcif.pdb.org/dictionaries/mmcif_pdbx.dic/Items
        ids = mmCIF_dict['_atom_site.id']                       # number
        group_PDB = mmCIF_dict['_atom_site.group_PDB']          # atom/hetatm
        atom_id = mmCIF_dict['_atom_site.label_atom_id']        # name
        comp_id = mmCIF_dict['_atom_site.label_comp_id']        # residue type
        # Note: chain ID from mmCIF file might be different from PDB file
        label_asym_id = mmCIF_dict['_atom_site.label_asym_id']  # chain
        seq_id = mmCIF_dict['_atom_site.label_seq_id']          # residue number
        x_coords = mmCIF_dict['_atom_site.Cartn_x']
        y_coords = mmCIF_dict['_atom_site.Cartn_y']
        z_coords = mmCIF_dict['_atom_site.Cartn_z']
        occupancy = mmCIF_dict['_atom_site.occupancy']
        B_iso_or_equiv = mmCIF_dict['_atom_site.B_iso_or_equiv']
        type_symbol = mmCIF_dict['_atom_site.type_symbol']
    elif '_atom_site_label' in mmCIF_dict:
        # ftp://ftp.iucr.org/pub/cif_core.dic
        atom_id = mmCIF_dict['_atom_site_label']
        len_atoms = len(atom_id)
        ids = range(len_atoms)
        group_PDB = len_atoms * ['HETATM']
        comp_id = len_atoms * ["CIF"]
        label_asym_id = len_atoms * ['1']
        seq_id = len_atoms * [1]

        from mglutil.math.crystal import Crystal
        # Copy the core-CIF cell items to their mmCIF names as floats
        # (presumably consumed later by parse_MMCIF_CELL -- verify).
        cell = []
        for tag in ('length_a', 'length_b', 'length_c',
                    'angle_alpha', 'angle_beta', 'angle_gamma'):
            value = float(_strip_esd(mmCIF_dict['_cell_' + tag]))
            mmCIF_dict['_cell.' + tag] = value
            cell.append(value)
        cryst = Crystal(tuple(cell[:3]), tuple(cell[3:]))

        x = [float(_strip_esd(item))
             for item in mmCIF_dict['_atom_site_fract_x']]
        y = [float(_strip_esd(item))
             for item in mmCIF_dict['_atom_site_fract_y']]
        z = [float(_strip_esd(item))
             for item in mmCIF_dict['_atom_site_fract_z']]

        x_coords = []
        y_coords = []
        z_coords = []
        B_iso_or_equiv = []
        has_U_iso = '_atom_site_U_iso_or_equiv' in mmCIF_dict
        for i in ids:
            trans = cryst.toCartesian([x[i], y[i], z[i]])
            x_coords.append(trans[0])
            y_coords.append(trans[1])
            z_coords.append(trans[2])
            if has_U_iso:
                B_iso_or_equiv.append(
                    _strip_esd(mmCIF_dict['_atom_site_U_iso_or_equiv'][i]))

        if '_atom_site_type_symbol' in mmCIF_dict:
            type_symbol = mmCIF_dict['_atom_site_type_symbol']
        if '_atom_site_occupancy' in mmCIF_dict:
            occupancy = [_strip_esd(item)
                         for item in mmCIF_dict['_atom_site_occupancy']]
        if '_chemical_name_common' in mmCIF_dict:
            molName = mmCIF_dict['_chemical_name_common']
        elif '_chemical_name_mineral' in mmCIF_dict:
            molName = mmCIF_dict['_chemical_name_mineral']
        if '_symmetry_space_group_name_H-M' in mmCIF_dict:
            mmCIF_dict['_symmetry.space_group_name_H-M'] = \
                mmCIF_dict['_symmetry_space_group_name_H-M']
    else:
        print('No _atom_site.id or _atom_site_label record is available in %s'
              % self.filename)
        return None

    mol = Protein()
    self.mol = mol
    self.mol.allAtoms = AtomSet([])
    molList = mol.setClass()
    molList.append(mol)
    current_chain_id = None
    current_residue_number = None
    current_chain = None
    current_residue = None
    number_of_atoms = len(ids)
    self.configureProgressBar(init=1, mode='increment',
                              authtext='parse atoms', max=number_of_atoms)

    for index in range(number_of_atoms):
        chain_id = label_asym_id[index]
        chain_changed = chain_id != current_chain_id
        if chain_changed:
            current_chain = Chain(id=chain_id)
            # FIXME: current_chain should not have allAtoms attribute
            delattr(current_chain, "allAtoms")
            current_chain_id = chain_id
            mol.adopt(current_chain, setChildrenTop=1)

        residue_number = seq_id[index]
        # a new chain always starts a new residue, even when the residue
        # number happens to repeat across the chain boundary
        if residue_number != current_residue_number or chain_changed:
            current_residue = Residue(type=comp_id[index],
                                      number=residue_number)
            current_residue_number = residue_number
            current_chain.adopt(current_residue, setChildrenTop=1)

        name = atom_id[index]
        if type_symbol:
            element = type_symbol[index]
        else:
            element = None
        atom = Atom(name, current_residue, element, top=mol)
        atom._coords = [[float(x_coords[index]),
                         float(y_coords[index]),
                         float(z_coords[index])]]
        atom._charges = {}
        atom.segID = mol.name
        atom.normalname = name
        atom.number = int(ids[index])
        mol.atmNum[atom.number] = atom
        if occupancy:
            atom.occupancy = float(occupancy[index])
        else:
            atom.occupancy = 1.0
        if B_iso_or_equiv:
            atom.temperatureFactor = float(B_iso_or_equiv[index])
        atom.altname = None
        atom.hetatm = 0
        if group_PDB[index] == 'HETATM':
            atom.hetatm = 1
        self.updateProgressBar()

    self.parse_MMCIF_CELL()
    try:
        self.parse_MMCIF_HYDBND()
    except Exception:
        # hydrogen-bond records are optional: report the failure and go on
        sys.stderr.write("Parsing Hydrogen Bond Record Failed in %s\n"
                         % self.filename)

    mol.name = molName
    mol.allAtoms = mol.chains.residues.atoms
    mol.parser = self
    mol.levels = [Protein, Chain, Residue, Atom]

    # join all molecule names (the former loop kept only the last one)
    name = ','.join(molList.name)
    molList.setStringRepr(name)
    molList.allAtoms.setStringRepr(name + ':::')
    for m in molList:
        mname = m.name
        m.allAtoms.setStringRepr(mname + ':::')
        m.chains.setStringRepr(mname + ':')
        for c in m.chains:
            cname = c.id
            c.residues.setStringRepr(mname + ':' + cname + ':')
            for r in c.residues:
                rname = r.name
                r.atoms.setStringRepr(
                    mname + ':' + cname + ':' + rname + ':')
    self.buildBonds()
    return molList
class FloodPlayer(Player): def __init__(self, command, file): master = command.vf.GUI.ROOT self.autoLigandCommand = command.vf.AutoLigandCommand self.autoLigandCommand.spheres.Set(visible=1) self.autoLigandCommand.halo.Set(visible=1) pkl_file = open(file, 'rb') self.floods = [] try: data = cPickle.load(pkl_file) except Exception, inst: print "Error loading ", __file__, "\n", inst self.xcent = data[0] self.ycent = data[1] self.zcent = data[2] self.centerx = data[3] self.centery = data[4] self.centerz = data[5] self.spacing = data[6] self.centers = [] data = cPickle.load(pkl_file) self.floods.append(data[1]) try: while data: data = cPickle.load(pkl_file) flood = copy.copy(self.floods[-1]) for item in data[0]: flood.remove(item) for item in data[1]: flood.append(item) self.floods.append(flood) except EOFError: pass pkl_file.close() fileName = os.path.splitext(os.path.split(file)[-1])[0] self.mol = Protein(fileName) self.mol.allAtoms = AtomSet([]) chain = Chain() self.residue = Residue(type="UNK") chain.adopt(self.residue, setChildrenTop=1) self.mol.adopt(chain, setChildrenTop=1) self.mol.parser = None self.filename = file fl = self.floods[0][0] x = (fl[1] - self.xcent) * self.spacing + self.centerx y = (fl[2] - self.ycent) * self.spacing + self.centery z = (fl[3] - self.zcent) * self.spacing + self.centerz if fl[4] == 7: atomchr = 'P' # note, this will color the NA atom pink (the PDB color for Phosphorus) radius = AAradii[13][0] if fl[4] == 6: atomchr = 'S' radius = AAradii[13][0] if fl[4] == 5: atomchr = 'A' radius = AAradii[10][0] if fl[4] == 4: atomchr = 'O' radius = AAradii[1][0] if fl[4] == 3: atomchr = 'N' radius = AAradii[4][0] if fl[4] == 2: atomchr = 'C' radius = AAradii[10][0] if fl[4] == 1: atomchr = 'H' radius = AAradii[15][0] a = Atom(atomchr, self.residue, atomchr, top=self.mol) a._coords = [[x, y, z]] a._charges = {} a.hetatm = 1 a.number = 0 a.radius = radius self.mol.allAtoms = self.residue.atoms self.mol = 
self.autoLigandCommand.vf.addMolecule(self.mol, False) self.mol.levels = [Protein, Chain, Residue, Atom] self.autoLigandCommand.vf.displayCPK(self.mol, scaleFactor=0.4) self.autoLigandCommand.vf.colorByAtomType(self.mol, ['cpk'], log=0) self.autoLigandCommand.vf.displayLines(self.mol, negate=True, displayBO=False, lineWidth=2, log=0, only=False) self.colorKeys = a.colors.keys() maxLen = len(self.floods) - 1 Player.__init__(self, master=master, endFrame=maxLen, maxFrame=maxLen, titleStr="AutoLigand Flood Player", hasSlider=True) try: # withdrew SetAnim button self.form.ifd.entryByName['setanimB']['widget'].grid_forget() self.form.autoSize() except: pass self.nextFrame(0) self.form.root.protocol('WM_DELETE_WINDOW', self.hide_cb)
def build4LevelsTree(self, subst_chain, atomlines):
    """Build a 4 level hierarchy Protein-Chain-Residue-Atom in self.mol.

    subst_chain -- dict: substructure name -> chain id (string) or list of
                   chain ids.  It is modified in place: water substructures
                   are added under generated 'HOH<n>' names and a list value
                   is replaced by None once its first chain id was used.
    atomlines   -- list of split atom records (lists of strings), modified
                   in place when fields have to be repaired.  Fields:
                   [0] number, [1] name, [2:5] x y z, [5] atom type,
                   [7] substructure name (3-char residue name + number),
                   [8] charge (optional), [9] '|'-separated status bits
                   (optional).

    Records too short to carry a substructure name go to chain 'default',
    residue 'RES' 1.
    """
    mol = Protein()
    self.mol = mol
    mol.allAtoms = AtomSet()
    mol.atmNum = {}
    mol.parser = self
    if mol.name == 'NoName':
        mol.name = os.path.basename(os.path.splitext(self.filename)[0])
    mol.curChain = Chain()
    mol.curRes = Residue()
    mol.levels = [Protein, Chain, Residue, Atom]

    water_count = 1
    chainID = 'default'
    for atmline in atomlines:
        if len(atmline) >= 10:
            status = atmline[9].split('|')
        else:
            status = None

        if len(atmline) == 8:
            # field 5 is split after its 5th character and the remainder
            # is inserted as field 6 (presumably two columns that ran
            # together in the file -- confirm against the mol2 writer)
            merged = atmline[5]
            atmline[5] = merged[:5]
            atmline.insert(6, merged[5:])

        if status and status[0] == 'WATER':
            # every water gets its own substructure name in chain 'W'
            atmline[7] = 'HOH' + str(water_count)
            subst_chain[atmline[7]] = 'W'
            water_count += 1

        # the substructure name needs 8 fields; the old "len < 7" guard
        # still indexed atmline[7] for 7-field records
        if len(atmline) >= 8:
            subst_name = atmline[7]
        else:
            subst_name = None

        if subst_name is None or subst_name not in subst_chain:
            chainID = 'default'
        else:
            owner = subst_chain[subst_name]
            if isinstance(owner, str):
                # only one chain has this substructure name
                chainID = owner
            elif isinstance(owner, list):
                # several chains have the same substructure name
                chainID = owner[0]
                # NOTE(review): the original stored the result of
                # list.remove() here, which is always None.  Kept as is:
                # the following atoms of this substructure then match no
                # branch and inherit chainID from the previous atom.
                # Whether the remaining chain ids should be consumed for
                # later residues of the same name is unresolved.
                subst_chain[subst_name] = None
            # any other value (e.g. None): keep the previous chainID

        if chainID != mol.curChain.id:
            if not mol.chains.id or chainID not in mol.chains.id:
                mol.curChain = Chain(chainID, mol, top=mol)
            else:
                mol.curChain = mol.chains.get(chainID)[0]

        if subst_name is None:
            # the record has no residue name / sequence number
            resName = 'RES'
            resSeq = '1'
        else:
            resName = subst_name[:3]
            resSeq = subst_name[3:]

        if resSeq != mol.curRes.number or resName != mol.curRes.type:
            # check if this residue already exists
            na = resName.strip() + resSeq.strip()
            res = mol.curChain.get(na)
            if res:
                mol.curRes = res[0]
            else:
                mol.curRes = Residue(resName, resSeq, '', mol.curChain,
                                     top=mol)

        name = atmline[1]
        if name == 'CA':
            mol.curRes.hasCA = 1
        if name == 'O':
            mol.curRes.hasO = 2

        atom = Atom(name, mol.curRes, top=mol,
                    chemicalElement=atmline[5].split('.')[0])
        atom.element = atom.chemElem
        atom.number = int(atmline[0])
        mol.atmNum[atom.number] = atom
        atom._coords = [[float(atmline[2]), float(atmline[3]),
                         float(atmline[4])]]
        if len(atmline) >= 9:
            atom._charges['mol2'] = float(atmline[8])
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # list of strings describing the Sybyl status bits of the atom
        atom.status = status
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None
        mol.allAtoms.append(atom)

    # curRes / curChain were only needed while building the tree
    delattr(mol, 'curRes')
    delattr(mol, 'curChain')
def build3LevelsTree(self, atomlines):
    """Build a 3 level hierarchy Molecule-substructure-atoms in self.mol.

    atomlines -- list of split atom records (lists of strings).  Fields:
                 [0] number, [1] name, [2:5] x y z, [5] atom type,
                 [7] substructure name (3-char residue name + number),
                 [8] charge (optional), [9] '|'-separated status bits
                 (optional).

    The residues are direct children of the Protein; its 'chains' attribute
    is removed at the end.
    """
    mol = Protein()
    self.mol = mol
    mol.allAtoms = AtomSet()
    mol.atmNum = {}
    mol.parser = self
    if mol.name == 'NoName':
        mol.name = os.path.basename(os.path.splitext(self.filename)[0])
    mol.children = ResidueSet([])
    mol.childrenName = 'residues'
    mol.childrenSetClass = ResidueSet
    mol.elementType = Residue
    mol.curRes = Residue()
    mol.curRes.hasCA = 0
    mol.curRes.hasO = 0
    mol.levels = [Protein, Residue, Atom]

    for atmline in atomlines:
        if len(atmline) >= 10:
            status = atmline[9].split('|')
        else:
            status = None
        resName = atmline[7][:3]
        resSeq = atmline[7][3:]
        if resSeq != mol.curRes.number or resName != mol.curRes.type:
            # check if this residue already exists
            na = resName.strip() + resSeq.strip()
            res = mol.get(na)
            if res:
                mol.curRes = res[0]
            else:
                mol.curRes = Residue(resName, resSeq, '', mol, top=mol)

        name = atmline[1]
        if name == 'CA':
            mol.curRes.hasCA = 1
        if name == 'O':
            mol.curRes.hasO = 2

        atom = Atom(name, mol.curRes, top=mol,
                    chemicalElement=atmline[5].split('.')[0])
        atom.element = atom.chemElem
        atom.number = int(atmline[0])
        mol.atmNum[atom.number] = atom
        atom._coords = [[float(atmline[2]), float(atmline[3]),
                         float(atmline[4])]]
        # the charge column is optional, as in the 2 and 4 level builders
        if len(atmline) >= 9:
            atom._charges['mol2'] = float(atmline[8])
            # was the bare name mol2, which raised a NameError
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # list of strings describing the Sybyl status bits of the atom
        atom.status = status
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None
        mol.allAtoms.append(atom)

    mol.residues = mol.children
    # this tree has no chain level; remove the attribute when it is present
    # (an assert used to guard this and vanished under "python -O")
    if hasattr(mol, 'chains'):
        delattr(mol, 'chains')
    delattr(mol, 'curRes')
class Mol2Parser(MoleculeParser):
    """Parser building a MolKit molecule hierarchy from a Tripos mol2 file.

    ``parse`` reads the file, indexes its ``@<TRIPOS>`` records, builds a
    Protein-Chain-Residue-Atom tree from the ATOM (and, when present,
    SUBSTRUCTURE) records, then attaches the bonds and parses the sets.
    """

    # Record type indicators of the mol2 format.
    Mol2Tags = [
        "@<TRIPOS>ALT_TYPE", "@<TRIPOS>ANCHOR_ATOM",
        "@<TRIPOS>ASSOCIATED_ANNOTATION", "@<TRIPOS>ATOM", "@<TRIPOS>BOND",
        "@<TRIPOS>CENTER_OF_MASS", "@<TRIPOS>CENTROID", "@<TRIPOS>COMMENT",
        "@<TRIPOS>CRYSIN", "@<TRIPOS>CURR_POS", "@<TRIPOS>DICT",
        "@<TRIPOS>DATA_FILE", "@<TRIPOS>EXTENSION_POINT", "@<TRIPOS>FF_PBC",
        "@<TRIPOS>FFCON_ANGLE", "@<TRIPOS>FFCON_DIST", "@<TRIPOS>FFCON_RANGE",
        "@<TRIPOS>FFCON_TORSION", "@<TRIPOS>LINE", "@<TRIPOS>LSPLANE",
        "@<TRIPOS>MOLECULE", "@<TRIPOS>NORMAL", "@<TRIPOS>POLYBUILD_HIST",
        "@<TRIPOS>QSAR_ALIGN_RULE", "@<TRIPOS>RING_CLOSURE",
        "@<TRIPOS>ROTABLE_BOND", "@<TRIPOS>SEARCH_DIST",
        "@<TRIPOS>SEARCH_OPTIONS", "@<TRIPOS>SUBSTRUCTURE", "@<TRIPOS>U_FEAT",
    ]

    def __init__(self, filename):
        """Create a parser for the mol2 file `filename` (nothing is read yet)."""
        MoleculeParser.__init__(self, filename)
        self.mol2RecordParser = {}
        self.defaultReadOptions = [
            '@<TRIPOS>ATOM', '@<TRIPOS>BOND', '@<TRIPOS>MOLECULE',
            '@<TRIPOS>SET', '@<TRIPOS>SUBSTRUCTURE', '@<TRIPOS>DICT',
        ]
        # record name -> [index of first data line, index one past the last]
        self.keysAndLinesIndices = {}
        self.counter = 0
        # one entry per parsed SET record, see parse_MOL2_Sets
        self.setsDatas = []

    def _recordLines(self, record):
        """Return the lines of self.allLines that belong to `record`."""
        bounds = self.keysAndLinesIndices[record]
        return self.allLines[bounds[0]:bounds[1]]

    def getKeysAndLinesIndices(self):
        """Index the records of the file.

        Fills self.keysAndLinesIndices so that each record name
        (@<TRIPOS>ATOM, @<TRIPOS>BOND...) maps to [start, end], the slice
        bounds of its data lines in self.allLines. Empty lines and lines
        starting with '#' are removed from self.allLines first (bug #846).
        """
        # Filter in one pass: popping while enumerating skipped the line that
        # follows every removed one. Slice assignment keeps the list object.
        self.allLines[:] = [line for line in self.allLines
                            if line and line[0] != '#']
        record = None
        nbLines = len(self.allLines)
        for i in range(nbLines):
            if self.allLines[i][:9] == '@<TRIPOS>':
                # a new tag closes the record that was open so far
                if record is not None:
                    self.keysAndLinesIndices[record].append(i)
                record = self.allLines[i].strip()
                self.keysAndLinesIndices[record] = [i + 1]
        if record:
            self.keysAndLinesIndices[record].append(nbLines)
        else:
            print(" the file %s doesn't contain any mol2 records"
                  % self.filename)

    def parse(self):
        """Read the file and build the corresponding data hierarchy.

        Returns a ProteinSet holding the molecule that was built; the set is
        empty when the file is empty or has no ATOM record. Returns None when
        self.allLines is None after readFile().
        """
        self.readFile()
        molList = ProteinSet()
        if self.allLines is None:
            return
        if len(self.allLines) == 0:
            print("The file %s is empty" % self.filename)
            return molList
        self.getKeysAndLinesIndices()

        if "@<TRIPOS>ATOM" not in self.keysAndLinesIndices:
            print("The file %s doesn't have Atom records, molecules can't be built"
                  % self.filename)
            return molList

        if '@<TRIPOS>SUBSTRUCTURE' in self.keysAndLinesIndices:
            self.parse_MOL2_Substructure(
                self._recordLines('@<TRIPOS>SUBSTRUCTURE'))
        else:
            atmlines = [line.split()
                        for line in self._recordLines('@<TRIPOS>ATOM')]
            self.build4LevelsTree({}, atmlines)
        molList.append(self.mol)

        if '@<TRIPOS>BOND' in self.keysAndLinesIndices:
            self.parse_MOL2_Bonds(self._recordLines('@<TRIPOS>BOND'))
        if '@<TRIPOS>SET' in self.keysAndLinesIndices:
            self.parse_MOL2_Sets(self.keysAndLinesIndices['@<TRIPOS>SET'])
        return molList

    def parse_MOL2_Substructure(self, substlines):
        """Build the molecule using the SUBSTRUCTURE record lines.

        A dictionary is built with the substructure name (2nd field) as key
        and the chain ID (6th field) as value; when several lines give the
        same name with a chain ID the value is the list of these chain IDs,
        in file order. Lines with fewer than 6 fields or with '****' as chain
        are ignored. The dictionary is stored in self.subst_chain (only when
        substlines is not empty) and handed to build4LevelsTree.
        """
        atomlines = [line.split()
                     for line in self._recordLines('@<TRIPOS>ATOM')]
        subst_chain = {}
        if len(substlines) == 0:
            # no substructure defined: every atom ends up in the default chain
            self.build4LevelsTree(subst_chain, atomlines)
            return

        for line in [rawline.split() for rawline in substlines]:
            if len(line) < 6 or line[5] == '****':
                continue
            substName = line[1]
            chainID = line[5]
            if substName not in subst_chain:
                subst_chain[substName] = chainID
            elif isinstance(subst_chain[substName], list):
                # already a list: extend it, re-wrapping it nested the list
                subst_chain[substName].append(chainID)
            else:
                subst_chain[substName] = [subst_chain[substName], chainID]
        self.subst_chain = subst_chain
        self.build4LevelsTree(subst_chain, atomlines)

    def build2LevelsTree(self, atomlines):
        """Build a two level tree Molecule-Atom from the split ATOM lines."""
        print('try to build a 2 level tree')
        self.mol = Molecule()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.children = AtomSet([])
        self.mol.childrenName = 'atoms'
        self.mol.childrenSetClass = AtomSet
        self.mol.elementType = Atom
        self.mol.levels = [Molecule, Atom]
        for atmline in atomlines:
            # the element is the part of the atom type before the '.'
            atom = Atom(atmline[1], self.mol,
                        chemicalElement=atmline[5].split('.')[0],
                        top=self.mol)
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[float(atmline[2]), float(atmline[3]),
                             float(atmline[4])]]
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            # add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        self.mol.atoms = self.mol.children

    def build3LevelsTree(self, atomlines):
        """Build a 3 levels hierarchy Molecule-substructure-atoms.

        atomlines are the split ATOM lines; field 7 is cut into a residue type
        (first 3 characters) and a residue number (the rest).
        """
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.children = ResidueSet([])
        self.mol.childrenName = 'residues'
        self.mol.childrenSetClass = ResidueSet
        self.mol.elementType = Residue
        self.mol.curRes = Residue()
        self.mol.curRes.hasCA = 0
        self.mol.curRes.hasO = 0
        self.mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol, top=self.mol)
            name = atmline[1]
            if name == 'CA':
                self.mol.curRes.hasCA = 1
            if name == 'O':
                self.mol.curRes.hasO = 2
            atom = Atom(name, self.mol.curRes, top=self.mol,
                        chemicalElement=atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[float(atmline[2]), float(atmline[3]),
                             float(atmline[4])]]
            # the charge column is optional, as in the other tree builders
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                # was the undefined name mol2 (NameError on the first atom)
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            # list of strings describing the Sybyl status bits of the atom
            atom.status = status
            # add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        self.mol.residues = self.mol.children
        assert hasattr(self.mol, 'chains')
        delattr(self.mol, 'chains')
        delattr(self.mol, 'curRes')

    def build4LevelsTree(self, subst_chain, atomlines):
        """Build a 4 level hierarchy Protein-Chain-Residue-Atom.

        subst_chain maps a substructure name to a chain ID or to a list of
        chain IDs (see parse_MOL2_Substructure); it is modified in place
        (water entries are added, listed chain IDs are consumed).
        atomlines are the split ATOM lines; they may be modified in place.
        Atoms whose substructure has no chain go to the chain 'default'.
        """
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        waterCount = 1
        chainID = 'default'
        prevSubst = None   # substructure name of the previous atom line
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            if len(atmline) == 8:
                # NOTE(review): cuts field 5 after 5 characters and inserts
                # the remainder as a new field 6; kept as in the original,
                # presumably for files where two columns are run together.
                tmp = [atmline[5][:5], atmline[5][5:]]
                atmline[5] = tmp[0]
                atmline.insert(6, tmp[1])
            if status and status[0] == 'WATER':
                # every water atom gets its own HOH<n> residue in chain 'W'
                chainID = 'W'
                atmline[7] = 'HOH' + str(waterCount)
                subst_chain[atmline[7]] = chainID
                waterCount = waterCount + 1

            if subst_chain == {}:
                chainID = 'default'
            elif atmline[7] not in subst_chain:
                chainID = 'default'
            else:
                entry = subst_chain[atmline[7]]
                if isinstance(entry, str):
                    # only one chain has this substructure name
                    chainID = entry
                elif isinstance(entry, list):
                    # Several chains have this substructure name: use the
                    # next listed chain each time a new run of atoms with
                    # this name starts. The original stored the None
                    # returned by list.remove() here, so only the first
                    # chain ID was ever used.
                    if atmline[7] != prevSubst and entry:
                        chainID = entry.pop(0)

            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or \
                   chainID not in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol,
                                              top=self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            if len(atmline) < 8:
                # no substructure name field: was "< 7", which still indexed
                # atmline[7] on 7-field lines
                resName = 'RES'
                resSeq = '1'
                prevSubst = None
            else:
                resName = atmline[7][:3]
                resSeq = atmline[7][3:]
                prevSubst = atmline[7]
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.curChain.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol.curChain,
                                              top=self.mol)
            name = atmline[1]
            if name == 'CA':
                self.mol.curRes.hasCA = 1
            if name == 'O':
                self.mol.curRes.hasO = 2
            atom = Atom(name, self.mol.curRes, top=self.mol,
                        chemicalElement=atmline[5].split('.')[0])
            atom.element = atom.chemElem
            atom.number = int(atmline[0])
            self.mol.atmNum[atom.number] = atom
            atom._coords = [[float(atmline[2]), float(atmline[3]),
                             float(atmline[4])]]
            if len(atmline) >= 9:
                atom._charges['mol2'] = float(atmline[8])
                atom.chargeSet = 'mol2'
            atom.hetatm = 0
            # list of strings describing the Sybyl status bits of the atom
            atom.status = status
            # add altname so buildBondsByDist doesn't croak
            atom.altname = None
            self.mol.allAtoms.append(atom)
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')

    def parse_MOL2_Molecule(self, mollines):
        """Return the MOLECULE record lines, each split on whitespace."""
        return [line.split() for line in mollines]

    def parse_MOL2_Bonds(self, bondlines):
        """Build the Bond objects described by the BOND record lines.

        Fields 1 and 2 of a line are the numbers of the two atoms (looked up
        in self.mol.atmNum), field 3 is the bond type. bondOrder is the type
        as an int when it is a number, 'aromatic' for 'ar', 'amide' for 'am'
        and the type string itself otherwise.
        """
        orderNames = {'ar': 'aromatic', 'am': 'amide'}
        for bd in [line.split() for line in bondlines]:
            at1 = self.mol.atmNum[int(bd[1])]
            at2 = self.mol.atmNum[int(bd[2])]
            if at1.isBonded(at2):
                continue
            bond = Bond(at1, at2, check=0)
            bond.type = bd[3]
            try:
                bond.bondOrder = int(bd[3])
            except ValueError:
                bond.bondOrder = orderNames.get(bd[3], bd[3])
        self.mol.bondsflag = 1
        self.mol.hasBonds = 1

    def parse_MOL2_Sets(self, setRecords):
        """Parse the SET records into self.setsDatas.

        setRecords is the [start, end] pair of the SET record in
        self.allLines. Every set adds one entry
        [field0, field1, field2, field3, field4, comments, members] where
        missing header fields and a missing comment are None, and members is
        the list of ints found on the member lines (lines ending with a
        backslash are continued on the next line).
        """
        setRecords = [line.split()
                      for line in self.allLines[setRecords[0]:setRecords[1]]]
        nbLines = len(setRecords)
        i = 0
        while i < nbLines:
            header = setRecords[i]
            rec = list(header[:5])
            # pad short headers so that comments and members always sit at
            # index 5 and 6, where the secondary structure code reads them
            rec.extend([None] * (5 - len(rec)))
            if len(header) > 5:
                rec.append(' '.join(header[5:]))
            else:
                rec.append(None)
            self.setsDatas.append(rec)

            members = []
            i = i + 1
            # bounded by nbLines: a record cut short no longer raises
            while i < nbLines:
                tokens = setRecords[i]
                i = i + 1
                if tokens and tokens[-1] == '\\':
                    members.extend([int(tok) for tok in tokens[:-1]])
                else:
                    members.extend([int(tok) for tok in tokens])
                    break
            rec.append(members)

    def hasSsDataInFile(self):
        """Extract the secondary structure sets of the file.

        The sets whose name starts with HELI, SHEE and TURN are taken from
        self.setsDatas, their members are replaced by residues (see
        processSSEltData) and the result is stored in self.ssData as
        [helices, sheets, turns]. Returns 1 when at least one such set was
        found, 0 otherwise.
        """
        hData = [rec for rec in self.setsDatas if rec[0][:4] == 'HELI']
        sData = [rec for rec in self.setsDatas if rec[0][:4] == 'SHEE']
        tData = [rec for rec in self.setsDatas if rec[0][:4] == 'TURN']
        self.processSSEltData(sData, self.mol)
        self.processSSEltData(hData, self.mol)
        self.processSSEltData(tData, self.mol)
        self.ssData = [hData, sData, tData]
        # the list of three lists is never == [], test its content instead
        if hData or sData or tData:
            return 1
        return 0

    def parseSSData(self, mol):
        """Return the secondary structure description of each chain of mol.

        The result maps a chain id to
        [[Helix, (start, end), ...], [Strand, (start, end), ...],
         [Turn, (start, end), ...], None]
        where start and end are the first and last residues of each
        secondary structure element.
        """
        if not hasattr(self, 'ssData'):
            self.hasSsDataInFile()
        # the strands do not depend on the chain, split the sheets only once
        strandData = self.findStrands(self.ssData[1])
        ssDataForMol = {}
        for c in mol.chains:
            helStartEndForChain = self.processSSData(self.ssData[0], c)
            helStartEndForChain.insert(0, Helix)
            strandStartEndForChain = self.processSSData(strandData, c)
            strandStartEndForChain.insert(0, Strand)
            turnStartEndForChain = self.processSSData(self.ssData[2], c)
            turnStartEndForChain.insert(0, Turn)
            ssDataForMol[c.id] = [helStartEndForChain,
                                  strandStartEndForChain,
                                  turnStartEndForChain, None]
        return ssDataForMol

    def findStrands(self, data):
        """Separate each strand of the sheets in data.

        data is a list of set entries as built by parse_MOL2_Sets whose
        members (entry[6][1:]) have already been replaced by residues. A
        sheet is cut wherever the numbers of two successive residues do not
        differ by exactly 1. Returns a list with one entry per strand: the
        sheet entry itself when it is not cut, otherwise copies of its first
        six fields followed by [number of residues, residue, ...] so that the
        residues stay at index 1 and up, as processSSData expects.
        """
        strands = []
        for sheet in data:
            members = sheet[6]
            # members[0] is not a residue, so the first gap that can be
            # tested is between members[1] and members[2]
            breaks = [i for i in range(2, len(members))
                      if int(members[i].number)
                      - int(members[i - 1].number) != 1]
            if not breaks:
                strands.append(sheet)
                continue
            bounds = [1] + breaks + [len(members)]
            for start, stop in zip(bounds, bounds[1:]):
                residues = list(members[start:stop])
                strands.append(list(sheet[:6])
                               + [[len(residues)] + residues])
        return strands

    def processSSData(self, data, chain):
        """Return the (first residue, last residue) pairs found in chain.

        data is a list of set entries whose last field holds the residues of
        a secondary structure element starting at index 1. Only the entries
        whose first residue belongs to `chain` are kept. This information is
        used by the class GetSecondaryStructureFromFile.
        """
        return [(rec[-1][1], rec[-1][-1]) for rec in data
                if rec[-1][1].parent.id == chain.id]

    def processSSEltData(self, ssData, mol):
        """Replace the root atom numbers of the sets by residues.

        For every entry of ssData the members data[6][1:] that are ints are
        replaced in place by the parent of the atom with that 1-based index
        in mol.chains.residues.atoms. The function returns as soon as it
        meets a member that is not an int.
        """
        atoms = mol.chains.residues.atoms
        for data in ssData:
            for i in range(1, len(data[6])):
                if isinstance(data[6][i], int):
                    data[6][i] = atoms[data[6][i] - 1].parent
                else:
                    return

    def getMoleculeInformation(self):
        """Return a description string built from the MOLECULE record.

        The string is the first word of the first line of the record,
        followed by the first word of its last line unless that word is
        USER_CHARGES or NO_CHARGES. An empty record gives ''.
        """
        molStr = self.parse_MOL2_Molecule(
            self._recordLines('@<TRIPOS>MOLECULE'))
        chemical_formula = None
        if molStr != []:
            try:
                chemical_formula = molStr[-1][0]
            except IndexError:
                pass
            molStr = molStr[0][0]
        else:
            molStr = ''
        if chemical_formula in ["USER_CHARGES", "NO_CHARGES"]:
            return molStr
        elif chemical_formula is not None:
            return "%s %s" % (molStr, chemical_formula)
        return molStr
def parse(self, objClass=Protein):
    """Parse the mmCIF dictionary (self.mmCIF_dict) into MolKit objects.

    Two layouts are handled: mmCIF files with '_atom_site.id' records and
    core CIF files with '_atom_site_label' records, whose fractional
    coordinates are converted to cartesian ones with the cell parameters.

    Returns the molecule set holding the Protein that was built, or None when
    there is nothing to read or no atom record is available.
    objClass is accepted but not used by this method.
    """
    if self.allLines is None and self.filename:
        self.readFile()
        if self.allLines is None or len(self.allLines) == 0:
            return
        self.mmCIF2Dict()
    type_symbol = None
    B_iso_or_equiv = None
    # stays None when a core CIF file has no _atom_site_occupancy item; it
    # used to be unbound in that case and raised when the atoms were built
    occupancy = None
    mmCIF_dict = self.mmCIF_dict
    molName = os.path.basename(os.path.splitext(self.filename)[0])
    if '_entry.id' in mmCIF_dict:
        molName = mmCIF_dict['_entry.id']

    if '_atom_site.id' in mmCIF_dict:
        # The description of the data names can be found in the following link
        # http://mmcif.pdb.org/dictionaries/mmcif_pdbx.dic/Items
        ids = mmCIF_dict['_atom_site.id']  # 1 number
        group_PDB = mmCIF_dict['_atom_site.group_PDB']  # 2 atom/hetatm
        atom_id = mmCIF_dict['_atom_site.label_atom_id']  # 3 name
        comp_id = mmCIF_dict['_atom_site.label_comp_id']  # 4 residue type
        # 5 chain. Note: chain ID from mmCIF file might be different from
        # PDB file
        label_asym_id = mmCIF_dict['_atom_site.label_asym_id']
        seq_id = mmCIF_dict['_atom_site.label_seq_id']  # 6 residue number
        x_coords = mmCIF_dict['_atom_site.Cartn_x']  # 7 xcoord
        y_coords = mmCIF_dict['_atom_site.Cartn_y']  # 8 ycoord
        z_coords = mmCIF_dict['_atom_site.Cartn_z']  # 9 zcoord
        occupancy = mmCIF_dict['_atom_site.occupancy']  # 10
        B_iso_or_equiv = mmCIF_dict['_atom_site.B_iso_or_equiv']  # 11
        type_symbol = mmCIF_dict['_atom_site.type_symbol']
    elif '_atom_site_label' in mmCIF_dict:
        # ftp://ftp.iucr.org/pub/cif_core.dic
        atom_id = mmCIF_dict['_atom_site_label']
        len_atoms = len(atom_id)
        ids = range(len_atoms)
        group_PDB = len_atoms * ['HETATM']
        comp_id = len_atoms * ["CIF"]
        label_asym_id = len_atoms * ['1']
        seq_id = len_atoms * [1]

        from mglutil.math.crystal import Crystal

        def leading_number(text):
            # values are read up to the first '(' when there is one,
            # e.g. '5.43(2)' gives 5.43
            return float(text.split('(')[0])

        # the cell parameters are also stored under their mmCIF names
        a = mmCIF_dict['_cell.length_a'] = leading_number(
            mmCIF_dict['_cell_length_a'])
        b = mmCIF_dict['_cell.length_b'] = leading_number(
            mmCIF_dict['_cell_length_b'])
        c = mmCIF_dict['_cell.length_c'] = leading_number(
            mmCIF_dict['_cell_length_c'])
        alpha = mmCIF_dict['_cell.angle_alpha'] = leading_number(
            mmCIF_dict['_cell_angle_alpha'])
        beta = mmCIF_dict['_cell.angle_beta'] = leading_number(
            mmCIF_dict['_cell_angle_beta'])
        gamma = mmCIF_dict['_cell.angle_gamma'] = leading_number(
            mmCIF_dict['_cell_angle_gamma'])
        cryst = Crystal((a, b, c), (alpha, beta, gamma))

        x = [leading_number(item)
             for item in mmCIF_dict['_atom_site_fract_x']]
        y = [leading_number(item)
             for item in mmCIF_dict['_atom_site_fract_y']]
        z = [leading_number(item)
             for item in mmCIF_dict['_atom_site_fract_z']]
        x_coords = []
        y_coords = []
        z_coords = []
        B_iso_or_equiv = []
        has_u_iso = '_atom_site_U_iso_or_equiv' in mmCIF_dict
        for i in ids:
            trans = cryst.toCartesian([x[i], y[i], z[i]])
            x_coords.append(trans[0])
            y_coords.append(trans[1])
            z_coords.append(trans[2])
            if has_u_iso:
                B_iso_or_equiv.append(
                    mmCIF_dict['_atom_site_U_iso_or_equiv'][i].split('(')[0])
        if '_atom_site_type_symbol' in mmCIF_dict:
            type_symbol = mmCIF_dict['_atom_site_type_symbol']
        if '_atom_site_occupancy' in mmCIF_dict:
            occupancy = mmCIF_dict['_atom_site_occupancy']
        if '_chemical_name_common' in mmCIF_dict:
            molName = mmCIF_dict['_chemical_name_common']
        elif '_chemical_name_mineral' in mmCIF_dict:
            molName = mmCIF_dict['_chemical_name_mineral']
        if '_symmetry_space_group_name_H-M' in mmCIF_dict:
            mmCIF_dict['_symmetry.space_group_name_H-M'] = mmCIF_dict[
                '_symmetry_space_group_name_H-M']
    else:
        print('No _atom_site.id or _atom_site_label record is available in %s'
              % self.filename)
        return None

    mol = Protein()
    self.mol = mol
    self.mol.allAtoms = AtomSet([])
    molList = mol.setClass()
    molList.append(mol)
    current_chain_id = None
    current_residue_number = None
    current_chain = None
    current_residue = None

    number_of_atoms = len(ids)
    self.configureProgressBar(init=1, mode='increment',
                              authtext='parse atoms', max=number_of_atoms)
    for index in range(number_of_atoms):
        # make a new atom for the current index
        chain_id = label_asym_id[index]
        if chain_id != current_chain_id:
            # make a new chain and let the molecule adopt it
            current_chain = Chain(id=chain_id)
            # FIXME: current_chain should not have allAtoms attribute
            if hasattr(current_chain, "allAtoms"):
                delattr(current_chain, "allAtoms")
            current_chain_id = chain_id
            mol.adopt(current_chain, setChildrenTop=1)

        residue_number = seq_id[index]
        # a chain change also starts a new residue, even with an equal number
        if residue_number != current_residue_number or \
           chain_id != label_asym_id[index - 1]:
            current_residue = Residue(type=comp_id[index],
                                      number=residue_number)
            current_residue_number = residue_number
            current_chain.adopt(current_residue, setChildrenTop=1)

        name = atom_id[index]
        if type_symbol:
            element = type_symbol[index]
        else:
            element = None
        atom = Atom(name, current_residue, element, top=mol)
        atom._coords = [[float(x_coords[index]), float(y_coords[index]),
                         float(z_coords[index])]]
        atom._charges = {}
        atom.segID = mol.name
        atom.normalname = name
        atom.number = int(ids[index])
        mol.atmNum[atom.number] = atom
        if occupancy:
            atom.occupancy = float(occupancy[index])
        else:
            # no occupancy item in the file: use 1.0, the default value the
            # CIF core dictionary gives for _atom_site_occupancy
            atom.occupancy = 1.0
        if B_iso_or_equiv:
            atom.temperatureFactor = float(B_iso_or_equiv[index])
        atom.altname = None
        atom.hetatm = 0
        if group_PDB[index] == 'HETATM':
            atom.hetatm = 1
        self.updateProgressBar()

    self.parse_MMCIF_CELL()
    try:
        self.parse_MMCIF_HYDBND()
    except Exception:
        # hydrogen bond records are optional extras: report and go on
        sys.stderr.write("Parsing Hydrogen Bond Record Failed in %s\n"
                         % self.filename)

    mol.name = molName
    mol.allAtoms = mol.chains.residues.atoms
    mol.parser = self
    mol.levels = [Protein, Chain, Residue, Atom]

    # string representations used to name the sets at every level; all the
    # molecule names are joined (the loop used to keep only the last one)
    name = ','.join(molList.name)
    molList.setStringRepr(name)
    molList.allAtoms.setStringRepr(name + ':::')
    for m in molList:
        mname = m.name
        m.allAtoms.setStringRepr(mname + ':::')
        m.chains.setStringRepr(mname + ':')
        for c in m.chains:
            cname = c.id
            c.residues.setStringRepr(mname + ':' + cname + ':')
            for r in c.residues:
                rname = r.name
                r.atoms.setStringRepr(
                    mname + ':' + cname + ':' + rname + ':')
    self.buildBonds()
    return molList
def makeMoleculeFromAtoms(molname, atomSet):
    """
    create a new molecule from a list of atoms

    mol <- makeMoleculeFromAtoms(molname, atomSet)

    A new Protein named molname is built with one new Chain, Residue and Atom
    for every chain, residue and atom reached from atomSet. The attributes of
    the atoms are copied, except the atom numbers which are the 0-based
    positions of the atoms in atomSet.
    """
    from MolKit.molecule import Atom, AtomSet
    from MolKit.protein import Protein, Chain, Residue

    # create the top object
    mol = Protein(name=molname)

    # find out all residues and all chains
    residues = atomSet.parent.uniq()
    chains = residues.parent.uniq()

    # create all chains, indexed by the chain they are copied from
    chainsd = {}
    for c in chains:
        chainsd[c] = Chain(c.id, mol, top=mol)

    # create all residues, indexed by the residue they are copied from
    resd = {}
    for res in residues:
        newres = Residue(res.name[:3], res.name[3:], res.icode,
                         chainsd[res.parent], top=mol)
        newres.hasCA = 0
        newres.hasO = 0
        resd[res] = newres

    # attributes that every source atom has and that are copied as they are
    plainAttributes = ('conformation', 'chemElem', 'atomicNumber',
                       'bondOrderRadius', 'covalentRadius', 'vdwRadius',
                       'maxBonds', 'organic')

    # create all the atoms
    newats = []
    for num, at in enumerate(atomSet):
        res = resd[at.parent]
        fullname = at.name
        # altname may be missing; it was tested with hasattr here but read
        # unconditionally further down, which raised AttributeError
        altname = getattr(at, 'altname', None)
        if altname is not None:
            name = fullname.split("@")[0]
        else:
            name = fullname
        if name == 'CA':
            res.hasCA = 1
        if name == 'O' or name == 'OXT' or \
           (len(name) > 3 and name[:3] == 'OCT'):
            res.hasO = 2

        newat = Atom(name, res, at.element, top=mol)
        if name != fullname:
            # the atom is created with the short name, then gets its full
            # name back
            newat.name = fullname
        newats.append(newat)

        # set constructor attributes
        newat._coords = [coords[:] for coords in at._coords]
        for attr in plainAttributes:
            setattr(newat, attr, getattr(at, attr))
        newat.colors = at.colors.copy()
        newat.opacities = at.opacities.copy()
        newat._charges = at._charges.copy()
        newat.chargeSet = at.chargeSet

        # set attributes from PDB parser
        if hasattr(at, 'segID'):  # pdbqs do not have this
            newat.segID = at.segID
        newat.hetatm = at.hetatm
        if hasattr(at, 'normalname'):  # pdbqs do not have this
            newat.normalname = at.normalname
        newat.number = num
        newat.occupancy = at.occupancy
        newat.temperatureFactor = at.temperatureFactor
        newat.altname = altname

        # attributes created by the PQR, F2D and PDBQ parsers
        for attr in ('pqrRadius', 'hbstatus', 'autodock_element'):
            if hasattr(at, attr):
                setattr(newat, attr, getattr(at, attr))

        # attributes created by PDBQS parser
        if hasattr(at, 'AtVol'):
            newat.AtVol = at.AtVol
            newat.AtSolPar = at.AtSolPar

    mol.allAtoms = AtomSet(newats)
    return mol
def build3LevelsTree(self, atomlines):
    """Build a 3 levels hierarchy Molecule-substructure-atoms.

    atomlines are the ATOM record lines of a mol2 file, already split on
    whitespace. Field 7 is cut into a residue type (first 3 characters) and a
    residue number (the rest). The molecule is stored in self.mol.
    """
    self.mol = Protein()
    self.mol.allAtoms = AtomSet()
    self.mol.atmNum = {}
    self.mol.parser = self
    if self.mol.name == 'NoName':
        self.mol.name = os.path.basename(
            os.path.splitext(self.filename)[0])
    self.mol.children = ResidueSet([])
    self.mol.childrenName = 'residues'
    self.mol.childrenSetClass = ResidueSet
    self.mol.elementType = Residue
    self.mol.curRes = Residue()
    self.mol.curRes.hasCA = 0
    self.mol.curRes.hasO = 0
    self.mol.levels = [Protein, Residue, Atom]

    for atmline in atomlines:
        if len(atmline) >= 10:
            status = atmline[9].split('|')
        else:
            status = None
        resName = atmline[7][:3]
        resSeq = atmline[7][3:]
        if resSeq != self.mol.curRes.number or \
           resName != self.mol.curRes.type:
            # check if this residue already exists
            na = resName.strip() + resSeq.strip()
            res = self.mol.get(na)
            if res:
                self.mol.curRes = res[0]
            else:
                self.mol.curRes = Residue(resName, resSeq, '',
                                          self.mol, top=self.mol)
        name = atmline[1]
        if name == 'CA':
            self.mol.curRes.hasCA = 1
        if name == 'O':
            self.mol.curRes.hasO = 2

        # the element is the part of the atom type before the '.'
        atom = Atom(name, self.mol.curRes, top=self.mol,
                    chemicalElement=atmline[5].split('.')[0])
        atom.element = atom.chemElem
        atom.number = int(atmline[0])
        self.mol.atmNum[atom.number] = atom
        atom._coords = [[float(atmline[2]), float(atmline[3]),
                         float(atmline[4])]]
        # only read the charge when the line has a charge column
        if len(atmline) >= 9:
            atom._charges['mol2'] = float(atmline[8])
            # was the undefined name mol2, a NameError on the first atom
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # list of strings describing the Sybyl status bits of the atom
        atom.status = status
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None
        self.mol.allAtoms.append(atom)

    self.mol.residues = self.mol.children
    assert hasattr(self.mol, 'chains')
    delattr(self.mol, 'chains')
    delattr(self.mol, 'curRes')
def parse(self, objClass=Protein):
    """Parse the lines of a GROMACS .gro file into MolKit objects.

    Line 1 of the file holds the number of atoms and is followed by one
    fixed-column line per atom. All the residues are put in a single chain
    with the id 'GRO'. Coordinates are multiplied by 10.

    Returns the molecule set holding the Protein that was built, or None when
    there is nothing to read. objClass is accepted but not used.
    """
    if self.allLines is None and self.filename:
        self.readFile()
        if self.allLines is None or len(self.allLines) == 0:
            return

    mol = Protein()
    self.mol = mol
    molList = mol.setClass()
    molList.append(mol)
    current_residue_number = None
    current_residue = None

    # Read the first token of the count line rather than its first five
    # characters: the count is free format, so a fixed slice misreads it
    # when it is indented differently or has more than five digits.
    number_of_atoms = int(self.allLines[1].split()[0])
    self.configureProgressBar(init=1, mode='increment',
                              authtext='parse atoms', max=number_of_atoms)

    current_chain = Chain(id='GRO',)
    # FIX this: The existence of allAtoms attribute (and the fact that it is
    # an empty set rather than all atoms in the chain) causes
    # getNodesByMolecule() to return wrong values
    if hasattr(current_chain, "allAtoms"):
        del current_chain.allAtoms
    mol.adopt(current_chain, setChildrenTop=1)

    for index in range(2, number_of_atoms + 2):
        line = self.allLines[index]
        residue_number = int(line[:5])
        if residue_number != current_residue_number:
            # create the new residue and let the chain adopt it
            residue_type = line[5:10].split(' ')[0]
            current_residue = Residue(type=residue_type,
                                      number=residue_number)
            current_residue_number = residue_number
            current_chain.adopt(current_residue, setChildrenTop=1)

        name = line[10:15].split(' ')[-1]
        # the atom name is the element when babel knows it, otherwise the
        # element is guessed from the name
        element = name
        if element not in babel_elements:
            if residue_type == "System" or residue_type == "SOL":
                element = element[0]
            elif element[:2] == 'Me':
                element = 'C'
            else:
                element = element[0]
        atom = Atom(name, current_residue, element, top=mol)

        x = float(line[20:28]) * 10
        y = float(line[28:36]) * 10
        z = float(line[36:44]) * 10
        atom._coords = [[x, y, z]]
        # a dictionary, as set by the other parsers (it was a list here)
        atom._charges = {}
        atom.segID = mol.name
        atom.normalname = name
        atom.number = int(line[15:20])
        atom.elementType = name[0]
        mol.atmNum[atom.number] = atom
        atom.altname = None
        atom.hetatm = 0

    mol.name = os.path.split(os.path.splitext(self.filename)[0])[-1]
    mol.allAtoms = mol.chains.residues.atoms
    mol.parser = self
    mol.levels = [Protein, Chain, Residue, Atom]

    # string representations used to name the sets at every level; all the
    # molecule names are joined (the loop used to keep only the last one)
    name = ','.join(molList.name)
    molList.setStringRepr(name)
    molList.allAtoms.setStringRepr(name + ':::')
    for m in molList:
        mname = m.name
        m.allAtoms.setStringRepr(mname + ':::')
        m.chains.setStringRepr(mname + ':')
        for c in m.chains:
            cname = c.id
            c.residues.setStringRepr(mname + ':' + cname + ':')
            for r in c.residues:
                rname = r.name
                r.atoms.setStringRepr(
                    mname + ':' + cname + ':' + rname + ':')
    return molList
class Mol2Parser(MoleculeParser):
    """Parser building a molecule hierarchy from a Tripos mol2 file.

    ``parse`` reads the file, indexes its ``@<TRIPOS>`` records and builds a
    Protein-Chain-Residue-Atom tree from the ATOM record, using the
    SUBSTRUCTURE, BOND and SET records when they are present.
    """

    # Record type indicators of the mol2 format.
    Mol2Tags = [
        "@<TRIPOS>ALT_TYPE", "@<TRIPOS>ANCHOR_ATOM",
        "@<TRIPOS>ASSOCIATED_ANNOTATION", "@<TRIPOS>ATOM", "@<TRIPOS>BOND",
        "@<TRIPOS>CENTER_OF_MASS", "@<TRIPOS>CENTROID", "@<TRIPOS>COMMENT",
        "@<TRIPOS>CRYSIN", "@<TRIPOS>CURR_POS", "@<TRIPOS>DICT",
        "@<TRIPOS>DATA_FILE", "@<TRIPOS>EXTENSION_POINT", "@<TRIPOS>FF_PBC",
        "@<TRIPOS>FFCON_ANGLE", "@<TRIPOS>FFCON_DIST", "@<TRIPOS>FFCON_RANGE",
        "@<TRIPOS>FFCON_TORSION", "@<TRIPOS>LINE", "@<TRIPOS>LSPLANE",
        "@<TRIPOS>MOLECULE", "@<TRIPOS>NORMAL", "@<TRIPOS>POLYBUILD_HIST",
        "@<TRIPOS>QSAR_ALIGN_RULE", "@<TRIPOS>RING_CLOSURE",
        "@<TRIPOS>ROTABLE_BOND", "@<TRIPOS>SEARCH_DIST",
        "@<TRIPOS>SEARCH_OPTIONS", "@<TRIPOS>SUBSTRUCTURE", "@<TRIPOS>U_FEAT"
    ]

    def __init__(self, filename):
        """Create a parser for the mol2 file `filename`."""
        MoleculeParser.__init__(self, filename)
        self.mol2RecordParser = {}
        self.defaultReadOptions = [
            '@<TRIPOS>ATOM', '@<TRIPOS>BOND', '@<TRIPOS>MOLECULE',
            '@<TRIPOS>SET', '@<TRIPOS>SUBSTRUCTURE', '@<TRIPOS>DICT'
        ]
        # record name -> [index of its first data line, index one past its
        # last data line] (indices into self.allLines)
        self.keysAndLinesIndices = {}
        self.counter = 0
        # one entry per SET record, filled by parse_MOL2_Sets
        self.setsDatas = []

    def getKeysAndLinesIndices(self):
        """Index the mol2 records found in self.allLines.

        Fills self.keysAndLinesIndices with one entry per record name
        (@<TRIPOS>ATOM, @<TRIPOS>BOND...) whose value is
        [first data line index, index one past the last data line].
        Empty lines and lines starting with '#' are removed from
        self.allLines first (bug #846).
        """
        # Rebuild the list in one pass: popping entries while enumerating
        # the same list skipped the line following every removed one.
        self.allLines[:] = [line for line in self.allLines
                            if line and line[0] != '#']
        record = None
        for i, line in enumerate(self.allLines):
            if line[:9] == '@<TRIPOS>':
                if record is not None:
                    # the previous record ends where this one starts
                    self.keysAndLinesIndices[record].append(i)
                record = line.strip()
                self.keysAndLinesIndices[record] = [i + 1]
        if record:
            self.keysAndLinesIndices[record].append(len(self.allLines))
        else:
            print(" the file %s doesn't contain any mol2 records" %
                  self.filename)

    def _recordLines(self, key):
        """Return the data lines of the indexed record `key`."""
        bounds = self.keysAndLinesIndices[key]
        return self.allLines[bounds[0]:bounds[1]]

    def parse(self):
        """Read the file and create the corresponding data hierarchy.

        Returns a ProteinSet holding the molecule that was built (empty
        when the file is empty or has no ATOM record), or None when
        readFile() left self.allLines set to None.
        """
        self.readFile()
        molList = ProteinSet()
        if self.allLines is None:
            return
        if len(self.allLines) == 0:
            print("The file %s is empty" % self.filename)
            return molList
        self.getKeysAndLinesIndices()
        records = self.keysAndLinesIndices

        if "@<TRIPOS>ATOM" not in records:
            print("The file %s doesn't have Atom records, molecules can't be built"
                  % self.filename)
            return molList

        if '@<TRIPOS>SUBSTRUCTURE' in records:
            self.parse_MOL2_Substructure(
                self._recordLines('@<TRIPOS>SUBSTRUCTURE'))
        else:
            atmlines = [line.split()
                        for line in self._recordLines('@<TRIPOS>ATOM')]
            self.build4LevelsTree({}, atmlines)
        molList.append(self.mol)

        if '@<TRIPOS>BOND' in records:
            self.parse_MOL2_Bonds(self._recordLines('@<TRIPOS>BOND'))
        if '@<TRIPOS>SET' in records:
            self.parse_MOL2_Sets(records['@<TRIPOS>SET'])
        return molList

    def parse_MOL2_Substructure(self, substlines):
        """Build the substructure-name -> chain-id table and the molecule.

        The table maps field 1 of each SUBSTRUCTURE line to field 5 of
        that line.  Lines with fewer than six fields, or whose chain
        field is '****', are ignored.  When the same name is listed
        several times the value is the list of the chain ids in file
        order.  The table is then handed to build4LevelsTree together
        with the split ATOM lines.
        """
        atomlines = [line.split()
                     for line in self._recordLines('@<TRIPOS>ATOM')]
        subst_chain = {}
        for fields in [line.split() for line in substlines]:
            if len(fields) < 6 or fields[5] == '****':
                continue
            name, chain = fields[1], fields[5]
            if name not in subst_chain:
                subst_chain[name] = chain
            elif isinstance(subst_chain[name], list):
                # third and later occurrences extend the flat list; wrapping
                # again produced nested lists used as chain ids
                subst_chain[name].append(chain)
            else:
                subst_chain[name] = [subst_chain[name], chain]
        if len(substlines) != 0:
            self.subst_chain = subst_chain
        self.build4LevelsTree(subst_chain, atomlines)

    def _setAtomData(self, atom, atmline):
        """Copy number, coordinates and charge of an ATOM line onto `atom`."""
        atom.element = atom.chemElem
        atom.number = int(atmline[0])
        self.mol.atmNum[atom.number] = atom
        atom._coords = [[
            float(atmline[2]),
            float(atmline[3]),
            float(atmline[4])
        ]]
        # the charge column is optional
        if len(atmline) >= 9:
            atom._charges['mol2'] = float(atmline[8])
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None

    def build2LevelsTree(self, atomlines):
        """Build a two level Molecule-Atom tree from the split ATOM lines."""
        print('try to build a 2 level tree')
        self.mol = Molecule()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.children = AtomSet([])
        self.mol.childrenName = 'atoms'
        self.mol.childrenSetClass = AtomSet
        self.mol.elementType = Atom
        self.mol.levels = [Molecule, Atom]
        for atmline in atomlines:
            atom = Atom(atmline[1], self.mol,
                        chemicalElement=atmline[5].split('.')[0],
                        top=self.mol)
            self._setAtomData(atom, atmline)
            self.mol.allAtoms.append(atom)
        self.mol.atoms = self.mol.children

    def build3LevelsTree(self, atomlines):
        """Build a three level Protein-Residue-Atom tree from the split
        ATOM lines.  Every line must carry a substructure name (field 7)."""
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.children = ResidueSet([])
        self.mol.childrenName = 'residues'
        self.mol.childrenSetClass = ResidueSet
        self.mol.elementType = Residue
        self.mol.curRes = Residue()
        self.mol.curRes.hasCA = 0
        self.mol.curRes.hasO = 0
        self.mol.levels = [Protein, Residue, Atom]
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            resName = atmline[7][:3]
            resSeq = atmline[7][3:]
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol, top=self.mol)
            name = atmline[1]
            if name == 'CA':
                self.mol.curRes.hasCA = 1
            if name == 'O':
                self.mol.curRes.hasO = 2
            atom = Atom(name, self.mol.curRes, top=self.mol,
                        chemicalElement=atmline[5].split('.')[0])
            # sets chargeSet to the string 'mol2' (was an undefined name)
            # and only reads the charge column when it is present
            self._setAtomData(atom, atmline)
            # list of strings describing the Sybyl status bits of the atom
            atom.status = status
            self.mol.allAtoms.append(atom)
        self.mol.residues = self.mol.children
        assert hasattr(self.mol, 'chains')
        delattr(self.mol, 'chains')
        delattr(self.mol, 'curRes')

    def build4LevelsTree(self, subst_chain, atomlines):
        """Build a four level Protein-Chain-Residue-Atom tree.

        subst_chain -- dictionary mapping a substructure name to a chain
                       id, or to the list of chain ids of the
                       substructures sharing that name.  It is modified:
                       water entries are added and chain id lists are
                       consumed.
        atomlines   -- the split lines of the ATOM record.  Lines may be
                       modified (fused columns are split, water atoms are
                       renamed).

        Atoms whose substructure is unknown go to the chain 'default';
        atoms whose status starts with WATER go to chain 'W', each in its
        own HOH<n> residue.
        """
        self.mol = Protein()
        self.mol.allAtoms = AtomSet()
        self.mol.atmNum = {}
        self.mol.parser = self
        if self.mol.name == 'NoName':
            self.mol.name = os.path.basename(
                os.path.splitext(self.filename)[0])
        self.mol.curChain = Chain()
        self.mol.curRes = Residue()
        self.mol.levels = [Protein, Chain, Residue, Atom]
        i = 1  # running number used to name water residues
        chainID = 'default'
        prevSubst = None  # (subst id, subst name) of the previous atom
        for atmline in atomlines:
            if len(atmline) >= 10:
                status = atmline[9].split('|')
            else:
                status = None
            # Atom type and substructure id written without a separating
            # blank: split the fused token.  A token of at most 5
            # characters is not fused; splitting it would shift the
            # substructure name into the charge column.
            if len(atmline) == 8 and len(atmline[5]) > 5:
                fused = atmline[5]
                atmline[5] = fused[:5]
                atmline.insert(6, fused[5:])
            if status and status[0] == 'WATER' and len(atmline) >= 8:
                chainID = 'W'
                atmline[7] = 'HOH' + str(i)
                subst_chain[atmline[7]] = chainID
                i = i + 1

            # The substructure columns are optional in an ATOM line.
            if len(atmline) >= 8:
                substName = atmline[7]
                substKey = (atmline[6], substName)
            else:
                substName = None
                substKey = None

            if not subst_chain or substName not in subst_chain:
                chainID = 'default'
            else:
                entry = subst_chain[substName]
                if isinstance(entry, str):
                    # only one chain has this substructure name
                    chainID = entry
                elif isinstance(entry, list):
                    # Several chains have a substructure with this name:
                    # take the next chain id each time a new substructure
                    # starts, and keep it for the remaining atoms of that
                    # substructure.  (list.remove() returns None, so its
                    # result must not be stored back in the table.)
                    if substKey != prevSubst and entry:
                        chainID = entry.pop(0)
            prevSubst = substKey

            if chainID != self.mol.curChain.id:
                if not self.mol.chains.id or chainID not in self.mol.chains.id:
                    self.mol.curChain = Chain(chainID, self.mol,
                                              top=self.mol)
                else:
                    self.mol.curChain = self.mol.chains.get(chainID)[0]

            if substName is None:
                # the line has no residue name / number
                resName = 'RES'
                resSeq = '1'
            else:
                resName = substName[:3]
                resSeq = substName[3:]

            # A residue with the same name in another chain is a different
            # residue, so the chain is compared as well.
            if resSeq != self.mol.curRes.number or \
               resName != self.mol.curRes.type or \
               getattr(self.mol.curRes, 'parent', None) is not self.mol.curChain:
                # check if this residue already exists
                na = resName.strip() + resSeq.strip()
                res = self.mol.curChain.get(na)
                if res:
                    self.mol.curRes = res[0]
                else:
                    self.mol.curRes = Residue(resName, resSeq, '',
                                              self.mol.curChain,
                                              top=self.mol)
            name = atmline[1]
            if name == 'CA':
                self.mol.curRes.hasCA = 1
            if name == 'O':
                self.mol.curRes.hasO = 2
            atom = Atom(name, self.mol.curRes, top=self.mol,
                        chemicalElement=atmline[5].split('.')[0])
            self._setAtomData(atom, atmline)
            # list of strings describing the Sybyl status bits of the atom
            atom.status = status
            self.mol.allAtoms.append(atom)
        delattr(self.mol, 'curRes')
        delattr(self.mol, 'curChain')

    def parse_MOL2_Molecule(self, mollines):
        """Return the lines of the MOLECULE record split into fields."""
        return [line.split() for line in mollines]

    def parse_MOL2_Bonds(self, bondlines):
        """Create the Bond objects described by the BOND record.

        Each line gives a bond id, the numbers of the two atoms and the
        bond type.  The type is stored in bond.type; bond.bondOrder is
        the integer order when the type is a number, 'aromatic' for 'ar',
        'amide' for 'am' and the raw type otherwise.
        """
        for bd in [line.split() for line in bondlines]:
            if len(bd) < 4:
                # blank or truncated line: nothing to build
                continue
            at1 = self.mol.atmNum[int(bd[1])]
            at2 = self.mol.atmNum[int(bd[2])]
            if at1.isBonded(at2):
                continue
            bond = Bond(at1, at2, check=0)
            bond.type = bd[3]
            try:
                bond.bondOrder = int(bd[3])
            except ValueError:
                if bd[3] == 'ar':
                    bond.bondOrder = 'aromatic'
                elif bd[3] == 'am':
                    bond.bondOrder = 'amide'
                else:
                    bond.bondOrder = bd[3]
        self.mol.bondsflag = 1
        self.mol.hasBonds = 1

    def parse_MOL2_Sets(self, setRecords):
        """Parse the SET record into self.setsDatas.

        setRecords -- [first line index, index one past the last line] of
                      the SET record in self.allLines.

        Every set adds one 7-element entry to self.setsDatas:
        the first five fields of its header line (None for missing ones),
        the remaining header fields joined with blanks (None when there
        are none), and the list of the integers found on the following
        line(s).  A line ending with a backslash is continued on the next
        line.
        """
        records = [line.split()
                   for line in self.allLines[setRecords[0]:setRecords[1]]]
        nbLines = len(records)
        i = 0
        while i < nbLines:
            header = records[i]
            # fixed layout, so that the member list is always at index 6
            rec = (header[:5] + [None] * 5)[:5]
            if len(header) > 5:
                rec.append(' '.join(header[5:]))
            else:
                rec.append(None)
            i = i + 1
            number = []
            # continuation lines end with a backslash
            while i < nbLines and records[i] and records[i][-1] == '\\':
                number.extend([int(x) for x in records[i][:-1]])
                i = i + 1
            if i < nbLines:
                number.extend([int(x) for x in records[i]])
                i = i + 1
            rec.append(number)
            self.setsDatas.append(rec)

    def hasSsDataInFile(self):
        """Collect the secondary structure sets of the file.

        The sets whose name starts with HELI, SHEE or TURN are stored in
        self.ssData as [helices, sheets, turns] after their member numbers
        have been replaced by residues (see processSSEltData).
        Returns 1 when at least one such set exists, 0 otherwise.
        """
        def named(prefix):
            return [x for x in self.setsDatas
                    if x[0] and x[0][:4] == prefix]

        hData = named('HELI')
        sData = named('SHEE')
        tData = named('TURN')
        self.processSSEltData(sData, self.mol)
        self.processSSEltData(hData, self.mol)
        self.processSSEltData(tData, self.mol)
        self.ssData = [hData, sData, tData]
        # self.ssData always has three elements, so it is the three lists
        # that have to be tested
        if hData or sData or tData:
            return 1
        return 0

    def parseSSData(self, mol):
        """Return the secondary structure description of every chain.

        The result maps each chain id of `mol` to
        [[Helix, (start, end), ...], [Strand, (start, end), ...],
         [Turn, (start, end), ...], None]
        where start and end are the first and last residues of each
        element.
        """
        if not hasattr(self, 'ssData'):
            self.hasSsDataInFile()
        strandData = self.findStrands(self.ssData[1])
        ssDataForMol = {}
        for c in mol.chains:
            helStartEndForChain = self.processSSData(self.ssData[0], c)
            helStartEndForChain.insert(0, Helix)
            strandStartEndForChain = self.processSSData(strandData, c)
            strandStartEndForChain.insert(0, Strand)
            turnStartEndForChain = self.processSSData(self.ssData[2], c)
            turnStartEndForChain.insert(0, Turn)
            ssDataForMol[c.id] = [
                helStartEndForChain, strandStartEndForChain,
                turnStartEndForChain, None
            ]
        return ssDataForMol

    def findStrands(self, data):
        """Split every sheet of `data` into its strands.

        data -- list of sheet entries (see parse_MOL2_Sets) whose member
                list holds residues from index 1 on.

        A new strand starts wherever the numbers of two consecutive
        residues do not differ by exactly one.  Returns a list with one
        entry per strand; each entry has the first six fields of its
        sheet followed by [number of residues, residue, residue, ...],
        the layout processSSData expects.
        """
        strands = []
        for sheet in data:
            members = sheet[6]
            header = list(sheet[:6])
            breaks = [i for i in range(2, len(members))
                      if int(members[i].number) -
                      int(members[i - 1].number) != 1]
            bounds = [1] + breaks + [len(members)]
            for start, stop in zip(bounds[:-1], bounds[1:]):
                residues = list(members[start:stop])
                if residues:
                    strands.append(header + [[len(residues)] + residues])
        return strands

    def processSSData(self, data, chain):
        """Return the (first residue, last residue) tuples of the entries
        of `data` that belong to `chain`.

        An entry belongs to the chain when the parent of the residue at
        index 1 of its member list has the id of the chain.  This
        information is used by the class GetSecondaryStructureFromFile.
        """
        return [(x[-1][1], x[-1][-1]) for x in data
                if x[-1][1].parent.id == chain.id]

    def processSSEltData(self, ssData, mol):
        """Replace, in place, the atom numbers of the member lists of
        `ssData` by the residue owning each atom.

        Member lists are read from index 1.  The function returns as soon
        as it meets a member that is not an integer.
        """
        # NOTE(review): atoms are looked up by position (number - 1), which
        # presumably relies on sequentially numbered atoms -- confirm.
        atoms = mol.chains.residues.atoms
        for data in ssData:
            for i in range(1, len(data[6])):
                if isinstance(data[6][i], int):
                    data[6][i] = atoms[data[6][i] - 1].parent
                else:
                    return

    def getMoleculeInformation(self):
        """Return a string describing the molecule.

        The string is the first field of the first line of the MOLECULE
        record, followed by the first field of its last line unless that
        field is USER_CHARGES or NO_CHARGES.
        """
        molStr = self.parse_MOL2_Molecule(
            self._recordLines('@<TRIPOS>MOLECULE'))
        chemical_formula = None
        if molStr != []:
            try:
                chemical_formula = molStr[-1][0]
            except IndexError:
                # the last line has no field
                pass
            if molStr[0]:
                molStr = molStr[0][0]
            else:
                molStr = ''
        else:
            molStr = ''
        if chemical_formula in ["USER_CHARGES", "NO_CHARGES"]:
            return molStr
        elif chemical_formula is not None:
            return "%s %s" % (molStr, chemical_formula)
        return molStr
def makeMoleculeFromAtoms(molname, atomSet):
    """
    build a new Protein named molname out of copies of the atoms of atomSet

    mol <- makeMoleculeFromAtoms(molname, atomSet)

    The chains and residues owning the atoms are re-created under the new
    molecule and every atom is copied into its new residue.  The copies
    are numbered by their position in atomSet.
    """
    from MolKit.molecule import Atom, AtomSet
    from MolKit.protein import Protein, Chain, Residue

    # top object of the new hierarchy
    mol = Protein(name=molname)

    # residues and chains the atoms come from
    residues = atomSet.parent.uniq()
    chains = residues.parent.uniq()

    # original chain -> new chain
    chainsd = {}
    for oldChain in chains:
        chainsd[oldChain] = Chain(oldChain.id, mol, top=mol)

    # original residue -> new residue
    resd = {}
    for oldRes in residues:
        resName = oldRes.name
        copyRes = Residue(resName[:3], resName[3:], oldRes.icode,
                          chainsd[oldRes.parent], top=mol)
        copyRes.hasCA = 0
        copyRes.hasO = 0
        resd[oldRes] = copyRes

    # attributes set by the Atom constructor, copied as they are
    plainAttrs = ('conformation', 'chemElem', 'atomicNumber',
                  'bondOrderRadius', 'covalentRadius', 'vdwRadius',
                  'maxBonds', 'organic')
    # attributes holding a dictionary, copied with copy()
    dictAttrs = ('colors', 'opacities', '_charges')
    # attributes that only some parsers create (PQR, F2D, PDBQ)
    optionalAttrs = ('pqrRadius', 'hbstatus', 'autodock_element')

    newats = []
    for num, at in enumerate(atomSet):
        name = at.name
        res = resd[at.parent]
        if name == 'CA':
            res.hasCA = 1
        if name in ('O', 'OXT') or (len(name) > 3 and name.startswith('OCT')):
            res.hasO = 2

        newat = Atom(name, res, at.element, top=mol)
        newats.append(newat)

        # one copy of every coordinate set
        newat._coords = [coords[:] for coords in at._coords]
        for attr in plainAttrs:
            setattr(newat, attr, getattr(at, attr))
        for attr in dictAttrs:
            setattr(newat, attr, getattr(at, attr).copy())
        newat.chargeSet = at.chargeSet

        # attributes coming from the PDB parser
        newat.segID = at.segID
        newat.hetatm = at.hetatm
        newat.normalname = at.normalname
        newat.number = num  # at.number
        newat.occupancy = at.occupancy
        newat.temperatureFactor = at.temperatureFactor
        newat.altname = at.altname

        for attr in optionalAttrs:
            if hasattr(at, attr):
                setattr(newat, attr, getattr(at, attr))

        # attributes created by the PDBQS parser
        if hasattr(at, 'AtVol'):
            newat.AtVol = at.AtVol
            newat.AtSolPar = at.AtSolPar

    mol.allAtoms = AtomSet(newats)
    return mol
def build4LevelsTree(self, subst_chain, atomlines):
    """Build a four level Protein-Chain-Residue-Atom tree in self.mol.

    subst_chain -- dictionary mapping a substructure name to a chain id,
                   or to the list of chain ids of the substructures
                   sharing that name.  It is modified: water entries are
                   added and chain id lists are consumed.
    atomlines   -- the split lines of the mol2 ATOM record.  Lines may be
                   modified (fused columns are split, water atoms are
                   renamed).

    Atoms whose substructure is unknown go to the chain 'default'; atoms
    whose status starts with WATER go to chain 'W', each in its own
    HOH<n> residue.
    """
    self.mol = Protein()
    self.mol.allAtoms = AtomSet()
    self.mol.atmNum = {}
    self.mol.parser = self
    if self.mol.name == 'NoName':
        self.mol.name = os.path.basename(
            os.path.splitext(self.filename)[0])
    self.mol.curChain = Chain()
    self.mol.curRes = Residue()
    self.mol.levels = [Protein, Chain, Residue, Atom]
    i = 1  # running number used to name water residues
    chainID = 'default'
    prevSubst = None  # (subst id, subst name) of the previous atom
    for atmline in atomlines:
        if len(atmline) >= 10:
            status = atmline[9].split('|')
        else:
            status = None
        # Atom type and substructure id written without a separating
        # blank: split the fused token.  A token of at most 5 characters
        # is not fused; splitting it would shift the substructure name
        # into the charge column.
        if len(atmline) == 8 and len(atmline[5]) > 5:
            fused = atmline[5]
            atmline[5] = fused[:5]
            atmline.insert(6, fused[5:])
        if status and status[0] == 'WATER' and len(atmline) >= 8:
            chainID = 'W'
            atmline[7] = 'HOH' + str(i)
            subst_chain[atmline[7]] = chainID
            i = i + 1

        # The substructure columns are optional in an ATOM line.
        if len(atmline) >= 8:
            substName = atmline[7]
            substKey = (atmline[6], substName)
        else:
            substName = None
            substKey = None

        if not subst_chain or substName not in subst_chain:
            chainID = 'default'
        else:
            entry = subst_chain[substName]
            if isinstance(entry, str):
                # only one chain has this substructure name
                chainID = entry
            elif isinstance(entry, list):
                # Several chains have a substructure with this name: take
                # the next chain id each time a new substructure starts,
                # and keep it for the remaining atoms of that
                # substructure.  (list.remove() returns None, so its
                # result must not be stored back in the table.)
                if substKey != prevSubst and entry:
                    chainID = entry.pop(0)
        prevSubst = substKey

        if chainID != self.mol.curChain.id:
            if not self.mol.chains.id or chainID not in self.mol.chains.id:
                self.mol.curChain = Chain(chainID, self.mol, top=self.mol)
            else:
                self.mol.curChain = self.mol.chains.get(chainID)[0]

        if substName is None:
            # the line has no residue name / number
            resName = 'RES'
            resSeq = '1'
        else:
            resName = substName[:3]
            resSeq = substName[3:]

        # A residue with the same name in another chain is a different
        # residue, so the chain is compared as well.
        if resSeq != self.mol.curRes.number or \
           resName != self.mol.curRes.type or \
           getattr(self.mol.curRes, 'parent', None) is not self.mol.curChain:
            # check if this residue already exists
            na = resName.strip() + resSeq.strip()
            res = self.mol.curChain.get(na)
            if res:
                self.mol.curRes = res[0]
            else:
                self.mol.curRes = Residue(resName, resSeq, '',
                                          self.mol.curChain, top=self.mol)
        name = atmline[1]
        if name == 'CA':
            self.mol.curRes.hasCA = 1
        if name == 'O':
            self.mol.curRes.hasO = 2
        atom = Atom(name, self.mol.curRes, top=self.mol,
                    chemicalElement=atmline[5].split('.')[0])
        atom.element = atom.chemElem
        atom.number = int(atmline[0])
        self.mol.atmNum[atom.number] = atom
        atom._coords = [[
            float(atmline[2]),
            float(atmline[3]),
            float(atmline[4])
        ]]
        # the charge column is optional
        if len(atmline) >= 9:
            atom._charges['mol2'] = float(atmline[8])
            atom.chargeSet = 'mol2'
        atom.hetatm = 0
        # list of strings describing the Sybyl status bits of the atom
        atom.status = status
        # add altname so buildBondsByDist doesn't croak
        atom.altname = None
        self.mol.allAtoms.append(atom)
    delattr(self.mol, 'curRes')
    delattr(self.mol, 'curChain')
#rec = rec + ' %-2.2s'%atm.autodock_element.upper() ## #NB: write 'A' in element slot for aromatic carbons ## if atm.autodock_element=='A': ## #in this case, columns 78+79 are blanks ## rec = rec + 'A ' ## else: ## #rec = rec + '%2.2s'%atm.element ## #5/19: ## #columns 78+79: autodock_element ## rec = rec + '%s '%atm.autodock_element ## #if atm.element!=atm.autodock_element: ## # #eg HD or NA or SA or OA, always 2 chars ## # rec = rec + '%s '%atm.autodock_element[1] ## #else: ## # rec = rec + ' ' rec = rec + '\n' return rec if __name__=='__main__': from MolKit.protein import Protein from MolKit.pdbParser import PdbParser mol = Protein() mol.read('/tsri/pdb/struct/4tpi.pdb', PdbParser()) writer = PdbWriter() writer.add_userRecord('REMARK', ) writer.add_userRecord('TITLE ', [('', 'This is the title record\n')]) writer.write('/home/ktchan/jumble.pdb', mol)
class FloodPlayer(Player):
    """Player stepping through the frames of an AutoLigand flood file.

    The file is a sequence of pickles: a header holding the grid
    parameters, the first flood, then one (removed points, added points)
    pair per following frame.  Each frame is shown as a set of atoms of a
    molecule added to the viewer.
    """

    # flood point type (fl[4]) -> (atom name, row of AAradii giving its radius)
    _floodTypes = {
        # note, this will color the NA atom pink (the PDB color for Phosphorus)
        7: ('P', 13),
        6: ('S', 13),
        5: ('A', 10),
        4: ('O', 1),
        3: ('N', 4),
        2: ('C', 10),
        1: ('H', 15),
    }

    def __init__(self, command, file):
        """Load the flood file `file` and open the player window.

        command -- command object; its vf attribute gives access to the
                   viewer, the GUI root and the AutoLigandCommand.
        file    -- path of the pickled flood file.
        """
        master = command.vf.GUI.ROOT
        self.autoLigandCommand = command.vf.AutoLigandCommand
        self.autoLigandCommand.spheres.Set(visible=1)
        self.autoLigandCommand.halo.Set(visible=1)
        self.floods = []
        self.centers = []
        self._loadFloods(file)

        fileName = os.path.splitext(os.path.split(file)[-1])[0]
        self.mol = Protein(fileName)
        self.mol.allAtoms = AtomSet([])
        chain = Chain()
        self.residue = Residue(type="UNK")
        chain.adopt(self.residue, setChildrenTop=1)
        self.mol.adopt(chain, setChildrenTop=1)
        self.mol.parser = None
        self.filename = file

        # The molecule is created with the first point of the first flood;
        # nextFrame adds and removes the other atoms.
        x, y, z, atomchr, radius = self._floodAtom(self.floods[0][0])
        a = Atom(atomchr, self.residue, atomchr, top=self.mol)
        a._coords = [[x, y, z]]
        a._charges = {}
        a.hetatm = 1
        a.number = 0
        a.radius = radius
        self.mol.allAtoms = self.residue.atoms

        vf = self.autoLigandCommand.vf
        self.mol = vf.addMolecule(self.mol, False)
        self.mol.levels = [Protein, Chain, Residue, Atom]
        vf.displayCPK(self.mol, scaleFactor=0.4)
        vf.colorByAtomType(self.mol, ['cpk'], log=0)
        vf.displayLines(self.mol, negate=True, displayBO=False,
                        lineWidth=2, log=0, only=False)
        self.colorKeys = list(a.colors.keys())

        maxLen = len(self.floods) - 1
        Player.__init__(self, master=master, endFrame=maxLen,
                        maxFrame=maxLen,
                        titleStr="AutoLigand Flood Player", hasSlider=True)
        try:
            # withdrew SetAnim button
            self.form.ifd.entryByName['setanimB']['widget'].grid_forget()
            self.form.autoSize()
        except Exception:
            # best effort: the player works with the button left in place
            pass
        self.nextFrame(0)
        self.form.root.protocol('WM_DELETE_WINDOW', self.hide_cb)

    def _loadFloods(self, file):
        """Read the grid parameters and all the frames of the flood file.

        Sets xcent, ycent, zcent, centerx, centery, centerz and spacing
        from the first pickle and fills self.floods with one list of
        points per frame.  An error while reading the header is reported
        and re-raised.
        """
        # NOTE: pickle.load executes code found in the file; only open
        # flood files coming from a trusted source.
        with open(file, 'rb') as pkl_file:
            try:
                data = pickle.load(pkl_file)
            except Exception as inst:
                # nothing can be played without the grid parameters
                print("Error loading %s\n%s" % (file, inst))
                raise
            (self.xcent, self.ycent, self.zcent, self.centerx,
             self.centery, self.centerz, self.spacing) = data[:7]

            data = pickle.load(pkl_file)
            self.floods.append(data[1])
            try:
                # every following pickle holds the points to remove from
                # (data[0]) and to add to (data[1]) the previous frame
                while data:
                    data = pickle.load(pkl_file)
                    flood = copy.copy(self.floods[-1])
                    for item in data[0]:
                        flood.remove(item)
                    for item in data[1]:
                        flood.append(item)
                    self.floods.append(flood)
            except EOFError:
                # end of file: all the frames have been read
                pass

    def _floodAtom(self, fl):
        """Return (x, y, z, atom name, radius) for the flood point `fl`.

        fl[1:4] are grid indices converted with the grid parameters of
        the file; fl[4] is the type of the point.  Raises ValueError for
        a type that is not in _floodTypes.
        """
        try:
            atomchr, radiusRow = self._floodTypes[fl[4]]
        except KeyError:
            raise ValueError("unknown flood point type %r" % (fl[4],))
        x = (fl[1] - self.xcent) * self.spacing + self.centerx
        y = (fl[2] - self.ycent) * self.spacing + self.centery
        z = (fl[3] - self.zcent) * self.spacing + self.centerz
        return x, y, z, atomchr, AAradii[radiusRow][0]

    def nextFrame(self, id):
        """Display frame number `id`.

        Atoms are created for the points of the frame that have no atom
        yet, atoms whose coordinates are not in the frame are removed,
        and the halo is set on the atoms that were added (it is hidden on
        the last frame).  Nothing is done when `id` is the current frame.
        """
        #Player.nextFrame(self, id)
        id = int(id)
        if id == self.currentFrameIndex:
            return
        if self.hasCounter and self.gui:
            self.form.ent2.delete(0, 'end')
            self.form.ent2.insert(0, str(id))
        if self.hasSlider:
            self.form.ifd.entryByName['slider']['widget'].set(id)
        self.currentFrameIndex = id

        removeAtoms = AtomSet([])
        addAtoms = AtomSet([])
        centers = []  # coordinates of the frame that already have an atom
        # coordinates of the atoms displayed before this frame
        prev_coords = self.mol.allAtoms.coords
        for fl in self.floods[id]:
            x, y, z, atomchr, radius = self._floodAtom(fl)
            if [x, y, z] not in prev_coords:
                a = Atom(atomchr, self.residue, atomchr, top=self.mol)
                a._coords = [[x, y, z]]
                a._charges = {}
                a.hetatm = 1
                a.radius = radius
                addAtoms.append(a)
                for key in self.colorKeys:
                    a.colors[key] = AtomElements[atomchr]
                    a.opacities[key] = 1.0
            else:
                centers.append([x, y, z])

        # the atoms added above come after the previous ones, so the
        # positions in prev_coords are still valid indices in allAtoms
        for index, coord in enumerate(prev_coords):
            if coord not in centers:
                removeAtoms.append(self.mol.allAtoms[index])

        self.residue.assignUniqIndex()  #this is needed to avoid Traceback later on
        self.mol.allAtoms.stringRepr = None  #stringRepr can be very large causing memory errors
        event = AddAtomsEvent(objects=addAtoms)
        #self.autoLigandCommand.vf.dispatchEvent(event)
        self.autoLigandCommand.vf.displayCPK.updateGeom(event)
        event = DeleteAtomsEvent(objects=removeAtoms)
        #self.autoLigandCommand.vf.dispatchEvent(event)
        self.autoLigandCommand.vf.displayCPK.updateGeom(event)
        for atom in removeAtoms:
            self.residue.atoms.remove(atom)
        if id == self.maxFrame:
            self.autoLigandCommand.halo.Set(visible=0)
        else:
            self.autoLigandCommand.halo.Set(centers=addAtoms.coords,
                                            materials=((1, 1, 0, 0.5), ),
                                            radii=0.4)

    def hide_cb(self):
        """Hide the AutoLigand geometries and destroy the player window."""
        self.autoLigandCommand.hideGeoms()
        self.form.destroy()