class exANM(ANMBase):

    """Class for explicit ANM (exANM) method ([FT00]_).

    Optional arguments build a membrane lattice and permit analysis of the
    membrane effect on elastic network models with the *exANM* method
    described in [TL12]_.

    .. [TL12] Lezon TR, Bahar I, Constraints Imposed by the Membrane
       Selectively Guide the Alternating Access Dynamics of the Glutamate
       Transporter GltPh
    """

    def __init__(self, name='Unknown'):
        super(exANM, self).__init__(name)
        # AtomGroup holding the membrane lattice nodes (set by buildMembrane).
        self._membrane = None
        # Protein + membrane AtomGroup (set by combineMembraneProtein).
        self._combined = None

    def buildMembrane(self, coords, **kwargs):
        """Build the membrane lattice around the given coordinate set.

        Lattice points are generated from the primitive vectors returned by
        ``assign_lpvs(lat)``, scaled by ``2 * r``.  A point is kept when it is
        closer than *R* to the origin, lies between *membrane_lo* and
        *membrane_hi* along z, falls inside the bounding box of *coords*
        (padded by ``R / 2`` in x and y), and passes ``checkClash`` against
        *coords*.  The result is stored as an :class:`.AtomGroup` titled
        ``"Membrane"``.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_hi: upper z bound of the membrane, default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: lower z bound of the membrane, default is -13.0
        :type membrane_lo: float

        :arg R: radius of the membrane, measured from the origin,
            default is 80
        :type R: float

        :arg r: radius of a lattice node (lattice spacing is a multiple of
            ``2 * r``), default is 5
        :type r: float

        :arg lat: lattice type which could be FCC (face-centered-cubic,
            default), SC (simple cubic), SH (simple hexagonal)
        :type lat: str

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with a ``getCoords`` method
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        # Bounding box of the input; the appended sentinels keep min/max
        # defined even for an empty coordinate set.
        pxlo = min(np.append(coords[:, 0], 10000))
        pxhi = max(np.append(coords[:, 0], -10000))
        pylo = min(np.append(coords[:, 1], 10000))
        pyhi = max(np.append(coords[:, 1], -10000))
        pzlo = min(np.append(coords[:, 2], 10000))
        pzhi = max(np.append(coords[:, 2], -10000))

        membrane_hi = float(kwargs.get('membrane_hi', 13.0))
        membrane_lo = float(kwargs.get('membrane_lo', -13.0))
        R = float(kwargs.get('R', 80))
        r = float(kwargs.get('r', 5))
        lat = str(kwargs.get('lat', 'FCC'))
        lpv = assign_lpvs(lat)

        half_thickness = (membrane_hi - membrane_lo) / 2.
        imax = (R + lpv[0, 2] * half_thickness) / r
        jmax = (R + lpv[1, 2] * half_thickness) / r
        kmax = (R + lpv[2, 2] * half_thickness) / r

        LOGGER.timeit('_membrane')
        R2 = R ** 2
        half_R = R / 2
        # Accepted nodes are gathered in a list and stacked once at the end;
        # growing an array with np.append inside the loop is quadratic.
        nodes = []
        for i in range(-int(imax), int(imax + 1)):
            for j in range(-int(jmax), int(jmax + 1)):
                for k in range(-int(kmax), int(kmax + 1)):
                    X = np.zeros((1, 3))
                    for p in range(3):
                        X[0, p] = 2. * r * (i * lpv[0, p] + j * lpv[1, p] +
                                            k * lpv[2, p])
                    x, y, z = X[0]
                    dd = x * x + y * y + z * z
                    if not (dd < R2 and membrane_lo < z < membrane_hi):
                        continue
                    if not (pxlo - half_R < x < pxhi + half_R and
                            pylo - half_R < y < pyhi + half_R and
                            pzlo < z < pzhi):
                        continue
                    if checkClash(X, coords[:natoms, :], radius=5):
                        nodes.append(X)

        atm = len(nodes)
        # NOTE(review): when no node is accepted the original fell back to a
        # single all-zero row while assigning zero-length attributes; that
        # fallback is kept unchanged -- confirm the intended behaviour.
        membrane = np.concatenate(nodes, axis=0) if nodes else np.zeros((1, 3))

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(list(range(atm)))
        self._membrane.setResnames(["NE1"] * atm)
        self._membrane.setChids(["Q"] * atm)
        self._membrane.setElements(["Q1"] * atm)
        self._membrane.setNames(["Q1"] * atm)
        LOGGER.report('Membrane was built in %.2fs.', label='_membrane')

    def buildHessian(self, coords, cutoff=15., gamma=1., **kwargs):
        """Build Hessian matrix for given coordinate set.

        The Hessian of the combined system (input coordinates followed by
        membrane nodes) is assembled first.  It is then reduced to the block
        of the input coordinates as ``ss - so * inv(oo) * os``, where ``s``
        denotes the input atoms and ``o`` the membrane nodes.  When no
        membrane has been built yet, :meth:`buildMembrane` is called with the
        membrane keyword arguments given here.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg cutoff: cutoff distance (Å) for pairwise interactions,
            default is 15.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg membrane_hi: upper z bound of the membrane, default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: lower z bound of the membrane, default is -13.0
        :type membrane_lo: float

        :arg R: radius of the membrane, measured from the origin,
            default is 80
        :type R: float

        :arg r: radius of a lattice node, default is 5
        :type r: float

        :arg lat: lattice type which could be FCC (face-centered-cubic,
            default), SC (simple cubic), SH (simple hexagonal)
        :type lat: str

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with a ``getCoords`` method
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        if self._membrane is None:
            self.buildMembrane(
                coords,
                membrane_hi=float(kwargs.get('membrane_hi', 13.0)),
                membrane_lo=float(kwargs.get('membrane_lo', -13.0)),
                R=float(kwargs.get('R', 80)),
                r=float(kwargs.get('r', 5)),
                lat=str(kwargs.get('lat', 'FCC')))

        LOGGER.timeit('_exanm')
        coords = np.concatenate((coords, self._membrane.getCoords()), axis=0)
        self._combined_coords = coords
        total_natoms = int(coords.shape[0])
        total_hessian = np.zeros((total_natoms * 3, total_natoms * 3), float)

        cutoff, g, gamma = checkENMParameters(cutoff, gamma)
        cutoff2 = cutoff * cutoff
        for i in range(total_natoms):
            res_i3 = i * 3
            res_i33 = res_i3 + 3
            i_p1 = i + 1
            # Vectors from node i to every later node; each pair is visited
            # exactly once.
            i2j_all = coords[i_p1:, :] - coords[i]
            for j, dist2 in enumerate((i2j_all ** 2).sum(1)):
                if dist2 > cutoff2:
                    continue
                i2j = i2j_all[j]
                j += i_p1
                g = gamma(dist2, i, j)
                res_j3 = j * 3
                res_j33 = res_j3 + 3
                super_element = np.outer(i2j, i2j) * (- g / dist2)
                total_hessian[res_i3:res_i33, res_j3:res_j33] = super_element
                total_hessian[res_j3:res_j33, res_i3:res_i33] = super_element
                total_hessian[res_i3:res_i33, res_i3:res_i33] -= super_element
                total_hessian[res_j3:res_j33, res_j3:res_j33] -= super_element

        # Partition exactly at 3 * natoms: the membrane block starts at that
        # index, so slicing from ``3 * natoms + 1`` would drop the first
        # membrane degree of freedom from the reduction.
        n3 = natoms * 3
        ss = total_hessian[:n3, :n3]
        so = total_hessian[:n3, n3:]
        o_s = total_hessian[n3:, :n3]
        oo = total_hessian[n3:, n3:]
        self._hessian = ss - np.dot(so, np.dot(linalg.inv(oo), o_s))
        LOGGER.report('Hessian was built in %.2fs.', label='_exanm')
        self._dof = self._hessian.shape[0]

    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is ``False``

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is ``True``
        """

        super(exANM, self).calcModes(n_modes, zeros, turbo)

    def getMembrane(self):
        """Returns a copy of the membrane, or **None** when no membrane has
        been built."""

        if self._membrane is not None:
            return self._membrane.copy()

    def _getMembrane(self):
        """Returns the membrane itself (not a copy)."""

        return self._membrane

    def combineMembraneProtein(self, coords):
        """Store the combination of *coords* and the membrane so that it can
        be written with :meth:`writeCombinedPDB`.

        :arg coords: protein atoms to combine with the membrane
        :type coords: :class:`.AtomGroup`

        :raises TypeError: when *coords* is not an :class:`.AtomGroup` or
            cannot be added to the membrane
        """

        if not isinstance(coords, AtomGroup):
            # Previously ignored silently, leaving ``_combined`` unset.
            raise TypeError('coords must be an AtomGroup object '
                            'with `getCoords` method')
        try:
            self._combined = coords + self._membrane
        except TypeError:
            raise TypeError('coords must be an AtomGroup object '
                            'with `getCoords` method')

    def writeCombinedPDB(self, filename):
        """Write the combined protein and membrane structure to a PDB file.
        :meth:`combineMembraneProtein` must have been called beforehand.

        :arg filename: filename for the pdb file.
        :type filename: str

        :raises ValueError: when no combined structure is available
        """

        if self._combined is None:
            # The protein atoms are not known here, so the combination cannot
            # be built on the fly.
            raise ValueError('Combined structure not found. Use '
                             'buildMembrane() and combineMembraneProtein() '
                             'functions first.')
        writePDB(filename, self._combined)
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.

    This function also parses the angles, dihedrals, impropers, donors,
    acceptors, nonbonded exclusions and crossterms sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` header is missing, the atom block is
        too short, or a section holds fewer entries than its header declares
    """

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError('ag must be an AtomGroup instance')

    def read_section(stop, parse_count=True):
        """Collect non-blank lines until a line containing *stop*; return the
        collected text and the count announced on that header line."""
        count = 0
        chunk = []
        for line in psf:
            if line.strip() == b'':
                continue
            if stop in line:
                if parse_count:
                    count = int(line.split()[0])
                break
            chunk.append(line.decode(encoding='UTF-8'))
        return ''.join(chunk), count

    def to_indices(text, count, label):
        """Parse exactly *count* integers from *text*."""
        values = fromstring(text, count=count, dtype=int, sep=' ')
        if len(values) != count:
            raise IOError('number of {0} expected and parsed do not match'
                          .format(label))
        return values

    psf = openFile(filename, 'rb')
    # try/finally guarantees the file is closed on every error path.
    try:
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b'!NATOM'):
                n_atoms = int(line.split(b'!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM section is not found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)

        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Atom block: runs until the !NBOND header.
        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b'':
                continue
            if b'!NBOND:' in line.upper():
                n_bonds = int(line.split()[0])
                break
            if n + 1 > n_atoms:
                continue

            if len(line) <= 71:
                # Fixed-width columns.
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # Longer lines are treated as whitespace separated fields.
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError(
                'number of lines in PSF atoms block is less than the number '
                'of atoms')

        # Each section's body precedes the header of the following section,
        # so the header that ends one section announces the next count.
        text, n_angles = read_section(b'!NTHETA')
        b_array = to_indices(text, n_bonds * 2, 'bonds')

        text, n_dihedrals = read_section(b'!NPHI')
        a_array = to_indices(text, n_angles * 3, 'angles')

        text, n_impropers = read_section(b'!NIMPHI')
        d_array = to_indices(text, n_dihedrals * 4, 'dihedrals')

        text, n_donors = read_section(b'!NDON')
        i_array = to_indices(text, n_impropers * 4, 'impropers')

        text, n_acceptors = read_section(b'!NACC')
        do_array = to_indices(text, n_donors * 2, 'donors')

        text, n_exclusions = read_section(b'!NNB')
        ac_array = to_indices(text, n_acceptors * 2, 'acceptors')

        # The exclusion list ends at the next header of any kind; its count
        # is not used.
        text, _ = read_section(b'!', parse_count=False)
        nbe_array = to_indices(text, n_exclusions * 2, 'nonbonded exclusions')

        n_crossterms = 0
        for line in psf:
            if b'!NCRTERM' in line:
                n_crossterms = int(line.split()[0])
                break

        text = ''.join([line.decode(encoding='UTF-8') for line in psf])
        c_array = to_indices(text, n_crossterms * 4, 'crossterms')
    finally:
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # PSF indices are 1-based; shift to 0-based before storing.
    if n_bonds > 0:
        b_array = add(b_array, -1, b_array)
        ag.setBonds(b_array.reshape((n_bonds, 2)))

    if n_angles > 0:
        a_array = add(a_array, -1, a_array)
        ag.setAngles(a_array.reshape((n_angles, 3)))

    if n_dihedrals > 0:
        d_array = add(d_array, -1, d_array)
        ag.setDihedrals(d_array.reshape((n_dihedrals, 4)))

    if n_impropers > 0:
        i_array = add(i_array, -1, i_array)
        ag.setImpropers(i_array.reshape((n_impropers, 4)))

    if n_donors > 0:
        do_array = add(do_array, -1, do_array)
        ag.setDonors(do_array.reshape((n_donors, 2)))

    if n_acceptors > 0:
        ac_array = add(ac_array, -1, ac_array)
        ag.setAcceptors(ac_array.reshape((n_acceptors, 2)))

    if n_exclusions > 0:
        nbe_array = add(nbe_array, -1, nbe_array)
        ag.setNBExclusions(nbe_array.reshape((n_exclusions, 2)))

    if n_crossterms > 0:
        c_array = add(c_array, -1, c_array)
        ag.setCrossterms(c_array.reshape((n_crossterms, 4)))

    return ag
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be saved
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)

    :raises TypeError: when *cci* is not a string
    :raises ValueError: when *cci* is neither a file nor a short alphanumeric
        identifier
    :raises IOError: when the XML file cannot be retrieved online
    """

    # ``xml.etree.cElementTree`` was removed in Python 3.9; ElementTree picks
    # up the C accelerator on its own when available.
    import xml.etree.ElementTree as ET

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')

    # Both stay None unless the ``ligand_xml_save`` setting is enabled.
    folder = None
    xmlgz = None

    if isfile(cci):
        with openFile(cci) as inp:
            xml_data = inp.read()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml_data = None
        cci = cci.upper()

        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml_data = inp.read()
        else:
            path = None

        url = 'http://www.pdb.org/pdb/files/ligand/{0}.xml'.format(cci.upper())
        if not xml_data:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            else:
                xml_data = inp.read()
                inp.close()

            if filename:
                # ``folder`` is only known when XML caching is enabled;
                # otherwise *filename* is used as given.
                if folder is None:
                    out = openFile(filename, mode='w')
                else:
                    out = openFile(filename, mode='w', folder=folder)
                out.write(xml_data)
                out.close()

            if xmlgz is not None:
                with openFile(xmlgz, 'w') as out:
                    out.write(xml_data)

    root = ET.XML(xml_data)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')

    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text

    # The field is optional (see docstring); convert it only when present.
    formula_weight = dict_.get('formula_weight')
    if formula_weight is not None:
        dict_['formula_weight'] = float(formula_weight)

    # Explicit ``is not None`` checks: truth-testing an Element is deprecated
    # and only reflects whether it has children.
    identifiers_and_descriptors = []
    for category in ('pdbx_chem_comp_identifierCategory',
                     'pdbx_chem_comp_descriptorCategory'):
        results = root.find(ns + category)
        if results is not None:
            identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    audit_category = root.find(ns + 'pdbx_chem_comp_auditCategory')
    dict_['audits'] = [
        (audit.get('action_type'), audit.get('date'))
        for audit in (audit_category if audit_category is not None else [])]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)
    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = dict([(child.tag[len_ns:], child.text) for child in atom])

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        # NOTE(review): the comparison against 'Y' is kept from the original;
        # confirm which values this field actually takes.
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model

    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    bond_category = root.find(ns + 'chem_comp_bondCategory')
    for bond in (list(bond_category) if bond_category is not None else []):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            # Warn once per unknown atom name and skip the bond.
            for name in (name_1, name_2):
                if name not in warned and name not in name2index:
                    warned.add(name)
                    LOGGER.warn('{0} specified {1} in bond category is not '
                                'a valid atom name.'
                                .format(repr(name), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file differ in number of atoms
    :raises IOError: when the ``!NATOM`` or ``!NBOND:`` header is missing,
        the atom block is too short, or fewer bonds are found than declared
    """

    if ag is not None and not isinstance(ag, AtomGroup):
        raise TypeError('ag must be an AtomGroup instance')

    # The file is opened in binary mode, so every comparison below uses
    # bytes literals (str literals raise TypeError on Python 3).
    psf = openFile(filename, 'rb')
    try:
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b'!NATOM'):
                n_atoms = int(line.split(b'!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('!NATOM section is not found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)

        if ag is None:
            ag = AtomGroup(title)
        elif n_atoms != ag.numAtoms():
            raise ValueError('ag and PSF file must have same number of atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        lines = psf.readlines(71 * (n_atoms + 5))
        # The size hint assumes 71-byte lines; top up line by line when atom
        # records are longer than that.
        while len(lines) < n_atoms:
            line = psf.readline()
            if not line:
                break
            lines.append(line)
        if len(lines) < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')

        for i, line in enumerate(lines[:n_atoms]):
            if len(line) <= 71:
                # Fixed-width columns.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Longer lines are treated as whitespace separated fields.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Locate the bond header, reading further lines when it is not among
        # those already buffered.
        n_bonds = None
        i = n_atoms
        while n_bonds is None:
            if i == len(lines):
                line = psf.readline()
                if not line:
                    raise IOError('!NBOND section is not found in PSF file')
                lines.append(line)
            items = lines[i].split()
            if len(items) >= 2 and items[1] == b'!NBOND:':
                n_bonds = int(items[0])
            i += 1

        # Gather bond records up to the next section header (any line with
        # '!') or end of file, first from the buffer and then from the file.
        chunk = []
        section_ended = False
        for line in lines[i:]:
            if b'!' in line:
                section_ended = True
                break
            chunk.append(line)
        if not section_ended:
            line = psf.readline()
            while line and b'!' not in line:
                chunk.append(line)
                line = psf.readline()

        text = b''.join(chunk).decode('UTF-8')
        array = fromstring(text, count=n_bonds * 2, dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # Close the file on error paths as well.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # PSF indices are 1-based; shift to 0-based before storing.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
class exANM(ANMBase):

    """Class for explicit ANM (exANM) method ([FT00]_).

    Optional arguments build a membrane lattice and permit analysis of the
    membrane effect on elastic network models with the *exANM* method
    described in [TL12]_.

    .. [TL12] Lezon TR, Bahar I, Constraints Imposed by the Membrane
       Selectively Guide the Alternating Access Dynamics of the Glutamate
       Transporter GltPh
    """

    def __init__(self, name='Unknown'):
        super(exANM, self).__init__(name)
        # AtomGroup holding the membrane lattice nodes (set by buildMembrane).
        self._membrane = None
        # Protein + membrane AtomGroup (set by combineMembraneProtein).
        self._combined = None

    def buildMembrane(self, coords, **kwargs):
        """Build the membrane lattice around the given coordinate set.

        Lattice points are generated from the primitive vectors returned by
        ``assign_lpvs(lat)``, scaled by ``2 * r``.  A point is kept when it is
        closer than *R* to the origin, lies between *membrane_lo* and
        *membrane_hi* along z, falls inside the bounding box of *coords*
        (padded by ``R / 2`` in x and y), and passes ``checkClash`` against
        *coords*.  The result is stored as an :class:`.AtomGroup` titled
        ``"Membrane"``.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_hi: upper z bound of the membrane, default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: lower z bound of the membrane, default is -13.0
        :type membrane_lo: float

        :arg R: radius of the membrane, measured from the origin,
            default is 80
        :type R: float

        :arg r: radius of a lattice node (lattice spacing is a multiple of
            ``2 * r``), default is 5
        :type r: float

        :arg lat: lattice type which could be FCC (face-centered-cubic,
            default), SC (simple cubic), SH (simple hexagonal)
        :type lat: str

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with a ``getCoords`` method
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        # Bounding box of the input; the appended sentinels keep min/max
        # defined even for an empty coordinate set.
        pxlo = min(np.append(coords[:, 0], 10000))
        pxhi = max(np.append(coords[:, 0], -10000))
        pylo = min(np.append(coords[:, 1], 10000))
        pyhi = max(np.append(coords[:, 1], -10000))
        pzlo = min(np.append(coords[:, 2], 10000))
        pzhi = max(np.append(coords[:, 2], -10000))

        membrane_hi = float(kwargs.get('membrane_hi', 13.0))
        membrane_lo = float(kwargs.get('membrane_lo', -13.0))
        R = float(kwargs.get('R', 80))
        r = float(kwargs.get('r', 5))
        lat = str(kwargs.get('lat', 'FCC'))
        lpv = assign_lpvs(lat)

        half_thickness = (membrane_hi - membrane_lo) / 2.
        imax = (R + lpv[0, 2] * half_thickness) / r
        jmax = (R + lpv[1, 2] * half_thickness) / r
        kmax = (R + lpv[2, 2] * half_thickness) / r

        LOGGER.timeit('_membrane')
        R2 = R ** 2
        half_R = R / 2
        # Accepted nodes are gathered in a list and stacked once at the end;
        # growing an array with np.append inside the loop is quadratic.
        nodes = []
        for i in range(-int(imax), int(imax + 1)):
            for j in range(-int(jmax), int(jmax + 1)):
                for k in range(-int(kmax), int(kmax + 1)):
                    X = np.zeros((1, 3))
                    for p in range(3):
                        X[0, p] = 2. * r * (i * lpv[0, p] + j * lpv[1, p] +
                                            k * lpv[2, p])
                    x, y, z = X[0]
                    dd = x * x + y * y + z * z
                    if not (dd < R2 and membrane_lo < z < membrane_hi):
                        continue
                    if not (pxlo - half_R < x < pxhi + half_R and
                            pylo - half_R < y < pyhi + half_R and
                            pzlo < z < pzhi):
                        continue
                    if checkClash(X, coords[:natoms, :], radius=5):
                        nodes.append(X)

        atm = len(nodes)
        # NOTE(review): when no node is accepted the original fell back to a
        # single all-zero row while assigning zero-length attributes; that
        # fallback is kept unchanged -- confirm the intended behaviour.
        membrane = np.concatenate(nodes, axis=0) if nodes else np.zeros((1, 3))

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(list(range(atm)))
        self._membrane.setResnames(["NE1"] * atm)
        self._membrane.setChids(["Q"] * atm)
        self._membrane.setElements(["Q1"] * atm)
        self._membrane.setNames(["Q1"] * atm)
        LOGGER.report('Membrane was built in %.2fs.', label='_membrane')

    def buildHessian(self, coords, cutoff=15., gamma=1., **kwargs):
        """Build Hessian matrix for given coordinate set.

        The Hessian of the combined system (input coordinates followed by
        membrane nodes) is assembled first.  It is then reduced to the block
        of the input coordinates as ``ss - so * inv(oo) * os``, where ``s``
        denotes the input atoms and ``o`` the membrane nodes.  When no
        membrane has been built yet, :meth:`buildMembrane` is called with the
        membrane keyword arguments given here.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg cutoff: cutoff distance (Å) for pairwise interactions,
            default is 15.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg membrane_hi: upper z bound of the membrane, default is 13.0
        :type membrane_hi: float

        :arg membrane_lo: lower z bound of the membrane, default is -13.0
        :type membrane_lo: float

        :arg R: radius of the membrane, measured from the origin,
            default is 80
        :type R: float

        :arg r: radius of a lattice node, default is 5
        :type r: float

        :arg lat: lattice type which could be FCC (face-centered-cubic,
            default), SC (simple cubic), SH (simple hexagonal)
        :type lat: str

        :raises TypeError: when *coords* is neither a valid coordinate array
            nor an object with a ``getCoords`` method
        """

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = natoms = int(coords.shape[0])

        if self._membrane is None:
            self.buildMembrane(
                coords,
                membrane_hi=float(kwargs.get('membrane_hi', 13.0)),
                membrane_lo=float(kwargs.get('membrane_lo', -13.0)),
                R=float(kwargs.get('R', 80)),
                r=float(kwargs.get('r', 5)),
                lat=str(kwargs.get('lat', 'FCC')))

        LOGGER.timeit('_exanm')
        coords = np.concatenate((coords, self._membrane.getCoords()), axis=0)
        self._combined_coords = coords
        total_natoms = int(coords.shape[0])
        total_hessian = np.zeros((total_natoms * 3, total_natoms * 3), float)

        cutoff, g, gamma = checkENMParameters(cutoff, gamma)
        cutoff2 = cutoff * cutoff
        for i in range(total_natoms):
            res_i3 = i * 3
            res_i33 = res_i3 + 3
            i_p1 = i + 1
            # Vectors from node i to every later node; each pair is visited
            # exactly once.
            i2j_all = coords[i_p1:, :] - coords[i]
            for j, dist2 in enumerate((i2j_all ** 2).sum(1)):
                if dist2 > cutoff2:
                    continue
                i2j = i2j_all[j]
                j += i_p1
                g = gamma(dist2, i, j)
                res_j3 = j * 3
                res_j33 = res_j3 + 3
                super_element = np.outer(i2j, i2j) * (- g / dist2)
                total_hessian[res_i3:res_i33, res_j3:res_j33] = super_element
                total_hessian[res_j3:res_j33, res_i3:res_i33] = super_element
                total_hessian[res_i3:res_i33, res_i3:res_i33] -= super_element
                total_hessian[res_j3:res_j33, res_j3:res_j33] -= super_element

        # Partition exactly at 3 * natoms: the membrane block starts at that
        # index, so slicing from ``3 * natoms + 1`` would drop the first
        # membrane degree of freedom from the reduction.
        n3 = natoms * 3
        ss = total_hessian[:n3, :n3]
        so = total_hessian[:n3, n3:]
        o_s = total_hessian[n3:, :n3]
        oo = total_hessian[n3:, n3:]
        self._hessian = ss - np.dot(so, np.dot(linalg.inv(oo), o_s))
        LOGGER.report('Hessian was built in %.2fs.', label='_exanm')
        self._dof = self._hessian.shape[0]

    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If ``None`` is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If ``True``, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is ``False``

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is ``True``
        """

        super(exANM, self).calcModes(n_modes, zeros, turbo)

    def getMembrane(self):
        """Returns a copy of the membrane, or **None** when no membrane has
        been built."""

        if self._membrane is not None:
            return self._membrane.copy()

    def _getMembrane(self):
        """Returns the membrane itself (not a copy)."""

        return self._membrane

    def combineMembraneProtein(self, coords):
        """Store the combination of *coords* and the membrane so that it can
        be written with :meth:`writeCombinedPDB`.

        :arg coords: protein atoms to combine with the membrane
        :type coords: :class:`.AtomGroup`

        :raises TypeError: when *coords* is not an :class:`.AtomGroup` or
            cannot be added to the membrane
        """

        if not isinstance(coords, AtomGroup):
            # Previously ignored silently, leaving ``_combined`` unset.
            raise TypeError('coords must be an AtomGroup object '
                            'with `getCoords` method')
        try:
            self._combined = coords + self._membrane
        except TypeError:
            raise TypeError('coords must be an AtomGroup object '
                            'with `getCoords` method')

    def writeCombinedPDB(self, filename):
        """Write the combined protein and membrane structure to a PDB file.
        :meth:`combineMembraneProtein` must have been called beforehand.

        :arg filename: filename for the pdb file.
        :type filename: str

        :raises ValueError: when no combined structure is available
        """

        if self._combined is None:
            # The protein atoms are not known here, so the combination cannot
            # be built on the fly.
            raise ValueError('Combined structure not found. Use '
                             'buildMembrane() and combineMembraneProtein() '
                             'functions first.')
        writePDB(filename, self._combined)
def parseEMDStream(stream, **kwargs):
    """Parse a data stream from an EMD/MRC2014 file and return the density
    map, an :class:`.AtomGroup` of TRN nodes fitted to it, or both.

    :arg stream: Any object with the method ``readlines``
        (e.g. :class:`file`, buffer, stdin)

    :arg cutoff: fallback for *min_cutoff* when that keyword is absent
    :type cutoff: number or None

    :arg min_cutoff: lower cutoff handed to :class:`EMDMAP`
    :type min_cutoff: number or None

    :arg max_cutoff: upper cutoff handed to :class:`EMDMAP`
    :type max_cutoff: number or None

    :arg n_nodes: number of TRN nodes to build, default is 0; when it is
        not positive no nodes are made and only the map is returned
    :type n_nodes: int

    :arg num_iter: value passed to the TRN run as ``tmax``, default is 20
    :type num_iter: int

    :arg map: when nodes are made, also return the map, default is **False**
    :type map: bool

    :arg title: title of the node atom group, default is ``'Unknown'``;
        *title_suffix* (default ``''``) is appended to it

    :raises TypeError: when a cutoff is neither a number nor **None**, or
        when *n_nodes* is not an integer
    """

    min_cutoff = kwargs.get('min_cutoff', kwargs.get('cutoff', None))
    if min_cutoff is not None:
        if not isinstance(min_cutoff, Number):
            raise TypeError('min_cutoff should be a number or None')
        min_cutoff = float(min_cutoff)

    max_cutoff = kwargs.get('max_cutoff', None)
    if max_cutoff is not None:
        if not isinstance(max_cutoff, Number):
            raise TypeError('max_cutoff should be a number or None')
        max_cutoff = float(max_cutoff)

    n_nodes = kwargs.get('n_nodes', 0)
    num_iter = int(kwargs.get('num_iter', 20))
    return_map = kwargs.get('map', False)

    if not isinstance(n_nodes, int):
        raise TypeError('n_nodes should be an integer')

    make_nodes = n_nodes > 0
    if not make_nodes:
        LOGGER.info('As n_nodes is less than or equal to 0, no nodes will be'
                    ' made and the raw map will be returned')

    emd = EMDMAP(stream, min_cutoff, max_cutoff)

    # Without nodes the raw map is the only possible result.
    if not make_nodes:
        return emd

    title_suffix = kwargs.get('title_suffix', '')
    atomgroup = AtomGroup(str(kwargs.get('title', 'Unknown')) + title_suffix)
    atomgroup._n_atoms = n_nodes

    # Fit the topology representing network to the density.
    trn = TRNET(n_nodes=n_nodes)
    trn.inputMap(emd, sample='density')
    trn.run(tmax=num_iter)

    coordinates = np.zeros((n_nodes, 3), dtype=float)
    for node in range(n_nodes):
        coordinates[node, :] = trn.W[node, :]

    # Every node gets the same name/residue/chain and a 1-based number.
    atomnames = np.full(n_nodes, 'B', dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.full(n_nodes, 'CGB', dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.arange(1, n_nodes + 1, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.full(n_nodes, 'X', dtype=ATOMIC_FIELDS['chain'].dtype)

    atomgroup.setCoords(coordinates)
    atomgroup.setNames(atomnames)
    atomgroup.setResnames(resnames)
    atomgroup.setResnums(resnums)
    atomgroup.setChids(chainids)

    if return_map:
        return atomgroup, emd
    return atomgroup
class exANM(ANM):

    """Class for explicit ANM (exANM) method ([FT00]_).
    Optional arguments build a membrane lattice permit analysis of membrane
    effect on elastic network models in *exANM* method described in [TL12]_.

    .. [TL12] Lezon TR, Bahar I, Constraints Imposed by the Membrane
       Selectively Guide the Alternating Access Dynamics of the Glutamate
       Transporter GltPh

    """

    def __init__(self, name='Unknown'):
        super(exANM, self).__init__(name)
        # Membrane lattice (AtomGroup) and protein+membrane system; both
        # stay None until buildMembrane succeeds.
        self._membrane = None
        self._combined = None

    def buildMembrane(self, coords, **kwargs):
        """Build membrane lattice around **coords**.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg membrane_high: the maximum z coordinate of the membrane. Default
            is **13.0**. May also be given as *high*, which takes precedence
        :type membrane_high: float

        :arg membrane_low: the minimum z coordinate of the membrane. Default
            is **-13.0**. May also be given as *low*, which takes precedence
        :type membrane_low: float

        :arg h: half thickness of the membrane; when given the membrane
            spans from **-h** to **h** and the high/low arguments are ignored
        :type h: float

        :arg depth: full thickness of the membrane, used as ``h = depth / 2``
            unless *h* is given
        :type depth: float

        :arg R: radius of all membrane in x-y direction. Default is **80**
        :type R: float

        :arg Ri: inner radius of the membrane in x-y direction if it needs to
            be hollow. Default is **0**, which is not hollow
        :type Ri: float

        :arg r: radius of each membrane node. Default is **3.1**
        :type r: float

        :arg lat: lattice type which could be **FCC** (face-centered-cubic,
            default), **SC** (simple cubic), **SH** (simple hexagonal)
        :type lat: str

        :arg exr: exclusive radius of each protein node. Default is **5.0**
        :type exr: float

        :arg hull: whether use convex hull to determine the protein's
            interior. Turn it off if protein is multimer. Default is **True**
        :type hull: bool

        :arg center: whether transform the structure to the origin (only x-
            and y-axis). Default is **True**
        :type center: bool

        :returns: coordinates of the combined protein and membrane system,
            or the protein coordinates alone when no membrane node could be
            placed
        :raises TypeError: when *coords* is not a valid coordinate set
        :raises ValueError: when no protein node lies between the membrane
            bounds
        """

        atoms = coords

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        self._n_atoms = int(coords.shape[0])

        LOGGER.timeit('_membrane')

        depth = kwargs.pop('depth', None)
        h = depth / 2 if depth is not None else None
        h = kwargs.pop('h', h)
        if h is not None:
            h = float(h)
            hu = h
            hl = -h
        else:
            hu = kwargs.pop('membrane_high', 13.0)
            hu = kwargs.pop('high', hu)
            hu = float(hu)

            hl = kwargs.pop('membrane_low', -13.0)
            hl = kwargs.pop('low', hl)
            hl = float(hl)

        R = float(kwargs.pop('R', 80.))
        Ri = float(kwargs.pop('Ri', 0.))
        r = float(kwargs.pop('r', 3.1))
        lat = str(kwargs.pop('lat', 'FCC'))
        exr = float(kwargs.pop('exr', 5.))
        use_hull = kwargs.pop('hull', True)
        centering = kwargs.pop('center', True)

        V = assign_lpvs(lat)

        if centering:
            # Shift in place (x and y only) so that the array behind
            # *atoms*, which is combined with the membrane below, is
            # centered as well.
            c0 = coords.mean(axis=0)
            c0[-1] = 0.
            coords -= c0

        # determine transmembrane part
        torf = np.logical_and(coords[:, -1] < hu, coords[:, -1] > hl)
        transmembrane = coords[torf, :]

        if not np.any(torf):
            raise ValueError('No region was identified as membrane. '
                             'Please use a structure from opm/ppm.')

        if use_hull:
            from scipy.spatial import ConvexHull
            hull = ConvexHull(transmembrane)
        else:
            hull = transmembrane

        # determine the bound for ijk
        imax = (R + V[0, 2] * (hu - hl) / 2.) / r
        jmax = (R + V[1, 2] * (hu - hl) / 2.) / r
        kmax = (R + V[2, 2] * (hu - hl) / 2.) / r

        imax = int(ceil(imax))
        jmax = int(ceil(jmax))
        kmax = int(ceil(kmax))

        membrane = []
        for i in range(-imax, imax):
            for j in range(-jmax, jmax):
                for k in range(-kmax, kmax):
                    c = array([i, j, k])
                    xyz = 2. * r * dot(c, V)

                    # cheap bounding-box test before the radial one
                    if not (hl < xyz[2] < hu and
                            -R < xyz[0] < R and
                            -R < xyz[1] < R):
                        continue
                    dd = norm(xyz[:2])
                    if dd < R and dd > Ri:
                        if checkClash(xyz, hull, radius=exr):
                            membrane.append(xyz)

        n_nodes = len(membrane)
        membrane = array(membrane)

        if n_nodes == 0:
            self._membrane = None
            LOGGER.warn('no membrane is built. The protein should be '
                        'transformed to the correct origin as in OPM')
            return coords

        self._membrane = AtomGroup(title="Membrane")
        self._membrane.setCoords(membrane)
        self._membrane.setResnums(range(n_nodes))
        self._membrane.setResnames(["NE1"] * n_nodes)
        self._membrane.setChids(["Q"] * n_nodes)
        self._membrane.setElements(["Q1"] * n_nodes)
        self._membrane.setNames(["Q1"] * n_nodes)
        # '%.2fs' (two decimals) matches the Hessian timing report; the
        # former '%2.fs' printed the time with no decimals.
        LOGGER.report('Membrane was built in %.2fs.', label='_membrane')

        coords = self._combineMembraneProtein(atoms)
        return coords

    def buildHessian(self, coords, cutoff=15., gamma=1., **kwargs):
        """Build Hessian matrix for given coordinate set.
        **kwargs** are passed to :meth:`.buildMembrane`.

        When a membrane has already been built, the stored combined system
        is reused and *coords* only determines the number of protein nodes.

        :arg coords: a coordinate set or an object with ``getCoords`` method
        :type coords: :class:`numpy.ndarray`

        :arg cutoff: cutoff distance (Å) for pairwise interactions,
            default is 15.0 Å
        :type cutoff: float

        :arg gamma: spring constant, default is 1.0
        :type gamma: float

        :arg turbo: build the reduced Hessian directly instead of building
            the full Hessian and reducing it afterwards, default is **True**
        :type turbo: bool
        """

        atoms = coords
        turbo = kwargs.pop('turbo', True)

        try:
            coords = (coords._getCoords() if hasattr(coords, '_getCoords') else
                      coords.getCoords())
        except AttributeError:
            try:
                checkCoords(coords)
            except TypeError:
                raise TypeError('coords must be a Numpy array or an object '
                                'with `getCoords` method')

        n_atoms = int(coords.shape[0])

        if self._membrane is None:
            coords = self.buildMembrane(atoms, **kwargs)
        else:
            # _combined is a bare array when the membrane was built from a
            # coordinate array, so it may not offer getCoords().
            combined = self._combined
            if hasattr(combined, 'getCoords'):
                coords = combined.getCoords()
            else:
                coords = combined

        # The first n_atoms nodes are the protein ("system"); the remaining
        # ones are membrane ("environment").
        system = zeros(coords.shape[0], dtype=bool)
        system[:n_atoms] = True

        LOGGER.timeit('_exanm')

        if turbo:
            self._hessian = buildReducedHessian(coords, system, cutoff,
                                                gamma, **kwargs)
        else:
            super(exANM, self).buildHessian(coords, cutoff, gamma, **kwargs)
            system = np.repeat(system, 3)
            self._hessian = _reduceModel(self._hessian, system)

        LOGGER.report('Hessian was built in %.2fs.', label='_exanm')
        self._dof = self._hessian.shape[0]
        self._n_atoms = n_atoms

    def calcModes(self, n_modes=20, zeros=False, turbo=True):
        """Calculate normal modes.  This method uses :func:`scipy.linalg.eigh`
        function to diagonalize the Hessian matrix. When Scipy is not found,
        :func:`numpy.linalg.eigh` is used.

        :arg n_modes: number of non-zero eigenvalues/vectors to calculate.
            If **None** is given, all modes will be calculated.
        :type n_modes: int or None, default is 20

        :arg zeros: If **True**, modes with zero eigenvalues will be kept.
        :type zeros: bool, default is **False**

        :arg turbo: Use a memory intensive, but faster way to calculate modes.
        :type turbo: bool, default is **True**
        """

        super(exANM, self).calcModes(n_modes, zeros, turbo)

    def getMembrane(self):
        """Returns the membrane atom group itself (not a copy), or **None**
        when no membrane has been built."""

        return self._membrane

    def getCombined(self):
        """Returns the stored combined protein and membrane system (not a
        copy), or **None** when nothing has been combined yet."""

        return self._combined

    def _combineMembraneProtein(self, coords):
        """Store protein plus membrane in ``_combined`` and return the
        combined coordinates."""

        if self._membrane is not None:
            if isinstance(coords, Atomic):
                self._combined = coords.copy() + self._membrane
                coords = self._combined.getCoords()
            else:
                self._combined = coords = np.concatenate(
                    (coords, self._membrane.getCoords()), axis=0)
        else:
            self._combined = coords = copy(coords)
        return coords
def fetchPDBLigand(cci, filename=None):
    """Fetch PDB ligand data from PDB_ for chemical component *cci*.
    *cci* may be 3-letter chemical component identifier or a valid XML
    filename.  If *filename* is given, XML file will be saved with that name.

    If you query ligand data frequently, you may configure ProDy to save XML
    files in your computer.  Set ``ligand_xml_save`` option **True**, i.e.
    ``confProDy(ligand_xml_save=True)``.  Compressed XML files will be save
    to ProDy package folder, e.g. :file:`/home/user/.prody/pdbligands`.  Each
    file is around 5Kb when compressed.

    This function is compatible with PDBx/PDBML v 4.0.

    Ligand data is returned in a dictionary.  Ligand coordinate atom data with
    *model* and *ideal* coordinate sets are also stored in this dictionary.
    Note that this dictionary will contain data that is present in the XML
    file and all Ligand Expo XML files do not contain every possible data
    field.  So, it may be better if you use :meth:`dict.get` instead of
    indexing the dictionary, e.g. to retrieve formula weight (or relative
    molar mass) of the chemical component use ``data.get('formula_weight')``
    instead of ``data['formula_weight']`` to avoid exceptions when this data
    field is not found in the XML file.  URL and/or path of the XML file are
    returned in the dictionary with keys ``url`` and ``path``, respectively.

    Following example downloads data for ligand STI (a.k.a. Gleevec and
    Imatinib) and calculates RMSD between model (X-ray structure 1IEP) and
    ideal (energy minimized) coordinate sets:

    .. ipython:: python

       from prody import *
       ligand_data = fetchPDBLigand('STI')
       ligand_data['model_coordinates_db_code']
       ligand_model = ligand_data['model']
       ligand_ideal = ligand_data['ideal']
       transformation = superpose(ligand_ideal.noh, ligand_model.noh)
       calcRMSD(ligand_ideal.noh, ligand_model.noh)

    :raises TypeError: when *cci* is not a string
    :raises ValueError: when *cci* is neither an existing file nor a short
        alphanumeric identifier
    :raises IOError: when the XML file cannot be downloaded"""

    if not isinstance(cci, str):
        raise TypeError('cci must be a string')
    if isfile(cci):
        with openFile(cci) as inp:
            xml_text = inp.read()
        url = None
        path = cci
        cci = splitext(splitext(split(cci)[1])[0])[0].upper()
    elif len(cci) > 4 or not cci.isalnum():
        raise ValueError('cci must be 3-letters long and alphanumeric or '
                         'a valid filename')
    else:
        xml_text = None
        cci = cci.upper()
        # Only set when the local cache is enabled; it used to be unbound
        # (NameError) when *filename* was given without the cache.
        folder = None
        if SETTINGS.get('ligand_xml_save'):
            folder = join(getPackagePath(), 'pdbligands')
            if not isdir(folder):
                makePath(folder)
            xmlgz = path = join(folder, cci + '.xml.gz')
            if isfile(xmlgz):
                with openFile(xmlgz) as inp:
                    xml_text = inp.read()
        else:
            path = None
        # Former endpoints (ligand-expo.rcsb.org/reports, www.pdb.org) were
        # replaced by the files.rcsb.org download service.
        url = 'http://files.rcsb.org/ligands/download/{0}.xml'.format(
            cci.upper())
        if not xml_text:
            try:
                inp = openURL(url)
            except IOError:
                raise IOError('XML file for ligand {0} is not found online'
                              .format(cci))
            try:
                xml_text = inp.read()
            finally:
                inp.close()
            # NOTE(review): the downloaded payload may be bytes while the
            # files below are opened with mode 'w' -- confirm that openFile
            # copes with that.
            if filename:
                if folder is None:
                    out = openFile(filename, mode='w')
                else:
                    out = openFile(filename, mode='w', folder=folder)
                try:
                    out.write(xml_text)
                finally:
                    out.close()
            if SETTINGS.get('ligand_xml_save'):
                with openFile(xmlgz, 'w') as out:
                    out.write(xml_text)

    # cElementTree was removed in Python 3.9; ElementTree uses the C
    # accelerator automatically when it is available.
    import xml.etree.ElementTree as ET

    root = ET.XML(xml_text)
    if (root.get('{http://www.w3.org/2001/XMLSchema-instance}'
                 'schemaLocation') !=
            'http://pdbml.pdb.org/schema/pdbx-v40.xsd pdbx-v40.xsd'):
        LOGGER.warn('XML is not in PDBx/PDBML v 4.0 format, resulting '
                    'dictionary may not contain all data fields')
    ns = root.tag[:root.tag.rfind('}') + 1]
    len_ns = len(ns)
    dict_ = {'url': url, 'path': path}

    for child in list(root.find(ns + 'chem_compCategory')[0]):
        tag = child.tag[len_ns:]
        if tag.startswith('pdbx_'):
            tag = tag[5:]
        dict_[tag] = child.text

    # Convert only when present: the field is optional, as the docstring
    # above promises.
    formula_weight = dict_.get('formula_weight')
    if formula_weight is not None:
        dict_['formula_weight'] = float(formula_weight)

    identifiers_and_descriptors = []
    for category in ('pdbx_chem_comp_identifierCategory',
                     'pdbx_chem_comp_descriptorCategory'):
        results = root.find(ns + category)
        # Compare with None: truth-testing an Element is deprecated.
        if results is not None:
            identifiers_and_descriptors.extend(results)
    for child in identifiers_and_descriptors:
        program = child.get('program').replace(' ', '_')
        type_ = child.get('type').replace(' ', '_')
        dict_[program + '_' + type_] = child[0].text
        dict_[program + '_version'] = child.get('program_version')

    audits = root.find(ns + 'pdbx_chem_comp_auditCategory')
    dict_['audits'] = [(audit.get('action_type'), audit.get('date'))
                       for audit in (audits if audits is not None else ())]

    atoms = list(root.find(ns + 'chem_comp_atomCategory'))
    n_atoms = len(atoms)
    ideal_coords = np.zeros((n_atoms, 3))
    model_coords = np.zeros((n_atoms, 3))

    atomnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['name'].dtype)
    elements = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['element'].dtype)
    resnames = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['resname'].dtype)
    charges = np.zeros(n_atoms, dtype=ATOMIC_FIELDS['charge'].dtype)
    # every atom belongs to residue number 1
    resnums = np.ones(n_atoms, dtype=ATOMIC_FIELDS['resnum'].dtype)

    alternate_atomnames = np.zeros(n_atoms,
                                   dtype=ATOMIC_FIELDS['name'].dtype)
    # builtin bool: the ``np.bool`` alias was removed in NumPy 1.24
    leaving_atom_flags = np.zeros(n_atoms, bool)
    aromatic_flags = np.zeros(n_atoms, bool)
    stereo_configs = np.zeros(n_atoms, bool)
    ordinals = np.zeros(n_atoms, int)

    name2index = {}

    for i, atom in enumerate(atoms):
        data = {child.tag[len_ns:]: child.text for child in atom}

        name = data.get('pdbx_component_atom_id', 'X')
        name2index[name] = i
        atomnames[i] = name
        elements[i] = data.get('type_symbol', 'X')
        resnames[i] = data.get('pdbx_component_comp_id', 'UNK')
        charges[i] = float(data.get('charge', 0))

        alternate_atomnames[i] = data.get('alt_atom_id', 'X')
        leaving_atom_flags[i] = data.get('pdbx_leaving_atom_flag') == 'Y'
        # was misspelled 'pdbx_atomatic_flag', which never matched
        aromatic_flags[i] = data.get('pdbx_aromatic_flag') == 'Y'
        # NOTE(review): 'pdbx_stereo_config' may hold R/S/N rather than
        # Y/N -- confirm against the PDBx dictionary.
        stereo_configs[i] = data.get('pdbx_stereo_config') == 'Y'
        ordinals[i] = int(data.get('pdbx_ordinal', 0))

        model_coords[i, 0] = float(data.get('model_Cartn_x', 0))
        model_coords[i, 1] = float(data.get('model_Cartn_y', 0))
        model_coords[i, 2] = float(data.get('model_Cartn_z', 0))
        ideal_coords[i, 0] = float(data.get('pdbx_model_Cartn_x_ideal', 0))
        ideal_coords[i, 1] = float(data.get('pdbx_model_Cartn_y_ideal', 0))
        ideal_coords[i, 2] = float(data.get('pdbx_model_Cartn_z_ideal', 0))

    pdbid = dict_.get('model_coordinates_db_code')
    if pdbid:
        model = AtomGroup(cci + ' model ({0})'.format(pdbid))
    else:
        model = AtomGroup(cci + ' model')
    model.setCoords(model_coords)
    model.setNames(atomnames)
    model.setResnames(resnames)
    model.setResnums(resnums)
    model.setElements(elements)
    model.setCharges(charges)
    model.setFlags('leaving_atom_flags', leaving_atom_flags)
    model.setFlags('aromatic_flags', aromatic_flags)
    model.setFlags('stereo_configs', stereo_configs)
    model.setData('ordinals', ordinals)
    model.setData('alternate_atomnames', alternate_atomnames)
    dict_['model'] = model

    # The ideal structure shares every attribute except title and coordinates.
    ideal = model.copy()
    ideal.setTitle(cci + ' ideal')
    ideal.setCoords(ideal_coords)
    dict_['ideal'] = ideal

    bonds = []
    warned = set()
    bond_category = root.find(ns + 'chem_comp_bondCategory')
    for bond in (bond_category if bond_category is not None else ()):
        name_1 = bond.get('atom_id_1')
        name_2 = bond.get('atom_id_2')
        try:
            bonds.append((name2index[name_1], name2index[name_2]))
        except KeyError:
            # warn once per unknown atom name
            if name_1 not in warned and name_1 not in name2index:
                warned.add(name_1)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_1), cci))
            if name_2 not in warned and name_2 not in name2index:
                warned.add(name_2)
                LOGGER.warn('{0} specified {1} in bond category is not '
                            'a valid atom name.'.format(repr(name_2), cci))
    if bonds:
        bonds = np.array(bonds, int)
        model.setBonds(bonds)
        ideal.setBonds(bonds)
    return dict_
def parseNMD(filename, type=NMA):
    """Returns normal mode and atomic data parsed from an NMD file.
    Normal mode data is returned in an :class:`~.NMA` instance.  Atomic
    data is returned in an :class:`~.AtomGroup` instance, or **None** when
    the file has no ``coordinates`` line.

    :arg filename: path of the NMD file
    :type filename: str

    :arg type: class that is called with the model name to create the
        returned normal mode object
    :type type: class

    :raises ValueError: when the file contains modes but no coordinates,
        so the length of the mode vectors cannot be determined"""

    # NOTE(review): this only rejects NMA *instances*; it does not verify
    # that *type* is one of the listed classes.
    assert not isinstance(type, NMA), 'type must be NMA, ANM, GNM, or PCA'

    atomic = dict()
    modes = []
    with open(filename) as nmd:
        for line in nmd:
            cut = line.find(' ')
            key = line[:cut]
            if key == 'mode':
                modes.append(line[cut:].strip())
            elif key in ('coordinates', 'atomnames', 'resnames',
                         'resnums', 'resids', 'chainids', 'bfactors',
                         'name'):
                atomic[key] = line[cut:].strip()

    name = atomic.pop('name', os.path.splitext(os.path.split(filename)[1])[0])
    coords = atomic.pop('coordinates', None)
    dof = None
    ag = None  # stays None when the file carries no coordinates
    if coords is not None:
        coords = np.fromstring(coords, dtype=float, sep=' ')
        dof = coords.shape[0]
        # floor division: reshape needs an integer on Python 3 as well
        n_atoms = dof // 3
        coords = coords.reshape((n_atoms, 3))
        ag = AtomGroup(name)
        ag.setCoords(coords)
        data = atomic.pop('atomnames', None)
        if data is not None:
            ag.setNames(data.split())
        data = atomic.pop('resnames', None)
        if data is not None:
            ag.setResnames(data.split())
        data = atomic.pop('chainids', None)
        if data is not None:
            ag.setChids(data.split())
        data = atomic.pop('resnums', None)
        if data is not None:
            ag.setResnums(np.fromstring(data, int, sep=' '))
        # 'resids' is applied after 'resnums' and therefore wins
        data = atomic.pop('resids', None)
        if data is not None:
            ag.setResnums(np.fromstring(data, int, sep=' '))
        data = atomic.pop('bfactors', None)
        if data is not None:
            ag.setBetas(np.fromstring(data, float, sep=' '))

    if modes and dof is None:
        raise ValueError('NMD file must contain coordinates to parse modes')

    nma = type(name)
    for mode in modes:
        items = mode.split()
        diff = len(items) - dof
        vector = np.array(items[diff:]).astype(float)

        # Tokens in front of the vector are an optional integer index
        # and/or a value.  Only those tokens are inspected, so a vector
        # component can never be taken for the value.
        value = 1.0
        for token in items[:max(diff, 0)][:2]:
            if not token.isdigit():
                value = float(token)
                break
        nma.addEigenpair(vector, value)
    return nma, ag
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file disagree on the atom count
    :raises IOError: when the ``!NATOM`` or ``!NBOND:`` line is missing, or
        the file has fewer atom lines or bond indices than announced"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    def _readline(handle):
        """Return the next line as text; '' signals the end of the file."""
        line = handle.readline()
        # The file is opened in binary mode, so decode before comparing
        # with the str literals used below.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        return line

    psf = openFile(filename, 'rb')
    try:
        n_atoms = None
        line = _readline(psf)
        while line:
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = _readline(psf)
        if n_atoms is None:
            raise IOError('!NATOM line was not found in PSF file')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError('ag and PSF file must have same number of '
                                 'atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Atom records directly follow the !NATOM line, one per line.
        for i in range(n_atoms):
            line = _readline(psf)
            if not line:
                raise IOError('number of lines in PSF is less than the '
                              'number of atoms')
            if len(line) <= 71:
                # fixed-width columns
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # longer lines are split on whitespace instead
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Look for the bond header; stop cleanly at the end of the file
        # instead of indexing past the lines that were read.
        n_bonds = None
        line = _readline(psf)
        while line:
            items = line.split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
            line = _readline(psf)
        if n_bonds is None:
            raise IOError('!NBOND: line was not found in PSF file')

        # Collect exactly the announced number of atom indices; the next
        # section header (a line containing '!') ends the bond list.
        expected = n_bonds * 2
        tokens = []
        while len(tokens) < expected:
            line = _readline(psf)
            if not line or '!' in line:
                break
            tokens.extend(line.split())
    finally:
        psf.close()

    if len(tokens) < expected:
        raise IOError('number of bonds expected and parsed do not match')
    if expected:
        bonds = fromstring(' '.join(tokens[:expected]), count=expected,
                           dtype=int, sep=' ')
    else:
        bonds = zeros(0, int)
    if len(bonds) != expected:
        raise IOError('number of bonds expected and parsed do not match')

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # PSF indices are 1-based; shift in place to 0-based
    bonds = add(bonds, -1, bonds)
    ag.setBonds(bonds.reshape((n_bonds, 2)))

    return ag
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file disagree on the atom count
    :raises IOError: when the ``!NATOM`` line is missing, or the file has
        fewer atom lines or bond indices than announced"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError("ag must be an AtomGroup instance")

    psf = openFile(filename, "rb")
    # try/finally so the handle is released on every error path as well
    try:
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b"!NATOM"):
                n_atoms = int(line.split(b"!")[0])
                break
            line = psf.readline()
        if n_atoms is None:
            # previously surfaced as an unbound-variable error further down
            raise IOError("!NATOM line was not found in PSF file")

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)
        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError("ag and PSF file must have same number of "
                                 "atoms")

        serials = zeros(n_atoms, ATOMIC_FIELDS["serial"].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS["segment"].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS["resnum"].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS["resname"].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS["name"].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS["type"].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS["charge"].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS["mass"].dtype)

        n = 0  # atom records stored so far
        n_bonds = 0
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!NBOND:" in line.upper():
                items = line.split()
                n_bonds = int(items[0])
                break
            # ignore anything between the last atom and the bond header
            if n >= n_atoms:
                continue

            if len(line) <= 71:
                # fixed-width columns
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # longer lines are split on whitespace instead
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError("number of lines in PSF is less than the number of "
                          "atoms")

        # Bond indices run until the next section header (a line with '!').
        bond_lines = []
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!" in line:
                break
            bond_lines.append(line.decode(encoding="UTF-8"))

        bonds = fromstring("".join(bond_lines), count=n_bonds * 2,
                           dtype=int, sep=" ")
        if len(bonds) != n_bonds * 2:
            raise IOError("number of bonds expected and parsed do not match")
    finally:
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # PSF indices are 1-based; shift in place to 0-based
    bonds = add(bonds, -1, bonds)
    ag.setBonds(bonds.reshape((n_bonds, 2)))

    return ag