def _guess_atomic_number(name, residue=None):
    """ Guesses the atomic number from an atom name

    Parameters
    ----------
    name : str
        Atom name; every non-alphabetic character is discarded before any
        lookup is attempted
    residue : optional
        Object exposing an ``atoms`` sequence. When it is None or holds
        exactly one atom, the two-letter symbol lookup below is tried first

    Returns
    -------
    The entry of ``AtomicNum`` selected for the name
    """
    letters = ''.join([ch for ch in name if ch.isalpha()])
    # Single-atom residues are almost always ions, so prefer reading the first
    # two letters as an element symbol (e.g. "CL" -> "Cl") in that case
    single_atom = residue is None or len(residue.atoms) == 1
    if single_atom and len(letters) > 1:
        symbol = letters[0].upper() + letters[1].lower()
        try:
            return AtomicNum[symbol]
        except KeyError:
            # Not a known two-letter symbol; use the generic guess below
            pass
    return AtomicNum[element_by_name(letters)]
def __init__(self, fname, seq=None):
    """ Builds the structure by parsing an XYZ file

    Parameters
    ----------
    fname : str or file-like
        Name of the XYZ file (opened with ``genopen``) or an already-open
        object supporting ``readline`` and line iteration. Only handles opened
        here are closed here
    seq : optional
        Passed to ``load_file``. When given, the residue of every atom is
        taken from the loaded structure; otherwise all atoms are put in a
        single residue named 'SYS' with number 1

    Raises
    ------
    TinkerError
        If the atom count cannot be read from the first line
    ValueError
        If ``seq`` is given and its number of atoms differs from the count on
        the first line of the XYZ file
    """
    super(XyzFile, self).__init__()
    if isinstance(fname, string_types):
        fxyz = genopen(fname, 'r')
        own_handle_xyz = True
    else:
        fxyz = fname
        own_handle_xyz = False
    # Everything that can raise lives inside the try block so a handle we
    # opened ourselves is always released, even on a malformed file
    try:
        if seq is not None:
            seqstruct = load_file(seq)
        # Now parse the file
        try:
            natom = int(fxyz.readline().split()[0])
        except (ValueError, IndexError):
            raise TinkerError('Bad XYZ file format; first line')
        if seq is not None and natom != len(seqstruct.atoms):
            raise ValueError(
                'Sequence file %s # of atoms does not match the # '
                'of atoms in the XYZ file' % seq)
        words = fxyz.readline().split()
        # A 6-field line that is not an atom record is taken as the box
        if len(words) == 6 and not XyzFile._check_atom_record(words):
            self.box = [float(w) for w in words]
            words = fxyz.readline().split()
        # Record fields: [1] name, [2:5] coordinates, [5] type, [6:] indices
        # of bonded atoms
        atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                    name=words[1], type=words[5])
        atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
        residue = Residue('SYS')
        residue.number = 1
        residue._idx = 0
        if seq is not None:
            residue = seqstruct.residues[0]
        self.add_atom(atom, residue.name, residue.number, residue.chain,
                      residue.insertion_code, residue.segid)
        bond_ids = [[int(w) for w in words[6:]]]
        for i, line in enumerate(fxyz):
            words = line.split()
            atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                        name=words[1], type=words[5])
            atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
            if seq is not None:
                # i counts from the second atom, hence the +1 offset
                residue = seqstruct.atoms[i + 1].residue
            self.add_atom(atom, residue.name, residue.number, residue.chain,
                          residue.insertion_code, residue.segid)
            bond_ids.append([int(w) for w in words[6:]])
        # All of the bonds are stored now -- go ahead and make them now
        for atom, bonds in zip(self.atoms, bond_ids):
            i = atom.idx + 1
            for idx in bonds:
                # Each bond is listed by both partners; create it only from
                # the lower-numbered one
                if idx > i:
                    self.bonds.append(Bond(atom, self.atoms[idx - 1]))
    finally:
        if own_handle_xyz:
            fxyz.close()
def __init__(self, fname, seq=None):
    """ Parses an XYZ file and populates this structure with its atoms/bonds

    Parameters
    ----------
    fname : str or file-like
        Name of the XYZ file (opened with ``genopen``) or an already-open
        object supporting ``readline`` and line iteration. A handle is closed
        here only if it was opened here
    seq : optional
        Passed to ``load_file``. When given, residue information for each atom
        comes from the loaded structure; otherwise every atom is assigned to
        one residue named 'SYS' with number 1

    Raises
    ------
    TinkerError
        If the first line does not start with an integer atom count
    ValueError
        If ``seq`` is given and its atom count does not match the first line
        of the XYZ file
    """
    super(XyzFile, self).__init__()
    if isinstance(fname, string_types):
        fxyz = genopen(fname, 'r')
        own_handle_xyz = True
    else:
        fxyz = fname
        own_handle_xyz = False
    # Parsing is wrapped in try/finally so that a parse failure cannot leak
    # the file handle opened above
    try:
        if seq is not None:
            seqstruct = load_file(seq)
        # Now parse the file
        try:
            natom = int(fxyz.readline().split()[0])
        except (ValueError, IndexError):
            raise TinkerError('Bad XYZ file format; first line')
        if seq is not None and natom != len(seqstruct.atoms):
            raise ValueError('Sequence file %s # of atoms does not match the # '
                             'of atoms in the XYZ file' % seq)
        words = fxyz.readline().split()
        # Six fields that do not form an atom record are the box definition
        if len(words) == 6 and not XyzFile._check_atom_record(words):
            self.box = [float(w) for w in words]
            words = fxyz.readline().split()
        # The first atom is handled outside the loop because the line holding
        # it has already been consumed above
        atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                    name=words[1], type=words[5])
        atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
        residue = Residue('SYS')
        residue.number = 1
        residue._idx = 0
        if seq is not None:
            residue = seqstruct.residues[0]
        self.add_atom(atom, residue.name, residue.number, residue.chain,
                      residue.insertion_code, residue.segid)
        # Fields from index 6 onward are the indices of bonded atoms
        bond_ids = [[int(w) for w in words[6:]]]
        for i, line in enumerate(fxyz):
            words = line.split()
            atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                        name=words[1], type=words[5])
            atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
            if seq is not None:
                # Enumeration starts at the second atom
                residue = seqstruct.atoms[i+1].residue
            self.add_atom(atom, residue.name, residue.number, residue.chain,
                          residue.insertion_code, residue.segid)
            bond_ids.append([int(w) for w in words[6:]])
        # All of the bonds are stored now -- go ahead and make them now
        for atom, bonds in zip(self.atoms, bond_ids):
            i = atom.idx + 1
            for idx in bonds:
                # Only add the bond when seen from its lower-numbered atom so
                # it is not created twice
                if idx > i:
                    self.bonds.append(Bond(atom, self.atoms[idx-1]))
    finally:
        if own_handle_xyz:
            fxyz.close()
def to_parmed(self, title='', **kwargs):
    """Create a ParmEd Structure from a Compound.

    Parameters
    ----------
    title : str, optional
        Title of the new structure; ``self.name`` is used when it is empty
    **kwargs
        Accepted but not used by this method

    Returns
    -------
    structure
        A ``pmd.Structure`` holding one atom per particle (all placed in
        residue 'RES' number 1), one bond per entry of ``self.bonds()`` and a
        six-element box
    """
    structure = pmd.Structure()
    structure.title = title or self.name

    particle_to_atom = {}  # Looked up again when the bonds are created
    warned_names = set()
    for particle in self.particles():
        # Key used to avoid repeating the warning: the name without digits
        bare_name = ''.join(ch for ch in particle.name if not ch.isdigit())
        try:
            number = AtomicNum[particle.name]
        except KeyError:
            number = None
            element = element_by_name(particle.name)
            if bare_name not in warned_names:
                warn('Guessing that "{}" is element: "{}"'.format(
                    particle, element))
                warned_names.add(bare_name)
        else:
            element = particle.name
        if not number:
            number = AtomicNum[element]

        new_atom = pmd.Atom(atomic_number=number, name=particle.name,
                            mass=Mass[element])
        # Positions are scaled by 10 (marked as Angstroms in the original)
        new_atom.xx, new_atom.xy, new_atom.xz = particle.pos * 10
        structure.add_atom(new_atom, resname='RES', resnum=1)
        particle_to_atom[particle] = new_atom

    for first, second in self.bonds():
        structure.bonds.append(
            pmd.Bond(particle_to_atom[first], particle_to_atom[second]))

    bounds = self.boundingbox
    box_vector = np.empty(6)
    box_vector[3:6] = 90.0
    for dim, period in enumerate(self.periodicity):
        # A truthy periodicity gives the edge directly; otherwise the
        # bounding-box length plus a margin of 5 is used
        box_vector[dim] = period * 10 if period else bounds.lengths[dim] * 10 + 5
    structure.box = box_vector
    return structure
def read(self, line):
    """ Parses one atom record

    Parameters
    ----------
    line : str
        A line with an atom record from a GRO file

    Returns
    -------
    atom, resname, resnum : Atom, str, int
        The Atom instance, residue name, and residue number containing the
        atom. An ExtraPoint is created instead of an Atom when the guessed
        atomic number is 0
    """
    # Fixed-width leading columns: residue number, residue name, atom name,
    # atom number
    resnum = int(line[:5])
    resname = line[5:10].strip()
    atomname = line[10:15].strip()
    element = element_by_name(atomname)
    atomic_number, mass = AtomicNum[element], Mass[element]
    atnum = int(line[15:20])
    if atomic_number != 0:
        atom = Atom(atomic_number=atomic_number, name=atomname,
                    number=atnum, mass=mass)
    else:
        atom = ExtraPoint(name=atomname, number=atnum)

    # The width of a numeric field is measured once, as the distance between
    # the first two decimal points at or after column 20, then reused
    if self._digits is None:
        self._pdeci = line.index('.', 20)
        self._ndeci = line.index('.', self._pdeci + 1)
        self._digits = self._ndeci - self._pdeci
    width = self._digits

    def scaled_field(start):
        # One fixed-width number, multiplied by 10
        return float(line[start:start + width]) * 10

    atom.xx, atom.xy, atom.xz = [scaled_field(20 + k * width)
                                 for k in range(3)]
    # Velocities, when present, follow the three coordinate fields
    vel_start = 20 + 3 * width
    if line[vel_start:vel_start + width].strip():
        atom.vx, atom.vy, atom.vz = [scaled_field(vel_start + k * width)
                                     for k in range(3)]
    return atom, resname, resnum
def to_parmed(self, title='', **kwargs):
    """Create a ParmEd Structure from a Compound.

    Parameters
    ----------
    title : str, optional
        Title of the new structure; ``self.name`` is used when it is empty
    **kwargs
        Accepted but not used by this method

    Returns
    -------
    structure
        A ``pmd.Structure`` with one atom per particle (all in residue 'RES'
        number 1), one bond per entry of ``self.bonds()`` and a six-element
        box
    """
    structure = pmd.Structure()
    structure.title = title if title else self.name
    atom_mapping = {}  # For creating bonds below
    # Names already warned about, so that a large system with many identical
    # particles does not emit one warning per atom
    guessed_elements = set()
    for atom in self.particles():
        atomic_number = None
        try:
            atomic_number = AtomicNum[atom.name]
        except KeyError:
            element = element_by_name(atom.name)
            # Warn once per distinct name, ignoring digits in the name
            name = ''.join(char for char in atom.name if not char.isdigit())
            if name not in guessed_elements:
                warn('Guessing that {} is element: {}'.format(atom, element))
                guessed_elements.add(name)
        else:
            element = atom.name
        atomic_number = atomic_number or AtomicNum[element]
        mass = Mass[element]
        pmd_atom = pmd.Atom(atomic_number=atomic_number, name=atom.name,
                            mass=mass)
        pmd_atom.xx, pmd_atom.xy, pmd_atom.xz = atom.pos * 10  # Angstroms
        structure.add_atom(pmd_atom, resname='RES', resnum=1)
        atom_mapping[atom] = pmd_atom
    for atom1, atom2 in self.bonds():
        bond = pmd.Bond(atom_mapping[atom1], atom_mapping[atom2])
        structure.bonds.append(bond)
    box = self.boundingbox
    box_vector = np.empty(6)
    # The last three box entries are set to 90.0
    box_vector[3] = box_vector[4] = box_vector[5] = 90.0
    for dim, val in enumerate(self.periodicity):
        if val:
            box_vector[dim] = val * 10
        else:
            # No periodicity in this dimension: use the bounding-box length
            # plus a margin of 5
            box_vector[dim] = box.lengths[dim] * 10 + 5
    structure.box = box_vector
    return structure
def parse(filename):
    """ Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates, and velocities when the records carry
        them). The box is set when a box line follows the atom records

    Raises
    ------
    GromacsError
        If the atom count, an atom record or the box line cannot be parsed,
        or if the file ends before the declared number of atoms was read
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        digits = None
        for i, line in enumerate(fileobj):
            if i == natom:
                # ``line`` now holds whatever follows the last atom record
                break
            try:
                resnum = int(line[:5])
                resname = line[5:10].strip()
                atomname = line[10:15].strip()
                elem = element_by_name(atomname)
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                atnum = int(line[15:20])
                if atomic_number == 0:
                    atom = ExtraPoint(name=atomname, number=atnum)
                else:
                    atom = Atom(atomic_number=atomic_number, name=atomname,
                                number=atnum, mass=mass)
                if digits is None:
                    # Field width = distance between the first two decimal
                    # points at or after column 20; measured on the first
                    # record and reused for all others
                    pdeci = line.index('.', 20)
                    ndeci = line.index('.', pdeci+1)
                    digits = ndeci - pdeci
                atom.xx, atom.xy, atom.xz = [
                    float(line[20+k*digits:20+(k+1)*digits])*10
                    for k in range(3)
                ]
                # Velocity fields have the same width as the coordinate fields
                # and start right after the third coordinate
                wbeg = 20 + 3*digits
                if line[wbeg:wbeg+digits].strip():
                    atom.vx, atom.vy, atom.vz = [
                        float(line[wbeg+k*digits:wbeg+(k+1)*digits])*10
                        for k in range(3)
                    ]
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
        else:
            # The input ran out without reaching a line past the atoms, so
            # there is no box line. Clear ``line`` so the last atom record
            # (or nothing at all) is not mistaken for a box
            line = ''
        if len(struct.atoms) != natom:
            raise GromacsError('Truncated GRO file. Found %d of %d atoms' %
                               (len(struct.atoms), natom))
        # Get the box from the last line if it's present
        if line.strip():
            try:
                box = [float(x) for x in line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors. The nine numbers are regrouped into
                # three 3-vectors before conversion to lengths and angles
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        if own_handle:
            fileobj.close()
    return struct
def parse(filename):
    """ Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from the
        PQR file. No bonds or other topological features are added by default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM line has fewer than 10 tokens or a numeric field
        cannot be converted
    PDBError
        If the models in the file are inconsistent with each other (extra or
        differing atoms, wrong atom counts, a model ended before any atom)
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, "r")
    else:
        own_handle = False
        fileobj = filename
    struct = Structure()
    # Add metadata fields
    modelno = 1  # For PDB files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []
    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank line: nothing to dispatch on, and words[0] would
                # raise IndexError
                continue
            if words[0] in ("ATOM", "HETATM"):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ""
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                else:
                    raise ValueError("Illegal PQR record format: expected "
                                     "10 or 11 tokens on the atom line")
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ("EP", "LP"):  # lone pair
                    atom = ExtraPoint(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                else:
                    atom = Atom(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates; their atoms
                    # must match the ones read for the first model
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError("Extra atom in MODEL %d" % modelno)
                    if orig_atom.residue.name != res.strip() or orig_atom.name != nam.strip():
                        raise PDBError(
                            "Atom %d differs in MODEL %d [%s %s "
                            "vs. %s %s]" % (atomno, modelno, orig_atom.residue.name, orig_atom.name, res, nam)
                        )
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == "ENDMDL":
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError("MODEL ended before any atoms read in")
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError("Inconsistent atom numbers in some PDB models")
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == "MODEL":
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # The previous model was never closed with ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError("Inconsistent atom numbers in "
                                       "some PDB models")
                    warnings.warn("MODEL not explicitly ended", PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                modelno += 1
                atomno = 0
            elif words[0] == "CRYST1":
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Missing or non-numeric angle fields: use 90 for all
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()
    struct.unchange()
    if coordinates:
        # Coordinates of a final model that was not closed with ENDMDL
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError("bad number of atoms in some PQR models")
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape((-1, len(struct.atoms), 3))
    return struct
def parse(filename):
    """ Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from the
        PQR file. No bonds or other topological features are added by default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM line has fewer than 10 tokens or a numeric field
        cannot be converted
    PDBError
        If the models in the file are inconsistent with each other (extra or
        differing atoms, wrong atom counts, a model ended before any atom)
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, 'r')
    else:
        own_handle = False
        fileobj = filename
    struct = Structure()
    # Add metadata fields
    modelno = 1  # For PDB files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []
    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Skip blank lines; there is no record name to inspect and
                # words[0] would raise IndexError
                continue
            if words[0] in ('ATOM', 'HETATM'):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ''
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                else:
                    raise ValueError('Illegal PQR record format: expected '
                                     '10 or 11 tokens on the atom line')
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ('EP', 'LP'):  # lone pair
                    atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                      charge=chg, mass=mass, number=num,
                                      solvent_radius=rad)
                else:
                    atom = Atom(atomic_number=atomic_number, name=nam,
                                charge=chg, mass=mass, number=num,
                                solvent_radius=rad)
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Atoms of later models are only checked against the
                    # first model; just their coordinates are kept
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError('Extra atom in MODEL %d' % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            'Atom %d differs in MODEL %d [%s %s '
                            'vs. %s %s]' % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam))
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == 'ENDMDL':
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError('MODEL ended before any atoms read in')
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError(
                        'Inconsistent atom numbers in some PDB models')
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == 'MODEL':
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # Previous model was left open (no ENDMDL record)
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError('Inconsistent atom numbers in '
                                       'some PDB models')
                    warnings.warn('MODEL not explicitly ended', PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                modelno += 1
                atomno = 0
            elif words[0] == 'CRYST1':
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Angle fields absent or not numeric: fall back to 90
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()
    struct.unchange()
    if coordinates:
        # A trailing model that was never closed with ENDMDL
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError('bad number of atoms in some PQR models')
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3))
    return struct