def _parseatoms(self, pdb):
    """Translate Bio.PDB atom objects into MDAnalysis ``Atom`` objects.

    :Arguments:
      *pdb*
         object whose ``get_atoms()`` yields the atoms; each atom's
         ``parent`` is used as its residue and the residue's ``parent``
         as its chain

    :Returns: list of ``Atom`` instances, numbered from 0 in iteration order

    Atom type, mass and charge are guessed from the atom name.  The segid
    is the first non-blank value among the residue's segid, the chain id
    and the literal ``"SYSTEM"``.
    """
    atoms = []
    for iatom, atom in enumerate(pdb.get_atoms()):
        residue = atom.parent
        # Strip the chain id just like the segid: a whitespace-only chain id
        # is truthy and would otherwise be used as a blank segid.
        chain_id = residue.parent.id.strip()
        atomname = atom.name
        # no empty segids (or Universe throws IndexError)
        segid = residue.get_segid().strip() or chain_id or "SYSTEM"
        atoms.append(
            Atom(iatom, atomname, guess_atom_type(atomname), residue.resname,
                 int(residue.id[1]), segid, guess_atom_mass(atomname),
                 guess_atom_charge(atomname), bfactor=atom.bfactor))
    return atoms
def _parse_atoms(self, psffile, natoms, mass, atom_style):
    """Read the ``Atoms`` section of a LAMMPS data file.

    Lammps atoms can have lots of different formats, and even custom
    formats, see http://lammps.sandia.gov/doc/atom_style.html

    Treated here are
    - atoms with 7 fields (with charge) "full"
    - atoms with 6 fields (no charge) "molecular"

    (the per-line splitting itself is delegated to
    ``self._parse_atom_line``, which must return seven values).

    :Arguments:
      *psffile*
         line source with a ``next()`` method; one line is skipped, then
         *natoms* lines are consumed
      *natoms*
         number of atom lines to read
      *mass*
         mapping from atom type to mass; types that are missing get 0.0
      *atom_style*
         accepted but not used in this method

    :Returns: list of ``Atom`` instances
    """
    logger.info("Doing Atoms section")
    read_line = psffile.next
    # The first line after the section header carries no atom; drop it.
    read_line()

    atoms = []
    for _ in xrange(natoms):
        record = self._parse_atom_line(read_line().strip())
        idx, resid, atype, q, _x, _y, _z = record
        try:
            m = mass[atype]
        except KeyError:
            m = 0.0
        # Atom() format:
        # Number, name, type, resname, resid, segid, mass, charge
        # The residue id doubles as residue name and segid.
        label = str(resid)
        atoms.append(Atom(idx, str(atype), atype, label, resid, label, m, q))
    return atoms
def parse(self):
    """Parse GRO file *filename* and return the dict `structure`.

    Only reads the list of atoms.  Coordinates and velocities are ignored.
    Lines whose first five columns are not an integer (or for which the
    guess helpers raise ``ValueError``/``IndexError``) are treated as
    non-atom lines and skipped, as are lines with a blank atom name.

    :Returns: MDAnalysis internal *structure* dict (key ``'_atoms'``)

    :Raises: :exc:`IOError` if reading an atom line fails with any other
             error

    .. SeeAlso:: The *structure* dict is defined in
                 :func:`MDAnalysis.topology.base`.
    """
    segid = "SYSTEM"
    atoms = []
    with openany(self.filename, "r") as grofile:
        for line in grofile:
            try:
                resid = int(line[0:5])
                resname = line[5:10].strip()
                name = line[10:15].strip()
                if not name:
                    # A line such as the atom-count header parses as an
                    # integer but carries no atom name: not an atom.
                    continue
                # guess based on atom name
                elem = guess_atom_element(name)
                mass = get_atom_mass(elem)
                charge = guess_atom_charge(name)
            except (ValueError, IndexError):
                # Not an atom line (title, box vectors, ...): skip it.
                continue
            except Exception:
                # Only real errors are converted; KeyboardInterrupt and
                # SystemExit are allowed to propagate.
                raise IOError(
                    "Couldn't read the following line of the .gro file:\n"
                    "{}".format(line))
            # Number atoms consecutively from 0 rather than using the
            # number in the .gro file (which wraps at 99999).
            atoms.append(
                Atom(len(atoms), name, elem, resname, resid, segid, mass,
                     charge))
    return {'_atoms': atoms}
def do_mtop(data, fver):
    """Read the topology of the whole system (mtop) and flatten it.

    The symbol table, system name and force-field parameters are read
    first, followed by the molecule types and the molecule blocks.  Each
    molecule block is expanded ``molb_nmol`` times into one flat
    ``obj.TPRTopology``; atom and residue numbers are shifted by running
    offsets so that they stay unique across molecules.

    :Arguments:
      *data*
         unpacker positioned at the start of the mtop record
      *fver*
         file version, forwarded to ``do_ffparams`` and ``do_moltype``

    :Returns: the populated ``obj.TPRTopology``
    """
    symtab = do_symtab(data)
    do_symstr(data, symtab)  # system_name
    do_ffparams(data, fver)  # params

    nmoltype = data.unpack_int()
    moltypes = [do_moltype(data, symtab, fver) for _ in xrange(nmoltype)]  # non-gromacs
    nmolblock = data.unpack_int()
    mtop = obj.Mtop(nmoltype, moltypes, nmolblock)

    ttop = obj.TPRTopology([], [], [], [], [])
    atom_offset = 0
    res_offset = 0
    for block_index in xrange(mtop.nmolblock):
        molblock = do_molblock(data)
        # molb_type is just an index for moltypes/molecule_types
        moltype = mtop.moltypes[molblock.molb_type]
        # segment is made to correspond to the molblock as in gromacs, the
        # naming is kind of arbitrary
        segid = "seg_{0}_{1}".format(block_index, moltype.name)
        for _ in xrange(molblock.molb_nmol):
            for kind in moltype.atomkinds:
                ttop.atoms.append(
                    Atom(kind.id + atom_offset, kind.name, kind.type,
                         kind.resname, kind.resid + res_offset, segid,
                         kind.mass, kind.charge))
            # each remap_* call returns a (possibly empty) list
            ttop.bonds.extend(moltype.remap_bonds(atom_offset))
            ttop.angles.extend(moltype.remap_angles(atom_offset))
            ttop.dihe.extend(moltype.remap_dihe(atom_offset))
            ttop.impr.extend(moltype.remap_impr(atom_offset))
            atom_offset += moltype.number_of_atoms()
            res_offset += moltype.number_of_residues()
    # The remaining mtop records (natoms, atomtypes, cmap grid, groups) are
    # not useful here and are not read.
    return ttop
def parse(self):
    """Parse CRD file *filename* and return the dict `structure`.

    Only reads the list of atoms.  Title lines (first token starting with
    ``*``) and blank lines are skipped.  A line whose last token is ``EXT``
    and whose first token is a non-zero integer selects the extended
    column layout; a line whose first and last tokens are equal selects
    the standard layout.  Every other line is read as an atom.

    :Returns: MDAnalysis internal *structure* dict (key ``"_atoms"``)

    :Raises: :exc:`ValueError` if an atom line cannot be read, including
             the case where no format line was seen before it

    .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology`
    """
    extformat = FORTRANReader(
        '2I10,2X,A8,2X,A8,3F20.10,2X,A8,2X,A8,F20.10')
    stdformat = FORTRANReader('2I5,1X,A4,1X,A4,3F10.5,1X,A4,1X,A4,F10.5')

    reader = None
    atoms = []
    with openany(self.filename) as crd:
        for linenum, line in enumerate(crd):
            fields = line.split()
            if not fields:
                # blank line: nothing to read
                continue
            # reading header
            if fields[0].startswith('*'):
                continue
            if fields[-1] == 'EXT' and int(fields[0]) != 0:
                reader = extformat
                continue
            if fields[0] == fields[-1]:
                reader = stdformat
                continue
            # anything else should be an atom
            if reader is None:
                raise ValueError("Check CRD format at line {}: {}".format(
                    linenum, line.rstrip()))
            try:
                (serial, TotRes, resName, name, x, y, z, chainID, resSeq,
                 tempFactor) = reader.read(line)
            except Exception:
                raise ValueError("Check CRD format at line {}: {}".format(
                    linenum, line.rstrip()))
            atoms.append(
                Atom(len(atoms), name, guess_atom_type(name), resName,
                     TotRes, chainID, guess_atom_mass(name),
                     guess_atom_charge(name)))
    return {"_atoms": atoms}
def parse(self):
    """Read the atom list of *filename* and return it in a dict.

    The first line is skipped and the first two integers of the second
    line are read as ``levcfg`` and ``imcon``.  If ``imcon`` is non-zero
    three more lines (box info) are skipped, followed by one further line
    ("Nummol").  After that, a line starting with ``MOLECULE`` opens a new
    residue whose name is the second token of that line; any other line
    provides an atom name in its first token and is followed by one line
    that is skipped.  Reading stops at the end of the file or at the
    first blank line.

    Every atom gets its name as type, segid ``'SYSTEM'`` and mass and
    charge of 1.0.

    :Returns: dict with the single key ``'atoms'``

    :Raises: :exc:`ValueError` if an atom line appears before any
             ``MOLECULE`` line
    """
    segid = 'SYSTEM'
    atoms = []
    with openany(self.filename) as inf:
        inf.readline()
        # levcfg is not used further, but both integers must be present.
        levcfg, imcon = map(int, inf.readline().split()[:2])
        # Box info
        if imcon != 0:
            inf.readline()
            inf.readline()
            inf.readline()
        # Nummol
        inf.readline()

        # Loop over molecules
        # NOTE(review): resid starts at 1 and is incremented on every
        # MOLECULE line, so the first molecule gets resid 2 -- confirm
        # that this is intended.
        resid = 1
        resname = None
        line = inf.readline().strip()
        while line:
            if line.startswith('MOLECULE'):
                resid += 1
                resname = line.split()[1]
            else:
                if resname is None:
                    raise ValueError(
                        "Atom line found before any MOLECULE line: "
                        "{}".format(line))
                name = line.split()[0]
                # the line after the atom name is not used
                inf.readline()
                atoms.append(
                    Atom(len(atoms), name, name, resname, resid, segid,
                         1.0, 1.0, universe=self._u))
            # Strip every line, not only the first one, so that a blank or
            # indented line is handled the same way throughout the file.
            line = inf.readline().strip()
    return {'atoms': atoms}
def _parseatoms_(self, pdb):
    """Translate the records in ``pdb._atoms`` into MDAnalysis ``Atom`` objects.

    Atom type and partial charge are taken from the record itself (the
    original notes they are always set in PDBQT); only the mass is guessed
    from the atom name.  The segid is the stripped chain id, or
    ``"SYSTEM"`` when that is blank.

    :Returns: list of ``Atom`` instances, numbered from 0
    """
    def to_atom(index, record):
        name = record.name
        atype = record.type
        resname = record.resName
        resid = int(record.resSeq)
        # no empty segids (or Universe throws IndexError)
        segid = record.chainID.strip() or "SYSTEM"
        mass = guess_atom_mass(name)
        charge = float(record.partialCharge)
        return Atom(index, name, atype, resname, resid, segid, mass, charge,
                    bfactor=record.tempFactor)

    return [to_atom(index, record)
            for index, record in enumerate(pdb._atoms)]
def _parseatoms(self, pqr):
    """Translate the records in ``pqr._atoms`` into MDAnalysis ``Atom`` objects.

    Charge and radius come from the record; atom type and mass are guessed
    from the atom name.  The segid is the first non-blank value among the
    record's segID, its chain id and the literal ``"SYSTEM"``.

    :Returns: list of ``Atom`` instances, numbered from 0
    """
    result = []
    for number, record in enumerate(pqr._atoms):
        name = record.name
        fields = [
            number,
            name,
            guess_atom_type(name),
            record.resName,
            int(record.resSeq),
        ]
        chain = record.chainID.strip()
        # no empty segids (or Universe throws IndexError)
        fields.append(record.segID.strip() or chain or "SYSTEM")
        fields.append(guess_atom_mass(name))
        fields.append(float(record.charge))
        result.append(Atom(*fields, radius=record.radius))
    return result
def _parseatoms(self, pdb):
    """Translate the records in ``pdb._atoms`` and store them on ``self``.

    Only records that carry exactly 10 attributes are converted (the
    original marks these as ATOM records and the others as TER records).
    Skipped records still consume an index, so the resulting atom numbers
    may have gaps.  The atom type is the record's element when set,
    otherwise it is guessed from the name; mass and charge are always
    guessed from the name.

    The list is stored in ``self.structure["_atoms"]``; nothing is
    returned.
    """
    converted = []
    for number, record in enumerate(pdb._atoms):
        if len(record.__dict__) != 10:
            # not an ATOM record
            continue
        name = record.name
        atype = record.element or guess_atom_type(name)
        resname = record.resName
        resid = int(record.resSeq)
        chain = record.chainID.strip()
        # no empty segids (or Universe throws IndexError)
        segid = record.segID.strip() or chain or "SYSTEM"
        mass = guess_atom_mass(name)
        charge = guess_atom_charge(name)
        extras = {"bfactor": record.tempFactor, "altLoc": record.altLoc}
        extras["serial"] = record.serial
        converted.append(
            Atom(number, name, atype, resname, resid, segid, mass, charge,
                 **extras))
    self.structure["_atoms"] = converted
def parse(self):
    """Parse DMS file *filename* and return the dict `structure`.

    Reads the ``particle`` table (atoms) and the ``bond`` table of the
    SQLite database.  Each bond is stored as a sorted ``(p0, p1)`` tuple.

    :Returns: MDAnalysis internal *structure* dict with the keys
              ``"_atoms"`` (list of Atom), ``"_bonds"`` (tuple of index
              pairs) and ``"_bondorder"`` (dict pair -> order)

    :Raises: :exc:`IOError` if the file does not exist or a table cannot
             be read; :exc:`ValueError` if a particle row lacks one of
             the expected columns

    .. SeeAlso:: The *structure* dict is defined in `MDAnalysis.topology`.
    """
    from contextlib import closing

    # Fix by SB: Needed because sqlite3.connect does not raise anything if
    # file is not there
    if not os.path.isfile(self.filename):
        raise IOError("No such file: {}".format(self.filename))

    def dict_factory(cursor, row):
        """Fetch SQL records as dictionaries, rather than the default tuples."""
        return dict(
            (col[0], row[idx]) for idx, col in enumerate(cursor.description))

    # A sqlite3 connection used directly as a context manager only commits
    # or rolls back; closing() makes sure the handle is released as well.
    with closing(sqlite3.connect(self.filename)) as con:
        try:
            # This will return dictionaries instead of tuples,
            # when calling cur.fetch() or fetchall()
            con.row_factory = dict_factory
            cur = con.cursor()
            cur.execute('SELECT * FROM particle')
            particles = cur.fetchall()
        except sqlite3.DatabaseError:
            raise IOError("Failed reading the atoms from DMS Database")

        # p["anum"] contains the atomic number
        try:
            atoms = [
                Atom(p["id"], p["name"].strip(),
                     guess_atom_type(p["name"].strip()),
                     p["resname"].strip(), p["resid"], p["segid"].strip(),
                     p["mass"], p["charge"]) for p in particles
            ]
        except KeyError:
            raise ValueError("Failed reading atom information")

        try:
            cur.execute('SELECT * FROM bond')
            bonds = cur.fetchall()
        except sqlite3.DatabaseError:
            raise IOError("Failed reading the bonds from DMS Database")

        bondlist = []
        bondorder = {}
        for b in bonds:
            desc = tuple(sorted([b['p0'], b['p1']]))
            bondlist.append(desc)
            bondorder[desc] = b['order']

    # All the records below besides donors and acceptors can be contained
    # in a DMS file.  In addition to the coordinates and bonds, DMS may
    # contain the entire force-field information (terms+parameters).
    return {
        "_atoms": atoms,
        "_bonds": tuple(bondlist),
        "_bondorder": bondorder,
    }
def _build_structure(self):
    """Build the coarse-grained (CG) atoms, residues, segments, maps and bonds.

    Every selection string in ``self.selections`` is applied to every
    reference residue; each non-empty match becomes one CG ``Atom`` named
    after the matching entry of ``self.names`` and carrying the summed mass
    of the matched atoms.  The results are stored on ``self``:

    * ``self.atoms`` receives the new CG atoms, each linked to its new
      ``Residue``, its ``Segment`` and this universe;
    * ``self.top_map`` and ``self.force_map`` are sparse matrices of shape
      (CG atoms, reference atoms) holding mass fractions and ones;
    * ``self.bonds`` holds one ``Bond`` for every pair of distinct CG atoms
      that is connected by at least one bond of the reference universe;
    * a ``CGReader`` wrapping the reference trajectory is created.

    :Raises: :exc:`ValueError` if a CG site has zero mass, if a reference
             atom would be mapped to two CG sites, or if a selection
             matched no atoms at all
    """
    #atoms cannot be written out of index order, so we
    #need to iterate residue by residue
    index = 0
    reverse_map = {}
    residues = {}
    #keep track of selections, so we can throw a useful error if we
    #don't end up selecting anything
    selection_count = {}
    segments = {}
    for s in self.selections:
        selection_count[s] = 0

    #if we're reducing the residues, we'll need to take care of that
    ref_residues = self.ref_u.residues
    if (self.residue_reduction_map):
        #reduce them
        ref_residues = []
        for i, ri in enumerate(self.residue_reduction_map):
            ref_residues.append(
                Residue(name='CMB',
                        id=i + 1,
                        atoms=reduce(lambda x, y: x + y,
                                     [self.ref_u.residues[j] for j in ri]),
                        resnum=i + 1))

    for r in ref_residues:
        residue_atoms = []
        for s, n in zip(self.selections, self.names):
            group = r.selectAtoms(s)
            #check if there were any selected atoms
            if (len(group) == 0):
                continue
            selection_count[s] += len(group)
            #make new atom
            new_mass = sum([x.mass if x in group else 0 for x in r])
            if (sum([1 if x in group else 0 for x in r]) > 0 and
                    new_mass == 0):
                raise ValueError(
                    'Zero mass CG particle found! Please check all-atom '
                    'masses and/or set them manually via '
                    '"fine_grain_universe.selectAtoms(...).set_mass(...)"')
            a = Atom(index, n, n, r.name, r.id, r.atoms[0].segid, new_mass, 0)
            index += 1
            for ra in group:
                if (ra in reverse_map):
                    raise ValueError(
                        'Attemtping to map {} to {} and {}'.format(
                            ra, a, reverse_map[ra]))
                reverse_map[ra] = a
            #append atom to new residue atom group
            residue_atoms.append(a)
            #add the atom to Universe
            self.atoms += a
        #now actually create new residue and give atoms a reference to it
        residues[r.id] = Residue(r.name, r.id, residue_atoms, resnum=r.resnum)
        for a in residue_atoms:
            a.residue = residues[r.id]
        #take care of putting residue into segment
        segid = None if len(residue_atoms) == 0 else residue_atoms[0].segid
        if (segid in segments):
            segments[segid].append(residues[r.id])
        elif (segid):
            segments[segid] = [residues[r.id]]

    #check to make sure we selected something
    total_selected = 0
    for s in self.selections:
        count = selection_count[s]
        total_selected += count
        if (count == 0):
            raise ValueError('Selection "%s" matched no atoms' % s)

    #check counting
    if (len(self.ref_u.atoms) < total_selected):
        print('Warining: some atoms placed into more than 1 CG Site')
    elif (len(self.ref_u.atoms) > total_selected):
        print('Warning: some atoms not placed into CG site')

    #find hydrogens and collapse them into beads
    if (self.chydrogens):
        for b in self.ref_u.bonds:
            #my hack for inferring a hydrogen
            for a1, a2 in [(b.atom1, b.atom2), (b.atom2, b.atom1)]:
                if (a1.type.startswith('H') and a1.mass < 4.):
                    reverse_map[a1] = reverse_map[a2]
                    #add the mass
                    reverse_map[a2].mass += a1.mass

    #generate matrix mappings for center of mass and sum of forces
    #Each row describes one CG site: mass-normalized weights in top_map,
    #unnormalized ones in force_map.
    self.top_map = npsp.lil_matrix(
        (self.atoms.numberOfAtoms(), self.ref_u.atoms.numberOfAtoms()),
        dtype=np.float32)
    self.force_map = npsp.lil_matrix(
        (self.atoms.numberOfAtoms(), self.ref_u.atoms.numberOfAtoms()),
        dtype=np.float32)

    for a in self.ref_u.atoms:
        try:
            self.top_map[reverse_map[a].number,
                         a.number] = a.mass / reverse_map[a].mass
            self.force_map[reverse_map[a].number, a.number] = 1.
        except KeyError:
            #was not selected
            pass

    #Put them into efficient sparse matrix.
    self.top_map = self.top_map.tobsr()
    self.force_map = self.force_map.tobsr()

    #add bonds using the reverse map
    #The previous implementation looped over the (initially empty) list of
    #CG bonds to decide whether to add one, so no bond was ever created.
    #Track the CG pairs that are already bonded instead.
    self.bonds = []
    bonded_pairs = set()
    for b in self.ref_u.bonds:
        try:
            cgatom1 = reverse_map[b.atom1]
            cgatom2 = reverse_map[b.atom2]
        except KeyError:
            #was not in selection
            continue
        if cgatom1 is cgatom2:
            #both ends belong to the same CG site
            continue
        pair = frozenset((cgatom1.number, cgatom2.number))
        if pair in bonded_pairs:
            continue
        #OK, no bond exists yet
        bonded_pairs.add(pair)
        self.bonds.append(Bond(cgatom1, cgatom2))

    self.__trajectory = CGReader(self, self.ref_u.trajectory, self.top_map,
                                 self.force_map, self.lfdump)
    for a in self.atoms:
        a.universe = self

    #take care of segments now
    segment_groups = {}
    for k, v in segments.items():
        segment_groups[k] = Segment(k, v)
    for a in self.atoms:
        a.segment = segment_groups[a.segid]

    self.atoms._rebuild_caches()
def parse(self, filename=None):
    """Parse MOL2 file *filename* and return the dict `structure`.

    Only the first ``@<TRIPOS>MOLECULE`` block of the file is read; lines
    that precede it (for example leading comments) are ignored.  Within
    the block, comment lines starting with ``#`` and blank lines are
    skipped.  Optional columns after the charge of an atom record or after
    the type of a bond record are ignored.

    :Arguments:
      *filename*
         file to read; defaults to ``self.filename``

    :Returns: MDAnalysis internal *structure* dict with the keys
              ``"_atoms"``, ``"_bonds"`` (list of sorted 0-based index
              pairs) and ``"_bondorder"`` (dict pair -> bond type string)

    :Raises: :exc:`ValueError` if the file has no MOLECULE block, or if
             the block has no atom or no bond records

    .. SeeAlso:: The *structure* dict is defined in
                 :func:`MDAnalysis.topology.PSFParser.PSFParser`.
    """
    if not filename:
        filename = self.filename

    start_line = None
    block_lines = []
    with openany(filename) as f:
        for i, line in enumerate(f):
            if "@<TRIPOS>MOLECULE" in line:
                if start_line is not None:
                    # a second molecule starts here: only read the first
                    break
                start_line = i
            if start_line is None:
                # nothing before the first MOLECULE record belongs to it
                continue
            block_lines.append(line)

    if start_line is None:
        raise ValueError("The mol2 file '{}' needs to have at least one"
                         " @<TRIPOS>MOLECULE block".format(filename))

    sections = {}
    cursor = None
    for line in block_lines:
        if "@<TRIPOS>" in line:
            cursor = line.split("@<TRIPOS>")[1].strip().lower()
            sections[cursor] = []
            continue
        if line.startswith("#") or not line.strip():
            continue
        sections[cursor].append(line)

    # A missing section is reported like an empty one.
    atom_lines = sections.get("atom", [])
    bond_lines = sections.get("bond", [])
    if not atom_lines:
        raise ValueError("The mol2 block ({}:{}) has no atoms".format(
            os.path.basename(filename), start_line))
    if not bond_lines:
        raise ValueError("The mol2 block ({}:{}) has no bonds".format(
            os.path.basename(filename), start_line))

    atoms = []
    for a in atom_lines:
        # coordinates are split off but not used here
        (aid, name, x, y, z, atom_type, resid, resname,
         charge) = a.split()[:9]
        element = guess_atom_type(name)
        mass = guess_atom_mass(element)
        # atom type is sybl atom type
        atoms.append(
            Atom(int(aid) - 1, name, atom_type, resname, int(resid), "X",
                 mass, float(charge)))

    bonds = []
    bondorder = {}
    for b in bond_lines:
        # bond_type can be: 1, 2, am, ar
        bid, a0, a1, bond_type = b.split()[:4]
        bond = tuple(sorted([int(a0) - 1, int(a1) - 1]))
        bondorder[bond] = bond_type
        bonds.append(bond)

    return {"_atoms": atoms, "_bonds": bonds, "_bondorder": bondorder}
def parse(self):
    """Parse Amber PRMTOP topology file *filename*.

    The file is opened twice.  The first pass only looks for a line
    starting with ``%FLAG ATOMIC_NUMBER``; when it is present the
    ``ATOMIC_NUMBER`` section is added to the list of sections to read.
    The second pass checks the header, reads the four lines of the
    ``POINTERS`` block and then hands every listed section, in order, to
    ``self._parse_sec``.  Finally one ``Atom`` is created per atom, with
    residue ids derived from the residue pointers.

    :Returns: MDAnalysis internal *structure* dict; ``"_atoms"`` and
              ``"_numatoms"`` are set here

    :Raises: :exc:`ValueError` if the header is not recognised or the
             file ends before all listed sections were read
    """
    with openany(self.filename) as topfile:
        has_atomic_number = any(
            line.startswith("%FLAG ATOMIC_NUMBER") for line in topfile)

    # Each entry is passed unchanged to self._parse_sec.  Further sections
    # of the file (bonds, angles, dihedrals, ...) are not read.
    sections = [
        ("ATOM_NAME", 1, 20, self._parseatoms, "_name", 0),
        ("CHARGE", 1, 5, self._parsesection, "_charge", 0),
        ("MASS", 1, 5, self._parsesection, "_mass", 0),
        ("ATOM_TYPE_INDEX", 1, 10, self._parsesectionint, "_atom_type", 0),
        ("NUMBER_EXCLUDED_ATOMS", 1, 10, self._parseskip, "_skip", 8),
        ("NONBONDED_PARM_INDEX", 1, 10, self._parseskip, "_skip", 8),
        ("RESIDUE_LABEL", 1, 20, self._parseatoms, "_resname", 11),
        ("RESIDUE_POINTER", 2, 10, self._parsesectionint, "_respoint", 11),
    ]
    if has_atomic_number:
        # ATOMIC_NUMBER is read between CHARGE and MASS.
        sections.insert(
            2, ("ATOMIC_NUMBER", 1, 10, self._parsesectionint, "_skip", 0))

    # Open and check top validity
    # Reading header info POINTERS
    with openany(self.filename) as topfile:
        next_line = topfile.next
        header = next_line()
        if header[:3] != "%VE":
            raise ValueError(
                "{} is not a valid TOP file. %VE Missing in header".format(
                    topfile))
        title = next_line().split()
        if title[1] != "TITLE":
            raise ValueError(
                "{} is not a valid TOP file. 'TITLE' missing in header".
                format(topfile))
        while header[:14] != '%FLAG POINTERS':
            header = next_line()
        # the line after the POINTERS flag is not used
        next_line()

        topremarks = [next_line().strip() for _ in xrange(4)]
        sys_info = [int(token) for remark in topremarks
                    for token in remark.split()]

        structure = {}
        final_structure = {}
        try:
            for info in sections:
                self._parse_sec(sys_info, info, next_line, structure,
                                final_structure)
        except StopIteration:
            raise ValueError("The TOP file didn't contain the minimum"
                             " required section of ATOM_NAME")

        # Completing info respoint to include all atoms in last resid
        respoint = structure["_respoint"]
        respoint.append(sys_info[0] + 1)

        atoms = []
        res_index = 0
        segid = "SYSTEM"  # does not exist in Amber
        for i in range(sys_info[0]):
            charge = convert(structure["_charge"][i], 'Amber',
                             flags['charge_unit'])
            # Step to the next residue once atom i+1 leaves the current
            # pointer range.
            if not (respoint[res_index] <= i + 1 < respoint[res_index + 1]):
                res_index += 1
            resid = res_index + 1
            resname = structure["_resname"][res_index]
            mass = structure["_mass"][i]
            atomtype = structure["_atom_type"][i]
            atomname = structure["_name"][i]
            atoms.append(
                Atom(i, atomname, atomtype, resname, resid, segid, mass,
                     charge))

        final_structure["_atoms"] = atoms
        final_structure["_numatoms"] = sys_info[0]
        return final_structure
def _parseatoms(self, lines, atoms_per, numlines):
    """Parses atom section in a Charmm PSF file.

    Normal (standard) and extended (EXT) PSF format are supported. CHEQ is
    supported in the sense that CHEQ data is simply ignored.

    :Arguments:
      *lines*
         callable returning the next line of the atom section
      *atoms_per*
         accepted but not used in this method
      *numlines*
         number of atom lines to read

    :Returns: list of ``Atom`` instances, one per line read

    CHARMM Format from ``source/psffres.src``:

    CHEQ::
      II,LSEGID,LRESID,LRES,TYPE(I),IAC(I),CG(I),AMASS(I),IMOVE(I),ECH(I),EHA(I)

      standard format:
        (I8,1X,A4,1X,A4,1X,A4,1X,A4,1X,I4,1X,2G14.6,I8,2G14.6)
        (I8,1X,A4,1X,A4,1X,A4,1X,A4,1X,A4,1X,2G14.6,I8,2G14.6)  XPLOR
      expanded format EXT:
        (I10,1X,A8,1X,A8,1X,A8,1X,A8,1X,I4,1X,2G14.6,I8,2G14.6)
        (I10,1X,A8,1X,A8,1X,A8,1X,A8,1X,A4,1X,2G14.6,I8,2G14.6) XPLOR

    no CHEQ::
      II,LSEGID,LRESID,LRES,TYPE(I),IAC(I),CG(I),AMASS(I),IMOVE(I)

      standard format:
        (I8,1X,A4,1X,A4,1X,A4,1X,A4,1X,I4,1X,2G14.6,I8)
        (I8,1X,A4,1X,A4,1X,A4,1X,A4,1X,A4,1X,2G14.6,I8)  XPLOR
      expanded format EXT:
        (I10,1X,A8,1X,A8,1X,A8,1X,A8,1X,I4,1X,2G14.6,I8)
        (I10,1X,A8,1X,A8,1X,A8,1X,A8,1X,A4,1X,2G14.6,I8) XPLOR

    NAMD PSF

      space separated, see release notes for VMD 1.9.1, psfplugin at
      http://www.ks.uiuc.edu/Research/vmd/current/devel.html :

      psfplugin: Added more logic to the PSF plugin to determine cases where
      the CHARMM "EXTended" PSF format cannot accomodate long atom types, and
      we add a "NAMD" keyword to the PSF file flags line at the top of the
      file. Upon reading, if we detect the "NAMD" flag there, we know that it
      is possible to parse the file correctly using a simple space-delimited
      scanf() format string, and we use that strategy rather than holding to
      the inflexible column-based fields that are a necessity for
      compatibility with CHARMM, CNS, X-PLOR, and other formats. NAMD and the
      psfgen plugin already assume this sort of space-delimited formatting,
      but that's because they aren't expected to parse the PSF variants
      associated with the other programs. For the VMD PSF plugin, having the
      "NAMD" tag in the flags line makes it absolutely clear that we're
      dealing with a NAMD-specific file so we can take the same approach.
    """
    # How to partition a line into the eight raw atom fields
    # (number, segid, resid, resname, name, type, charge, mass).
    def split_standard(l):
        # l[62:70], l[70:84], l[84:98] ignore IMOVE, ECH and EHA
        return (l[:8], l[9:13].strip() or "SYSTEM", l[14:18],
                l[19:23].strip(), l[24:28].strip(), l[29:33].strip(),
                l[34:48], l[48:62])

    def split_extended(l):
        # NOTE(review): the EXT layouts listed in the docstring (2G14.6)
        # imply that the mass field spans 66:80, but only 66:70 is read
        # here.  Confirm before changing: a ValueError from a field that
        # does not convert is what triggers the NAMD fallback below.
        return (l[:10], l[11:19].strip() or "SYSTEM", l[20:28],
                l[29:37].strip(), l[38:46].strip(), l[47:51].strip(),
                l[52:66], l[66:70])

    def split_namd(l):
        return l.split()[:8]

    splitters = {
        'STANDARD': split_standard,
        'EXTENDED': split_extended,
        'NAMD': split_namd,
    }
    split_line = splitters[self._format]

    # Once partitioned, give each component the correct type; the atom
    # number is shifted to start from 0.
    def typed(raw):
        return (int(raw[0]) - 1, raw[1] or "SYSTEM", int(raw[2]), raw[3],
                raw[4], raw[5], float(raw[6]), float(raw[7]))

    # Oli: I don't think that this is the correct OUTPUT format:
    #   psf_atom_format = "   %5d %4s %4d %4s %-4s %-4s %10.6f      %7.4f%s\n"
    # It should be rather something like:
    #   psf_ATOM_format = '%(iatom)8d %(segid)4s %(resid)-4d %(resname)4s '+\
    #                     '%(name)-4s %(type)4s %(charge)-14.6f%(mass)-14.4f%(imove)8d\n'

    atoms = []
    for i in xrange(numlines):
        line = lines()
        try:
            record = typed(split_line(line))
        except ValueError:
            # last ditch attempt: this *might* be a NAMD/VMD space-separated
            # "PSF" file from VMD version < 1.9.1.  The space-separated
            # splitter stays in effect for all remaining lines.
            split_line = splitters['NAMD']
            record = typed(split_line(line))
            logger.warn(
                "Guessing that this is actually a NAMD-type PSF file..."
                " continuing with fingers crossed!")
            logger.debug("First NAMD-type line: {}: {}".format(
                i, line.rstrip()))
        iatom, segid, resid, resname, atomname, atomtype, charge, mass = record
        atoms.append(
            Atom(iatom, atomname, atomtype, resname, resid, segid, mass,
                 charge))
    return atoms