def parseLines(self, lines):
    """Parse list of lines in RAWXYZ format.

    lines -- list of text lines.  Leading blank lines and lines whose
             first token is "#" are skipped, trailing blank lines are
             ignored.  Every record must have the same number of columns
             as the first record: either "x y z" (a possible 4th column
             is ignored) or "element x y z".

    Return Structure object or raise StructureFormatError.
    """
    linefields = [l.split() for l in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for field in linefields:
        if len(field) == 0 or field[0] == "#":
            start += 1
        else:
            break
    # find the last valid record
    stop = len(lines)
    while stop > start and len(linefields[stop - 1]) == 0:
        stop -= 1
    # get out for empty structure
    if start >= stop:
        return stru
    # here we have at least one valid record line
    # figure out xyz layout from the first line for plain and raw formats
    floatfields = [isfloat(f) for f in linefields[start]]
    nfields = len(linefields[start])
    if nfields not in (3, 4):
        emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                (start + 1))
        raise StructureFormatError(emsg)
    if floatfields[:3] == [True, True, True]:
        el_idx, x_idx = (None, 0)
    elif floatfields[:4] == [False, True, True, True]:
        el_idx, x_idx = (0, 1)
    else:
        emsg = "%d: invalid RAWXYZ format" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    p_nl = start
    try:
        for fields in linefields[start:]:
            p_nl += 1
            if fields == []:
                continue
            elif len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            element = fields[el_idx] if el_idx is not None else ""
            # the layout checks above guarantee exactly 3 coordinates here
            xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        # raise form valid on Python 2 and 3; on Python 3 the original
        # ValueError stays attached as the exception context
        emsg = "%d: invalid number" % p_nl
        raise StructureFormatError(emsg)
    return stru
def parseLines(self, lines):
    """Parse list of lines in DISCUS format.

    lines -- list of text lines.  Header records up to the "atoms" line
             are dispatched to the per-record parser methods, the
             remaining lines are passed to self._parse_atom.

    Return PDFFitStructure instance or raise StructureFormatError.
    """
    # reduce is not a builtin on Python 3; this import works on 2.6+ too
    from functools import reduce
    self.lines = lines
    ilines = self._linesIterator()
    self.stru = PDFFitStructure()
    record_parsers = {
        "cell": self._parse_cell,
        "format": self._parse_format,
        "generator": self._parse_not_implemented,
        "molecule": self._parse_not_implemented,
        "ncell": self._parse_ncell,
        "spcgr": self._parse_spcgr,
        "symmetry": self._parse_not_implemented,
        "title": self._parse_title,
        "shape": self._parse_shape,
    }
    try:
        # parse header
        for self.line in ilines:
            words = self.line.split()
            if not words or words[0][0] == '#':
                continue
            if words[0] == 'atoms':
                break
            rp = record_parsers.get(words[0], self._parse_unknown_record)
            rp(words)
        # check if cell has been defined
        if not self.cell_read:
            emsg = "%d: unit cell not defined" % self.nl
            raise StructureFormatError(emsg)
        # parse atoms
        for self.line in ilines:
            words = self.line.split()
            if not words or words[0][0] == '#':
                continue
            self._parse_atom(words)
        # self consistency check
        exp_natoms = reduce(lambda x, y: x * y, self.stru.pdffit['ncell'])
        # only check if ncell record exists
        if self.ncell_read and exp_natoms != len(self.stru):
            emsg = 'Expected %d atoms, read %d.' % \
                (exp_natoms, len(self.stru))
            raise StructureFormatError(emsg)
        # take care of superlattice
        if self.stru.pdffit['ncell'][:3] != [1, 1, 1]:
            latpars = list(self.stru.lattice.abcABG())
            superlatpars = [latpars[i] * self.stru.pdffit['ncell'][i]
                            for i in range(3)] + latpars[3:]
            superlattice = Lattice(*superlatpars)
            self.stru.placeInLattice(superlattice)
            self.stru.pdffit['ncell'] = [1, 1, 1, exp_natoms]
    except (ValueError, IndexError):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = "%d: file is not in DISCUS format" % self.nl
        raise StructureFormatError(emsg)
    return self.stru
def _parse_format(self, words):
    """Handle the "format" record of a DISCUS structure file.

    words -- whitespace-split tokens of the record line; words[1] is the
             declared format name.

    No return value.  Raise StructureFormatError when the file declares
    itself as 'pdffit'.
    """
    declared = words[1]
    if declared != 'pdffit':
        return
    raise StructureFormatError(
        "%d: file is not in DISCUS format" % self.nl)
def getParser(format):
    """Return Parser instance for a given structure format.

    format -- name of the structure format, a key in parser_index.

    Raises StructureFormatError exception when format is not defined.
    """
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    # exec() of an import statement cannot create a function local on
    # Python 3, so the parser module is imported explicitly instead.
    import importlib
    pmod = parser_index[format]['module']
    pm = importlib.import_module('diffpy.Structure.Parsers.' + pmod)
    return pm.getParser()
def _parse_cell(self, words):
    """Handle the "cell" record of a DISCUS structure file.

    words -- tokens of the record line; not used, because the current
             line is split again with commas treated as separators.

    Sets the lattice parameters of self.stru and flags self.cell_read.
    Raise StructureFormatError for parameters giving zero cell volume.
    """
    # values may be separated by spaces or by commas
    tokens = self.line.replace(',', ' ').split()
    cell_values = list(map(float, tokens[1:7]))
    try:
        self.stru.lattice.setLatPar(*cell_values)
    except ZeroDivisionError:
        raise StructureFormatError(
            "%d: Invalid lattice parameters - zero cell volume" % self.nl)
    self.cell_read = True
def getParser(format, **kw):
    """Return Parser instance for a given structure format.

    format -- name of the structure format, a key in parser_index.
    kw     -- keyword arguments passed to the Parser init function.

    Raises StructureFormatError exception when format is not defined.
    """
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    # exec() of an import statement cannot rebind a function local on
    # Python 3 (pm would stay None), so import the module explicitly.
    import importlib
    pmod = parser_index[format]['module']
    pm = importlib.import_module('diffpy.Structure.Parsers.' + pmod)
    return pm.getParser(**kw)
def parseLines(self, lines):
    """Parse list of lines in PDFfit format.

    lines -- list of text lines.  The header runs up to the "atoms"
             record that follows a "cell" record; every atom then takes
             6 consecutive lines (position/occupancy, their sigmas,
             U diagonal, its sigmas, U off-diagonal, its sigmas).

    Return Structure object or raise StructureFormatError.
    """
    # reduce is not a builtin on Python 3; this import works on 2.6+ too
    from functools import reduce
    p_nl = 0
    try:
        self.stru = PDFFitStructure()
        stru = self.stru
        cell_line_read = False
        # ignore trailing blank lines
        stop = len(lines)
        while stop > 0 and lines[stop - 1].strip() == "":
            stop -= 1
        ilines = iter(lines[:stop])
        # read header of PDFFit file
        for l in ilines:
            p_nl += 1
            words = l.split()
            if len(words) == 0 or words[0][0] == '#':
                continue
            elif words[0] == 'title':
                stru.title = l.lstrip()[5:].strip()
            elif words[0] == 'scale':
                stru.pdffit['scale'] = float(words[1])
            elif words[0] == 'sharp':
                l1 = l.replace(',', ' ')
                sharp_pars = [float(w) for w in l1.split()[1:]]
                if len(sharp_pars) < 4:
                    stru.pdffit['delta2'] = sharp_pars[0]
                    stru.pdffit['sratio'] = sharp_pars[1]
                    stru.pdffit['rcut'] = sharp_pars[2]
                else:
                    stru.pdffit['delta2'] = sharp_pars[0]
                    stru.pdffit['delta1'] = sharp_pars[1]
                    stru.pdffit['sratio'] = sharp_pars[2]
                    stru.pdffit['rcut'] = sharp_pars[3]
            elif words[0] == 'spcgr':
                key = 'spcgr'
                start = l.find(key) + len(key)
                value = l[start:].strip()
                stru.pdffit['spcgr'] = value
            elif words[0] == 'shape':
                self._parse_shape(l)
            elif words[0] == 'cell':
                cell_line_read = True
                l1 = l.replace(',', ' ')
                latpars = [float(w) for w in l1.split()[1:7]]
                stru.lattice = Lattice(*latpars)
            elif words[0] == 'dcell':
                l1 = l.replace(',', ' ')
                stru.pdffit['dcell'] = [float(w) for w in l1.split()[1:7]]
            elif words[0] == 'ncell':
                l1 = l.replace(',', ' ')
                stru.pdffit['ncell'] = [int(w) for w in l1.split()[1:5]]
            elif words[0] == 'format':
                if words[1] != 'pdffit':
                    emsg = "%d: file is not in PDFfit format" % p_nl
                    raise StructureFormatError(emsg)
            elif words[0] == 'atoms' and cell_line_read:
                break
            else:
                self.ignored_lines.append(l)
        # Header reading finished, check if required lines were present.
        if not cell_line_read:
            emsg = "%d: file is not in PDFfit format" % p_nl
            raise StructureFormatError(emsg)
        # Load data from atom entries.
        p_natoms = reduce(lambda x, y: x * y, stru.pdffit['ncell'])
        # we are now inside data block
        for l in ilines:
            p_nl += 1
            wl1 = l.split()
            element = wl1[0][0].upper() + wl1[0][1:].lower()
            xyz = [float(w) for w in wl1[1:4]]
            occ = float(wl1[4])
            stru.addNewAtom(element, xyz=xyz, occupancy=occ)
            a = stru.getLastAtom()
            # next() raises StopIteration when the atom record is cut
            # short; that is converted to StructureFormatError below
            p_nl += 1
            wl2 = next(ilines).split()
            a.sigxyz = [float(w) for w in wl2[0:3]]
            a.sigo = float(wl2[3])
            p_nl += 1
            wl3 = next(ilines).split()
            p_nl += 1
            wl4 = next(ilines).split()
            p_nl += 1
            wl5 = next(ilines).split()
            p_nl += 1
            wl6 = next(ilines).split()
            a.sigU = numpy.zeros((3, 3), dtype=float)
            a.U11 = float(wl3[0])
            a.U22 = float(wl3[1])
            a.U33 = float(wl3[2])
            a.sigU[0, 0] = float(wl4[0])
            a.sigU[1, 1] = float(wl4[1])
            a.sigU[2, 2] = float(wl4[2])
            a.U12 = float(wl5[0])
            a.U13 = float(wl5[1])
            a.U23 = float(wl5[2])
            a.sigU[0, 1] = a.sigU[1, 0] = float(wl6[0])
            a.sigU[0, 2] = a.sigU[2, 0] = float(wl6[1])
            a.sigU[1, 2] = a.sigU[2, 1] = float(wl6[2])
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
        # expand the lattice when the file describes a supercell
        if stru.pdffit['ncell'][:3] != [1, 1, 1]:
            superlatpars = [latpars[i] * stru.pdffit['ncell'][i]
                            for i in range(3)] + latpars[3:]
            superlattice = Lattice(*superlatpars)
            stru.placeInLattice(superlattice)
            stru.pdffit['ncell'] = [1, 1, 1, p_natoms]
    except (ValueError, IndexError, StopIteration):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = "%d: file is not in PDFfit format" % p_nl
        raise StructureFormatError(emsg)
    return stru
def toLines(self, stru):
    """Convert Structure stru to a list of lines in XCFG atomeye format.

    stru -- structure to be written, it must contain at least one atom.

    Return list of strings.
    Raise StructureFormatError for an empty structure.
    """
    if len(stru) == 0:
        raise StructureFormatError(
            "cannot convert empty structure to XCFG format")
    out = ["Number of particles = %i" % len(stru)]

    # length unit A follows from the spread of atom coordinates
    coords = numpy.array([atom.xyz for atom in stru])
    xyz_min = coords.min(axis=0)
    xyz_max = coords.max(axis=0)
    extent = (xyz_max - xyz_min).max()
    if numpy.allclose(stru.lattice.abcABG(), (1, 1, 1, 90, 90, 90)):
        extent += self.cluster_boundary
    # range of CFG coordinates must be less than 1
    box = numpy.ceil(extent + 1.0e-13)
    # atomeye draws rubbish when boxsize is less than 3.5
    longest = max(numpy.sqrt(numpy.dot(vec, vec))
                  for vec in stru.lattice.base)
    if longest * box < 3.5:
        box = numpy.ceil(3.5 / longest)
    out.append("A = %.8g Angstrom" % box)

    # per-axis shift that centers out-of-range coordinates in the box
    shift = numpy.zeros(3, dtype=float)
    for k in range(3):
        lo, hi = xyz_min[k], xyz_max[k]
        at_origin = (lo == hi and lo == 0.0)
        if lo / box < 0.0 or hi / box >= 1.0 or at_origin:
            shift[k] = 0.5 - (hi + lo) / 2.0 / box

    # H0 tensor
    out.extend(
        "H0(%i,%i) = %.8g A" % (row + 1, col + 1, stru.lattice.base[row, col])
        for row in range(3) for col in range(3))

    # velocities are written only when the first atom carries them
    no_velocity = "v" not in stru[0].__dict__
    if no_velocity:
        out.append(".NO_VELOCITY.")

    # auxiliaries are (name, format-expression) pairs; a structure that
    # has an xcfg dictionary supplies its stored auxiliary names
    try:
        auxiliaries = [(name, "a." + name)
                       for name in stru.xcfg['auxiliaries']]
    except AttributeError:
        auxiliaries = []

    # add occupancy if any atom has nonunit occupancy
    if any(atom.occupancy != 1.0 for atom in stru):
        auxiliaries.append(('occupancy', 'a.occupancy'))

    # classify temperature factors: all zero, all isotropic, or general
    all_zero = True
    all_iso = True
    for atom in stru:
        if all_zero and numpy.any(atom.U != 0.0):
            all_zero = False
        if not numpy.all(atom.U == atom.U[0, 0] * numpy.identity(3)):
            # an anisotropic U is necessarily nonzero
            all_iso = False
            break
    if not all_zero:
        if all_iso:
            auxiliaries.append(('Uiso', 'uflat[0]'))
        else:
            auxiliaries.extend([('U11', 'uflat[0]'),
                                ('U22', 'uflat[4]'),
                                ('U33', 'uflat[8]')])
            # off-diagonal terms are written only when some are nonzero
            every_U = numpy.array([atom.U for atom in stru])
            offdiagonal = (('U12', 0, 1, 'uflat[1]'),
                           ('U13', 0, 2, 'uflat[2]'),
                           ('U23', 1, 2, 'uflat[5]'))
            for name, i, j, expr in offdiagonal:
                if numpy.any(every_U[:, i, j] != 0.0):
                    auxiliaries.append((name, expr))

    # count entries
    nbase = 3 if no_velocity else 6
    out.append("entry_count = %d" % (nbase + len(auxiliaries)))
    # add auxiliaries
    for idx, (name, _) in enumerate(auxiliaries):
        out.append("auxiliary[%d] = %s [au]" % (idx, name))

    # entry format for representing atom properties
    columns = ["{pos[0]:.8g}", "{pos[1]:.8g}", "{pos[2]:.8g}"]
    if not no_velocity:
        columns.extend(["{v[0]:.8g}", "{v[1]:.8g}", "{v[2]:.8g}"])
    columns.extend('{' + expr + ':.8g}' for _, expr in auxiliaries)
    entry_format = ' '.join(columns)

    # we are ready to output atoms; mass and symbol are written whenever
    # the element differs from the one of the previous atom
    out.append("")
    current = None
    for a in stru:
        if a.element != current:
            current = a.element
            out.append("%.4f" % AtomicMass.get(current, 0.0))
            out.append(current)
        out.append(entry_format.format(
            pos=a.xyz / box + shift,
            v=(None if no_velocity else a.v),
            uflat=numpy.ravel(a.U),
            a=a))
    return out
def parseLines(self, lines):
    """Parse list of lines in XCFG format.

    lines -- list of text lines.  The header must begin with the
             "Number of particles =" record; the data block starts at
             the first line that is not a recognized header record.

    Return Structure object or raise StructureFormatError.
    """
    xcfg_Number_of_particles = None
    xcfg_A = None
    xcfg_H0 = numpy.zeros((3, 3), dtype=float)
    xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
    xcfg_NO_VELOCITY = False
    xcfg_entry_count = None
    p_nl = 0
    p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
    p_auxiliary = {}
    stru = Structure()
    # ignore trailing blank lines
    stop = len(lines)
    for line in reversed(lines):
        if line.strip():
            break
        stop -= 1
    # iterator over the valid data lines
    ilines = iter(lines[:stop])
    try:
        # read XCFG header
        for line in ilines:
            p_nl += 1
            stripped_line = line.strip()
            # blank lines and lines starting with # are ignored
            if stripped_line == "" or line[0] == '#':
                continue
            elif xcfg_Number_of_particles is None:
                if line.find("Number of particles =") != 0:
                    emsg = ("%d: first line must " +
                            "contain 'Number of particles ='") % p_nl
                    raise StructureFormatError(emsg)
                xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                p_natoms = xcfg_Number_of_particles
            elif line.find("A =") == 0:
                xcfg_A = float(line[3:].split(None, 1)[0])
            elif line.find("H0(") == 0:
                i, j = (int(line[3]) - 1, int(line[5]) - 1)
                xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                xcfg_H0_set[i, j] = True
            elif line.find(".NO_VELOCITY.") == 0:
                xcfg_NO_VELOCITY = True
            elif line.find("entry_count =") == 0:
                xcfg_entry_count = int(line[13:].split(None, 1)[0])
            elif p_auxiliary_re.match(line):
                m = p_auxiliary_re.match(line)
                idx = int(m.group(1))
                p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
            else:
                break
        # check header for consistency
        if numpy.any(xcfg_H0_set == False):
            emsg = "H0 tensor is not properly defined"
            raise StructureFormatError(emsg)
        # fill gaps in the auxiliary indices with generated names
        p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1
        for i in range(p_auxnum):
            if i not in p_auxiliary:
                p_auxiliary[i] = "aux%d" % i
        sorted_aux_keys = sorted(p_auxiliary.keys())
        if p_auxnum != 0:
            stru.xcfg = {
                'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
            }
        ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
        if ecnt != xcfg_entry_count:
            emsg = ("%d: auxiliary fields are "
                    "not consistent with entry_count") % p_nl
            raise StructureFormatError(emsg)
        # define proper lattice
        stru.lattice.setLatBase(xcfg_H0)
        # here we are inside the data block
        p_element = None
        for line in ilines:
            p_nl += 1
            words = line.split()
            # ignore atom mass
            if len(words) == 1 and isfloat(words[0]):
                continue
            # parse element allowing empty symbol
            elif len(words) <= 1:
                w = line.strip()
                p_element = w[:1].upper() + w[1:].lower()
            elif len(words) == xcfg_entry_count and p_element is not None:
                fields = [float(w) for w in words]
                xyz = [xcfg_A * xi for xi in fields[:3]]
                stru.addNewAtom(p_element, xyz=xyz)
                a = stru[-1]
                _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary,
                                    no_velocity=xcfg_NO_VELOCITY)
            else:
                emsg = "%d: invalid record" % p_nl
                raise StructureFormatError(emsg)
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = "%d: file is not in XCFG format" % p_nl
        raise StructureFormatError(emsg)
    return stru
def parseLines(self, lines):
    """Parse list of lines in atoms format.

    lines -- list of text lines.  Text after any of "#", "%", "!", "*"
             is treated as a comment.

    Raise StructureFormatError when a record cannot be parsed.

    NOTE(review): the body visible here ends after the parsing loop and
    returns nothing; assembling and returning the structure appears to
    be missing from this copy -- confirm against the complete parser.
    """
    comlist = ["#", "%", "!", "*"]
    atoms = []
    title = ""
    anext = False
    structure = BRAtomsStructure()
    meta = structure.bratoms
    pdict = dict.fromkeys(self.plist)
    # Count the lines
    ln = 0
    try:
        for line in lines:
            ln += 1
            # Strip comments from the line
            for c in comlist:
                idx = line.find(c)
                if idx != -1:
                    line = line[:idx]
            # Move on if there is not a line
            if not line:
                continue
            # Move on if there was only white space in the line
            sline = line.split()
            if not sline:
                continue
            # Check if we have atoms following
            if sline[0].startswith("atom"):
                anext = True
                continue
            # Check for title
            if sline[0].startswith("title"):
                if title:
                    title += "\n"
                title += line[5:]
                continue
            # Get rid of pesky "=" and "," signs
            while "=" in sline:
                sline.remove("=")
            while "," in sline:
                sline.remove(",")
            # space group
            if sline and sline[0].startswith("space"):
                meta["space"] = line[5:].strip()
                continue
            # output
            if sline and sline[0].startswith("output"):
                meta["output"] = line[6:].strip()
                continue
            # shift
            if sline and sline[0].startswith("shift"):
                meta["shift"] = line[5:].strip()
                continue
            # Check for other metadata
            while sline and sline[0].strip() in meta:
                key = sline.pop(0).strip()
                if key == "central":
                    key = "core"
                meta[key] = sline.pop(0).strip()
            # Check for lattice information.
            while sline and sline[0].strip() in self.plist:
                key = sline.pop(0).strip()
                pdict[key] = float(sline.pop(0))
            # Check for atom information
            if sline and anext:
                elraw = sline.pop(0).strip()
                el = elraw[:1].upper() + elraw[1:].lower()
                x = float(sline.pop(0))
                y = float(sline.pop(0))
                z = float(sline.pop(0))
                tag = ""
                if sline:
                    tag = sline.pop(0).strip()
                occ = 1.0
                if sline:
                    occ = float(sline.pop(0))
                a = Atom(atype=el, xyz=[x, y, z],
                         name=tag, occupancy=occ)
                atoms.append(a)
    except (ValueError, IndexError):
        # "except X, e" is Python 2 only and the bound name was unused
        emsg = "%d: file is not in Atoms format" % ln
        raise StructureFormatError(emsg)
class P_bratoms(StructureParser):
    """Parser for Bruce Ravel's Atoms structure format.
    """

    # names of the lattice parameters recognized in the input
    plist = ["a", "b", "c", "alpha", "beta", "gamma"]

    def __init__(self):
        StructureParser.__init__(self)
        self.format = "bratoms"
        return

    def parseLines(self, lines):
        """Parse list of lines in atoms format.

        lines -- list of text lines.  Text after any of "#", "%", "!",
                 "*" is treated as a comment.

        Return Structure object or raise StructureFormatError.
        """
        comlist = ["#", "%", "!", "*"]
        atoms = []
        title = ""
        anext = False
        structure = BRAtomsStructure()
        meta = structure.bratoms
        pdict = dict.fromkeys(self.plist)
        # Count the lines
        ln = 0
        try:
            for line in lines:
                ln += 1
                # Strip comments from the line
                for c in comlist:
                    idx = line.find(c)
                    if idx != -1:
                        line = line[:idx]
                # Move on if there is not a line
                if not line:
                    continue
                # Move on if there was only white space in the line
                sline = line.split()
                if not sline:
                    continue
                # Check if we have atoms following
                if sline[0].startswith("atom"):
                    anext = True
                    continue
                # Check for title
                if sline[0].startswith("title"):
                    if title:
                        title += "\n"
                    title += line[5:]
                    continue
                # Get rid of pesky "=" and "," signs
                while "=" in sline:
                    sline.remove("=")
                while "," in sline:
                    sline.remove(",")
                # space group
                if sline and sline[0].startswith("space"):
                    meta["space"] = line[5:].strip()
                    continue
                # output
                if sline and sline[0].startswith("output"):
                    meta["output"] = line[6:].strip()
                    continue
                # shift
                if sline and sline[0].startswith("shift"):
                    meta["shift"] = line[5:].strip()
                    continue
                # Check for other metadata
                while sline and sline[0].strip() in meta:
                    key = sline.pop(0).strip()
                    if key == "central":
                        key = "core"
                    meta[key] = sline.pop(0).strip()
                # Check for lattice information.
                while sline and sline[0].strip() in self.plist:
                    key = sline.pop(0).strip()
                    pdict[key] = float(sline.pop(0))
                # Check for atom information
                if sline and anext:
                    elraw = sline.pop(0).strip()
                    el = elraw[:1].upper() + elraw[1:].lower()
                    x = float(sline.pop(0))
                    y = float(sline.pop(0))
                    z = float(sline.pop(0))
                    tag = ""
                    if sline:
                        tag = sline.pop(0).strip()
                    occ = 1.0
                    if sline:
                        occ = float(sline.pop(0))
                    a = Atom(atype=el, xyz=[x, y, z],
                             name=tag, occupancy=occ)
                    atoms.append(a)
        except (ValueError, IndexError):
            # "except X, e" is Python 2 only and the bound name was unused
            emsg = "%d: file is not in Atoms format" % ln
            raise StructureFormatError(emsg)
        # Make sure we have atoms.
        if len(atoms) == 0:
            raise StructureFormatError("File contains no atoms")
        # Make sure we have unit cell parameters
        if pdict["a"] is None:
            emsg = "Missing definition of cell parameter"
            raise StructureFormatError(emsg)
        # Fill in optional information if it was missing.
        if pdict["alpha"] is None:
            pdict["alpha"] = 90.0
        if pdict["beta"] is None:
            pdict["beta"] = pdict["alpha"]
        if pdict["gamma"] is None:
            pdict["gamma"] = pdict["alpha"]
        if pdict["b"] is None:
            pdict["b"] = pdict["a"]
        if pdict["c"] is None:
            pdict["c"] = pdict["a"]
        # the first atom is used as the core when none was specified
        if meta['core'] is None:
            meta['core'] = atoms[0].element
        lat = Lattice(**pdict)
        structure.title = title
        structure.lattice = lat
        structure.extend(atoms)
        return structure
def parseLines(self, lines):
    """Parse list of lines in XYZ format.

    lines -- list of text lines.  The first valid line holds the number
             of atoms, the line after it is stored as the description
             and, when it consists of 9 numbers, is also used as the
             lattice base vectors.  Atom records have 4 columns
             (symbol x y z) or 5 columns (symbol x y z charge).

    Return Structure object or raise StructureFormatError.
    """
    linefields = [l.split() for l in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for field in linefields:
        if len(field) == 0 or field[0] == "#":
            start += 1
        else:
            break
    # first valid line gives number of atoms
    try:
        lfs = linefields[start]
        w1 = linefields[start][0]
        if len(lfs) == 1 and str(int(w1)) == w1:
            p_natoms = int(w1)
            # try to get lattice vectors from description line
            try:
                latticeVecs = list(map(float, linefields[start + 1]))
                # explicit check, an assert would vanish under "python -O"
                if len(latticeVecs) != 9:
                    raise ValueError(
                        "Expect 9 numbers for the 3 basis vectors")
                reshaped = [latticeVecs[0:3],
                            latticeVecs[3:6],
                            latticeVecs[6:9]]
                stru.lattice = Lattice(base=reshaped)
            except Exception:
                # best effort only: a free-text description is legal, but
                # KeyboardInterrupt and SystemExit must not be swallowed
                import traceback as tb
                import warnings
                warnings.warn("Failed to parse lattice vectors: \n{}".format(
                    tb.format_exc()))
            stru.description = lines[start + 1].strip()
            start += 2
        else:
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            raise StructureFormatError(emsg)
    except (IndexError, ValueError):
        exc_type, exc_value, exc_traceback = sys.exc_info()
        emsg = ("%d: invalid XYZ format, missing number of atoms" %
                (start + 1))
        raise StructureFormatError(emsg).with_traceback(exc_traceback)
    # find the last valid record
    stop = len(lines)
    while stop > start and len(linefields[stop - 1]) == 0:
        stop -= 1
    # get out for empty structure
    if p_natoms == 0 or start >= stop:
        return stru
    # here we have at least one valid record line
    nfields = len(linefields[start])
    if nfields != 4 and nfields != 5:
        emsg = "%d: invalid XYZ format, expected 4 or 5 columns" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    p_nl = start
    try:
        for fields in linefields[start:]:
            p_nl += 1
            if fields == []:
                continue
            elif len(fields) != 4 and len(fields) != 5:
                emsg = ('%d: all lines must have ' +
                        'a symbol, position, and optionally charge') % p_nl
                raise StructureFormatError(emsg)
            symbol = fields[0]
            symbol = symbol[0].upper() + symbol[1:].lower()
            xyz = [float(f) for f in fields[1:4]]
            # the charge column is optional and defaults to zero
            charge = float(fields[4]) if len(fields) == 5 else 0.0
            stru.addNewAtom(symbol, xyz=xyz)
            stru.getLastAtom().charge = charge
    except ValueError:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        emsg = "%d: invalid number format" % p_nl
        raise StructureFormatError(emsg).with_traceback(exc_traceback)
    # finally check if all the atoms have been read
    if p_natoms is not None and len(stru) != p_natoms:
        emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
        raise StructureFormatError(emsg)
    return stru
def parseLines(self, lines):
    """Parse list of lines in XCFG format.

    lines -- list of text lines.  The header must begin with the
             "Number of particles =" record; the data block starts at
             the first line that is not a recognized header record.

    Return Structure object or raise StructureFormatError.
    """
    xcfg_Number_of_particles = None
    xcfg_A = None
    xcfg_H0 = numpy.zeros((3, 3), dtype=float)
    xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
    xcfg_NO_VELOCITY = False
    xcfg_entry_count = None
    p_nl = 0
    p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
    p_auxiliary = {}
    try:
        stru = Structure()
        # ignore trailing blank lines
        stop = len(lines)
        while stop > 0 and lines[stop - 1].strip() == "":
            stop -= 1
        ilines = iter(lines[:stop])
        # read XCFG header
        for line in ilines:
            p_nl += 1
            stripped_line = line.strip()
            # blank lines and lines starting with # are ignored
            if stripped_line == "" or line[0] == '#':
                continue
            elif xcfg_Number_of_particles is None:
                if line.find("Number of particles =") != 0:
                    emsg = ("%d: first line must " +
                            "contain 'Number of particles ='") % p_nl
                    raise StructureFormatError(emsg)
                xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                p_natoms = xcfg_Number_of_particles
            elif line.find("A =") == 0:
                xcfg_A = float(line[3:].split(None, 1)[0])
            elif line.find("H0(") == 0:
                i, j = (int(line[3]) - 1, int(line[5]) - 1)
                xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                xcfg_H0_set[i, j] = True
            elif line.find(".NO_VELOCITY.") == 0:
                xcfg_NO_VELOCITY = True
            elif line.find("entry_count =") == 0:
                xcfg_entry_count = int(line[13:].split(None, 1)[0])
            elif p_auxiliary_re.match(line):
                m = p_auxiliary_re.match(line)
                idx = int(m.group(1))
                p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
            else:
                break
        # check header for consistency
        if numpy.any(xcfg_H0_set == False):
            emsg = "H0 tensor is not properly defined"
            raise StructureFormatError(emsg)
        # fill gaps in the auxiliary indices with generated names
        p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1
        for i in range(p_auxnum):
            if i not in p_auxiliary:
                p_auxiliary[i] = "aux%d" % i
        sorted_aux_keys = sorted(p_auxiliary.keys())
        if p_auxnum != 0:
            stru.xcfg = {
                'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
            }
        # index of the first auxiliary column in an atom record
        p_auxoffset = 3 if xcfg_NO_VELOCITY else 6
        if p_auxoffset + len(p_auxiliary) != xcfg_entry_count:
            emsg = ("%d: auxiliary fields " +
                    "not consistent with entry_count") % p_nl
            raise StructureFormatError(emsg)
        # define proper lattice
        stru.lattice.setLatBase(xcfg_H0)
        # Translate auxiliary columns to assignment actions.  Each action
        # is (column, U cells to set, attribute name or None).
        p_actions = []
        for idx in sorted_aux_keys:
            prop = p_auxiliary[idx]
            col = idx + p_auxoffset
            if prop == "Uiso":
                p_actions.append((col, ((0, 0), (1, 1), (2, 2)), None))
            elif re.match(r"^U\d\d$", prop) \
                    and 1 <= int(prop[1]) <= 3 and 1 <= int(prop[2]) <= 3:
                i, j = int(prop[1]) - 1, int(prop[2]) - 1
                if i == j:
                    p_actions.append((col, ((i, j),), None))
                else:
                    # off-diagonal terms are stored symmetrically
                    p_actions.append((col, ((i, j), (j, i)), None))
            else:
                p_actions.append((col, (), prop))

        def p_assign_atom(a, fields):
            # Assign velocity and auxiliary entries of one record to atom a.
            if not xcfg_NO_VELOCITY:
                a.v = numpy.zeros(3, dtype=float)
                a.v[0] = fields[3]
                a.v[1] = fields[4]
                a.v[2] = fields[5]
            for col, cells, prop in p_actions:
                if prop is None:
                    for i, j in cells:
                        a.U[i, j] = fields[col]
                else:
                    a.__dict__[prop] = fields[col]

        # here we are inside data; step back so that the line which
        # ended the header loop is processed as the first data line
        p_element = None
        p_nl -= 1
        for line in lines[p_nl:stop]:
            p_nl += 1
            words = line.split()
            # ignore atom mass
            if len(words) == 1 and isfloat(words[0]):
                continue
            # parse element allowing empty symbol
            elif len(words) <= 1:
                w = line.strip()
                p_element = w[:1].upper() + w[1:].lower()
            elif len(words) == xcfg_entry_count and p_element is not None:
                fields = [float(w) for w in words]
                stru.addNewAtom(p_element, fields[:3])
                a = stru.getLastAtom()
                a.xyz *= xcfg_A
                p_assign_atom(a, fields)
            else:
                emsg = "%d: invalid record" % p_nl
                raise StructureFormatError(emsg)
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = "%d: file is not in XCFG format" % p_nl
        raise StructureFormatError(emsg)
    return stru
def parseLines(self, lines):
    """Parse list of lines in XYZ format.

    lines -- list of text lines.  The first valid line holds the number
             of atoms, the line after it is stored as the title, and the
             remaining records have 4 columns (element x y z).

    Return Structure object or raise StructureFormatError.
    """
    linefields = [l.split() for l in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for field in linefields:
        if len(field) == 0 or field[0] == "#":
            start += 1
        else:
            break
    # first valid line gives number of atoms
    try:
        lfs = linefields[start]
        w1 = linefields[start][0]
        if len(lfs) == 1 and str(int(w1)) == w1:
            p_natoms = int(w1)
            stru.title = lines[start + 1].strip()
            start += 2
        else:
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            raise StructureFormatError(emsg)
    except (IndexError, ValueError):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = ("%d: invalid XYZ format, missing number of atoms" %
                (start + 1))
        raise StructureFormatError(emsg)
    # find the last valid record
    stop = len(lines)
    while stop > start and len(linefields[stop - 1]) == 0:
        stop -= 1
    # get out for empty structure
    if p_natoms == 0 or start >= stop:
        return stru
    # here we have at least one valid record line
    nfields = len(linefields[start])
    if nfields != 4:
        emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    p_nl = start
    try:
        for fields in linefields[start:]:
            p_nl += 1
            if fields == []:
                continue
            elif len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            element = fields[0]
            element = element[0].upper() + element[1:].lower()
            xyz = [float(f) for f in fields[1:4]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        emsg = "%d: invalid number format" % p_nl
        raise StructureFormatError(emsg)
    # finally check if all the atoms have been read
    if p_natoms is not None and len(stru) != p_natoms:
        emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
        raise StructureFormatError(emsg)
    return stru
p = getParser(fmt) try: pmethod = getattr(p, method) stru = pmethod(*args, **kwargs) self.format = fmt break except StructureFormatError, err: parsers_emsgs.append("%s: %s" % (fmt, err)) except NotImplementedError: pass if stru is None: emsg = "\n".join([ "Unknown or invalid structure format.", "Errors per each tested structure format:" ] + parsers_emsgs) raise StructureFormatError(emsg) self.__dict__.update(p.__dict__) return stru # End of parseLines # End of class P_auto # Routines def getParser(): return P_auto()
def parseLines(self, lines):
    """Parse list of lines in PDB format.

    lines -- list of text lines.  Records are read by fixed column
             positions; lines shorter than 80 characters are padded
             with spaces.

    Return Structure instance or raise StructureFormatError.
    Raise NotImplementedError when the SCALE records define a nonzero
    origin offset.
    """
    p_nl = 0
    try:
        stru = Structure()
        scale = numpy.identity(3, dtype=float)
        scaleU = numpy.zeros(3, dtype=float)
        for line in lines:
            p_nl += 1
            # skip blank lines
            if not line.strip():
                continue
            # make sure line has 80 characters
            if len(line) < 80:
                line = "%-80s" % line
            words = line.split()
            record = words[0]
            if record == "TITLE":
                continuation = line[8:10]
                if continuation.strip():
                    stru.title += line[10:].rstrip()
                else:
                    stru.title = line[10:].rstrip()
            elif record == "CRYST1":
                # the "a" field occupies columns 7-15, i.e. line[6:15]
                a = float(line[6:15])
                b = float(line[15:24])
                c = float(line[24:33])
                alpha = float(line[33:40])
                beta = float(line[40:47])
                gamma = float(line[47:54])
                stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                scale = numpy.transpose(stru.lattice.recbase)
            elif record == "SCALE1":
                sc = numpy.zeros((3, 3), dtype=float)
                sc[0, :] = [float(x) for x in line[10:40].split()]
                scaleU[0] = float(line[45:55])
            elif record == "SCALE2":
                sc[1, :] = [float(x) for x in line[10:40].split()]
                scaleU[1] = float(line[45:55])
            elif record == "SCALE3":
                sc[2, :] = [float(x) for x in line[10:40].split()]
                scaleU[2] = float(line[45:55])
                # the lattice implied by SCALE must agree with CRYST1
                base = numpy.transpose(numpy.linalg.inv(sc))
                abcABGcryst = numpy.array(stru.lattice.abcABG())
                stru.lattice.setLatBase(base)
                abcABGscale = numpy.array(stru.lattice.abcABG())
                reldiff = numpy.fabs(1.0 - abcABGscale / abcABGcryst)
                if not numpy.all(reldiff < 1.0e-4):
                    emsg = "%d: " % p_nl + \
                        "SCALE and CRYST1 are not consistent."
                    raise StructureFormatError(emsg)
                if numpy.any(scaleU != 0.0):
                    emsg = "Origin offset not yet implemented."
                    raise NotImplementedError(emsg)
            elif record in ("ATOM", "HETATM"):
                name = line[12:16].strip()
                rc = [float(x) for x in line[30:54].split()]
                # blank occupancy or B-factor fields fall back to defaults
                try:
                    occupancy = float(line[54:60])
                except ValueError:
                    occupancy = 1.0
                try:
                    B = float(line[60:66])
                    uiso = B / (8 * pi**2)
                except ValueError:
                    uiso = 0.0
                element = line[76:78].strip()
                if element == "":
                    # get element from the first 2 characters of name
                    element = line[12:14].strip()
                    element = element[0].upper() + element[1:].lower()
                stru.addNewAtom(element, occupancy=occupancy, label=name)
                last_atom = stru.getLastAtom()
                last_atom.xyz_cartn = rc
                last_atom.Uisoequiv = uiso
            elif record == "SIGATM":
                sigrc = [float(x) for x in line[30:54].split()]
                sigxyz = numpy.dot(scale, sigrc)
                try:
                    sigo = float(line[54:60])
                except ValueError:
                    sigo = 0.0
                try:
                    sigB = float(line[60:66])
                    sigU = numpy.identity(3) * sigB / (8 * pi**2)
                except ValueError:
                    sigU = numpy.zeros((3, 3), dtype=float)
                last_atom.sigxyz = sigxyz
                last_atom.sigo = sigo
                last_atom.sigU = sigU
            elif record == "ANISOU":
                last_atom.anisotropy = True
                # values in the file are scaled by 1.0e-4 on reading
                Uij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                Ua = last_atom.U
                for i in range(3):
                    Ua[i, i] = Uij[i]
                Ua[0, 1] = Ua[1, 0] = Uij[3]
                Ua[0, 2] = Ua[2, 0] = Uij[4]
                Ua[1, 2] = Ua[2, 1] = Uij[5]
            elif record == "SIGUIJ":
                sigUij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                for i in range(3):
                    last_atom.sigU[i, i] = sigUij[i]
                last_atom.sigU[0, 1] = last_atom.sigU[1, 0] = sigUij[3]
                last_atom.sigU[0, 2] = last_atom.sigU[2, 0] = sigUij[4]
                last_atom.sigU[1, 2] = last_atom.sigU[2, 1] = sigUij[5]
            elif record in P_pdb.validRecords:
                pass
            else:
                emsg = "%d: invalid record name '%r'" % (p_nl, record)
                raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        # raise form valid on Python 2 and 3; on Python 3 the original
        # error stays attached as the exception context
        emsg = "%d: invalid PDB record" % p_nl
        raise StructureFormatError(emsg)
    return stru