def parseLines(self, lines):
    """Parse list of lines in RAWXYZ format.

    Leading lines that are blank or whose first field is "#" are skipped
    and trailing blank lines are ignored.  The column layout is deduced
    from the first record, which must have 3 or 4 columns and be either
    "x y z [extra]" or "element x y z".  All following non-blank lines
    must have the same number of columns.

    lines -- list of strings, one per line of the structure file

    Return Structure object or raise StructureFormatError.
    """
    linefields = [l.split() for l in lines]
    # prepare output structure
    stru = Structure()
    # find first valid record
    start = 0
    for field in linefields:
        if len(field) == 0 or field[0] == "#":
            start += 1
        else:
            break
    # find the last valid record
    stop = len(lines)
    while stop > start and len(linefields[stop - 1]) == 0:
        stop -= 1
    # get out for empty structure
    if start >= stop:
        return stru
    # here we have at least one valid record line
    # figure out xyz layout from the first line for plain and raw formats
    floatfields = [isfloat(f) for f in linefields[start]]
    nfields = len(linefields[start])
    if nfields not in (3, 4):
        emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                (start + 1))
        raise StructureFormatError(emsg)
    if floatfields[:3] == [True, True, True]:
        el_idx, x_idx = (None, 0)
    elif floatfields[:4] == [False, True, True, True]:
        el_idx, x_idx = (0, 1)
    else:
        emsg = "%d: invalid RAWXYZ format" % (start + 1)
        raise StructureFormatError(emsg)
    # now try to read all record lines
    p_nl = start
    try:
        for fields in linefields[start:]:
            p_nl += 1
            if not fields:
                continue
            if len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            element = fields[el_idx] if el_idx is not None else ""
            # The layout and column-count checks above guarantee that this
            # slice always holds exactly three coordinates.
            xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
            stru.addNewAtom(element, xyz=xyz)
    except ValueError:
        # non-numeric coordinate; re-raise as a format error while
        # preserving the original traceback
        emsg = "%d: invalid number" % p_nl
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    return stru
def parseLines(self, lines):
    """Parse list of lines in DISCUS format.

    Header records are dispatched to the matching _parse_* handler until
    the 'atoms' record is reached; every later line that is neither blank
    nor a comment is handed to _parse_atom.  When the ncell record asks
    for a supercell, the structure is placed in the expanded lattice.

    lines -- list of strings, one per line of the structure file

    Return PDFFitStructure instance or raise StructureFormatError.
    """
    self.lines = lines
    ilines = self._linesIterator()
    self.stru = PDFFitStructure()
    handlers = {
        "cell": self._parse_cell,
        "format": self._parse_format,
        "generator": self._parse_not_implemented,
        "molecule": self._parse_not_implemented,
        "ncell": self._parse_ncell,
        "spcgr": self._parse_spcgr,
        "symmetry": self._parse_not_implemented,
        "title": self._parse_title,
        "shape": self._parse_shape,
    }
    try:
        # header section, terminated by the 'atoms' record
        for self.line in ilines:
            words = self.line.split()
            if words and words[0][0] != '#':
                if words[0] == 'atoms':
                    break
                handle = handlers.get(words[0], self._parse_unknown_record)
                handle(words)
        # the cell record is mandatory
        if not self.cell_read:
            raise StructureFormatError(
                "%d: unit cell not defined" % self.nl)
        # atom section, commas count as field separators here
        for self.line in ilines:
            words = self.line.replace(',', ' ').split()
            if words and words[0][0] != '#':
                self._parse_atom(words)
        # consistency check, applied only when an ncell record was read
        ncell = self.stru.pdffit['ncell']
        exp_natoms = reduce(lambda x, y: x * y, ncell)
        if self.ncell_read and exp_natoms != len(self.stru):
            emsg = 'Expected %d atoms, read %d.' % \
                (exp_natoms, len(self.stru))
            raise StructureFormatError(emsg)
        # expand the lattice when ncell describes a supercell
        if ncell[:3] != [1, 1, 1]:
            latpars = list(self.stru.lattice.abcABG())
            scaled_abc = [latpars[k] * ncell[k] for k in range(3)]
            superlattice = Lattice(*(scaled_abc + latpars[3:]))
            self.stru.placeInLattice(superlattice)
            self.stru.pdffit['ncell'] = [1, 1, 1, exp_natoms]
    except (ValueError, IndexError):
        tb = sys.exc_info()[2]
        emsg = "%d: file is not in DISCUS format" % self.nl
        six.reraise(StructureFormatError, StructureFormatError(emsg), tb)
    return self.stru
def _parse_lattice(self, block): """Obtain lattice parameters from a CifBlock. This method updates self.stru.lattice. block -- instance of CifBlock No return value. """ if '_cell_length_a' not in block: return # obtain lattice parameters try: latpars = ( leading_float(block['_cell_length_a']), leading_float(block['_cell_length_b']), leading_float(block['_cell_length_c']), leading_float(block['_cell_angle_alpha']), leading_float(block['_cell_angle_beta']), leading_float(block['_cell_angle_gamma']), ) except KeyError as err: exc_type, exc_value, exc_traceback = sys.exc_info() emsg = str(err) e = StructureFormatError(emsg) six.reraise(StructureFormatError, e, exc_traceback) self.stru.lattice = Lattice(*latpars) return
def _wrapParseMethod(self, method, *args, **kwargs):
    """A helper evaluator method.  Try the specified parse method with
    each registered structure parser and return the first successful
    result.

    Formats are tried in the order given by self._getOrderedFormats().
    Set the format attribute to the detected file format and copy the
    attributes of the successful parser to this object.

    method -- name of the parser method to call
    args, kwargs -- arguments passed on to that method

    Return Structure instance, or raise StructureFormatError.
    """
    from diffpy.structure.parsers import getParser
    failures = []
    result = None
    for fmt in self._getOrderedFormats():
        parser = getParser(fmt, **self.pkw)
        try:
            result = getattr(parser, method)(*args, **kwargs)
        except StructureFormatError as err:
            # remember why this format was rejected
            failures.append("%s: %s" % (fmt, err))
            continue
        except NotImplementedError:
            # this parser does not support the requested method
            continue
        self.format = fmt
        break
    if result is None:
        header = [
            "Unknown or invalid structure format.",
            "Errors per each tested structure format:",
        ]
        raise StructureFormatError("\n".join(header + failures))
    self.__dict__.update(parser.__dict__)
    return result
def _parse_format(self, words): """Process the format record from DISCUS structure file. """ if words[1] == 'pdffit': emsg = "%d: file is not in DISCUS format" % self.nl raise StructureFormatError(emsg) return
def _parse_space_group_symop_operation_xyz(self, block):
    """Process symmetry operations from a CifBlock.

    The method updates spacegroup and eau data according to symmetry
    operations defined in _space_group_symop_operation_xyz or
    _symmetry_equiv_pos_as_xyz items in CifBlock.

    block -- instance of CifBlock

    No return value.
    Raise StructureFormatError when no space group can be determined.
    """
    from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
    from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
    from diffpy.structure.spacegroups import FindSpaceGroup
    self.asymmetric_unit = list(self.stru)
    # explicit symmetry operations, taken from the first synonym present
    candidates = ('_space_group_symop_operation_xyz',
                  '_symmetry_equiv_pos_as_xyz')
    present = [name for name in candidates if name in block]
    symop_list = []
    if present:
        loop_name = present[0]
        loop = block.GetLoop(loop_name)
        symop_list = [getSymOp(eqxyz) for eqxyz in loop[loop_name]]
    # space group names and identifier
    hall_name = (block.get('_space_group_name_Hall', '') or
                 block.get('_symmetry_space_group_name_Hall', ''))
    hm_name = (block.get('_space_group_name_H-M_alt', '') or
               block.get('_symmetry_space_group_name_H-M', ''))
    self.cif_sgname = hall_name or hm_name or None
    sgid = (block.get('_space_group_IT_number', '') or
            block.get('_symmetry_Int_Tables_number', '') or
            hm_name)
    self.spacegroup = None
    # first choice: an existing space group matching the operations
    if symop_list:
        try:
            self.spacegroup = FindSpaceGroup(symop_list)
        except ValueError:
            pass
    # second choice: lookup by identifier
    if self.spacegroup is None and sgid and IsSpaceGroupIdentifier(sgid):
        self.spacegroup = GetSpaceGroup(sgid)
    # last resort: build a new space group from the listed operations
    if symop_list and self.spacegroup is None:
        short_name = "CIF " + (hall_name or 'data')
        system_name = (block.get('_space_group_crystal_system') or
                       block.get('_symmetry_cell_setting') or
                       'TRICLINIC')
        self.spacegroup = SpaceGroup(short_name=short_name,
                                     crystal_system=system_name.upper(),
                                     symop_list=symop_list)
    if self.spacegroup is None:
        template = "CIF file has unknown space group identifier {!r}."
        raise StructureFormatError(template.format(sgid))
    self._expandAsymmetricUnit(block)
    return
def _parse_cell(self, words): """Process the cell record from DISCUS structure file. """ # split again on spaces or commas words = self.line.replace(',', ' ').split() latpars = [ float(w) for w in words[1:7] ] try: self.stru.lattice.setLatPar(*latpars) except ZeroDivisionError: emsg = "%d: Invalid lattice parameters - zero cell volume" % \ self.nl raise StructureFormatError(emsg) self.cell_read = True return
def getParser(format, **kw):
    """Return Parser instance for a given structure format.

    format -- name of the structure format, a key of parser_index
    kw     -- keyword arguments passed to the Parser init function.

    Raises StructureFormatError exception when format is not defined.
    """
    import importlib
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    pmod = parser_index[format]['module']
    # import the parser module by name instead of exec-ing generated code
    pm = importlib.import_module('diffpy.structure.parsers.' + pmod)
    return pm.getParser(**kw)
def _parse_shape(self, words): """Process the shape record from DISCUS structure file. """ # strip away any commas linefixed = " ".join(words).replace(',', ' ') wordsfixed = linefixed.split() shapetype = wordsfixed[1] if shapetype == 'sphere': self.stru.pdffit['spdiameter'] = float(words[2]) elif shapetype == 'stepcut': self.stru.pdffit['stepcut'] = float(words[2]) else: emsg = 'Invalid type of particle shape correction %r' % shapetype raise StructureFormatError(emsg) return
def _parse_shape(self, line): """Process shape line from PDFfit file and update self.stru line -- line containing data for particle shape correction No return value. Raise StructureFormatError for invalid record. """ line_nocommas = line.replace(',', ' ') words = line_nocommas.split() assert words[0] == 'shape' shapetype = words[1] if shapetype == 'sphere': self.stru.pdffit['spdiameter'] = float(words[2]) elif shapetype == 'stepcut': self.stru.pdffit['stepcut'] = float(words[2]) else: emsg = 'Invalid type of particle shape correction %r' % shapetype raise StructureFormatError(emsg) return
def _parseCifDataSource(self, datasource):
    """\
    Open and process CIF data from the specified `datasource`.

    Blocks are processed in the order returned by the CifFile `keys`
    method and processing stops at the first block that yields
    a structure.

    Parameters
    ----------
    datasource : str or a file-like object
        This is used as an argument to the CifFile class.  The CifFile
        instance is stored in `ciffile` attribute of this Parser.

    Returns
    -------
    Structure
        The Structure object loaded from the specified data source.

    Raises
    ------
    StructureFormatError
        When the data do not constitute a valid CIF format.
    """
    from CifFile import CifFile, StarError
    self.stru = None
    try:
        with _suppressCifParserOutput():
            # The `grammar` option lets CifFile digest values with
            # curly-brackets.
            # Ref: https://bitbucket.org/jamesrhester/pycifrw/issues/19
            self.ciffile = CifFile(datasource, grammar='auto')
            for name in self.ciffile.keys():
                self._parseCifBlock(name)
                if self.stru is None:
                    continue
                # the first structure found is the one returned
                break
    except (StarError, ValueError, IndexError) as err:
        tb = sys.exc_info()[2]
        e = StructureFormatError(str(err).strip())
        six.reraise(StructureFormatError, e, tb)
    return self.stru
def toLines(self, stru):
    """Convert Structure stru to a list of lines in XCFG atomeye format.

    stru -- Structure to be converted, it must contain at least one atom

    Return list of strings.
    Raise StructureFormatError when stru is empty.
    """
    if len(stru) == 0:
        emsg = "cannot convert empty structure to XCFG format"
        raise StructureFormatError(emsg)
    lines = []
    lines.append("Number of particles = %i" % len(stru))
    # figure out length unit A
    allxyz = numpy.array([a.xyz for a in stru])
    lo_xyz = allxyz.min(axis=0)
    hi_xyz = allxyz.max(axis=0)
    max_range_xyz = (hi_xyz - lo_xyz).max()
    if numpy.allclose(stru.lattice.abcABG(), (1, 1, 1, 90, 90, 90)):
        max_range_xyz += self.cluster_boundary
    # range of CFG coordinates must be less than 1
    p_A = numpy.ceil(max_range_xyz + 1.0e-13)
    # atomeye draws rubbish when boxsize is less than 3.5
    hi_ucvect = max([numpy.sqrt(numpy.dot(v, v)) for v in stru.lattice.base])
    if hi_ucvect * p_A < 3.5:
        p_A = numpy.ceil(3.5 / hi_ucvect)
    lines.append("A = %.8g Angstrom" % p_A)
    # how much do we need to shift the coordinates?
    # An axis is re-centered at 0.5 when its scaled coordinates fall
    # outside [0, 1) or when all of them are exactly zero.
    p_dxyz = numpy.zeros(3, dtype=float)
    for i in range(3):
        out_of_box = lo_xyz[i] / p_A < 0.0 or hi_xyz[i] / p_A >= 1.0
        all_zero = lo_xyz[i] == hi_xyz[i] and lo_xyz[i] == 0.0
        if out_of_box or all_zero:
            p_dxyz[i] = 0.5 - (hi_xyz[i] + lo_xyz[i]) / 2.0 / p_A
    # H0 tensor
    for i in range(3):
        for j in range(3):
            lines.append("H0(%i,%i) = %.8g A" %
                         (i + 1, j + 1, stru.lattice.base[i, j]))
    # velocities are written only if the first atom carries attribute "v"
    a_first = stru[0]
    p_NO_VELOCITY = "v" not in a_first.__dict__
    if p_NO_VELOCITY:
        lines.append(".NO_VELOCITY.")
    # build a p_auxiliaries list of (aux_name,atom_expression) tuples
    # if stru came from xcfg file, it would store original auxiliaries in
    # xcfg dictionary
    try:
        p_auxiliaries = [(aux, "a." + aux)
                         for aux in stru.xcfg['auxiliaries']]
    except AttributeError:
        p_auxiliaries = []
    # add occupancy if any atom has nonunit occupancy
    for a in stru:
        if a.occupancy != 1.0:
            p_auxiliaries.append(('occupancy', 'a.occupancy'))
            break
    # add temperature factor with as many terms as needed
    # check whether all temperature factors are zero or isotropic
    p_allUzero = True
    p_allUiso = True
    for a in stru:
        if p_allUzero and numpy.any(a.U != 0.0):
            p_allUzero = False
        if not numpy.all(a.U == a.U[0, 0] * numpy.identity(3)):
            p_allUiso = False
            # here p_allUzero must be false
            break
    if p_allUzero:
        pass
    elif p_allUiso:
        p_auxiliaries.append(('Uiso', 'uflat[0]'))
    else:
        p_auxiliaries.extend([('U11', 'uflat[0]'),
                              ('U22', 'uflat[4]'),
                              ('U33', 'uflat[8]')])
        # check if there are off-diagonal elements
        allU = numpy.array([a.U for a in stru])
        if numpy.any(allU[:, 0, 1] != 0.0):
            p_auxiliaries.append(('U12', 'uflat[1]'))
        if numpy.any(allU[:, 0, 2] != 0.0):
            p_auxiliaries.append(('U13', 'uflat[2]'))
        if numpy.any(allU[:, 1, 2] != 0.0):
            p_auxiliaries.append(('U23', 'uflat[5]'))
    # count entries
    p_entry_count = (3 if p_NO_VELOCITY else 6) + len(p_auxiliaries)
    lines.append("entry_count = %d" % p_entry_count)
    # add auxiliaries
    for i, (aux_name, _) in enumerate(p_auxiliaries):
        lines.append("auxiliary[%d] = %s [au]" % (i, aux_name))
    # now define entry format efmt for representing atom properties
    fmwords = ["{pos[0]:.8g}", "{pos[1]:.8g}", "{pos[2]:.8g}"]
    if not p_NO_VELOCITY:
        fmwords += ["{v[0]:.8g}", "{v[1]:.8g}", "{v[2]:.8g}"]
    fmwords.extend('{' + expr + ':.8g}' for _, expr in p_auxiliaries)
    efmt = ' '.join(fmwords)
    # we are ready to output atoms:
    lines.append("")
    p_element = None
    for a in stru:
        # mass and symbol lines are written whenever the element changes
        if a.element != p_element:
            p_element = a.element
            lines.append("%.4f" % AtomicMass.get(p_element, 0.0))
            lines.append(p_element)
        pos = a.xyz / p_A + p_dxyz
        v = None if p_NO_VELOCITY else a.v
        uflat = numpy.ravel(a.U)
        entry = efmt.format(pos=pos, v=v, uflat=uflat, a=a)
        lines.append(entry)
    return lines
def parseLines(self, lines):
    """Parse list of lines in XCFG format.

    The header must start with the "Number of particles =" record and
    define all nine H0 tensor components.  The number of auxiliary
    fields must agree with the entry_count record.

    lines -- list of strings, one per line of the structure file

    Return Structure object or raise StructureFormatError.
    """
    xcfg_Number_of_particles = None
    xcfg_A = None
    xcfg_H0 = numpy.zeros((3, 3), dtype=float)
    xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
    xcfg_NO_VELOCITY = False
    xcfg_entry_count = None
    p_nl = 0
    p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
    p_auxiliary = {}
    stru = Structure()
    # ignore trailing blank lines
    stop = len(lines)
    for line in reversed(lines):
        if line.strip():
            break
        stop -= 1
    # iterator over the valid data lines
    ilines = iter(lines[:stop])
    try:
        # read XCFG header
        for line in ilines:
            p_nl += 1
            stripped_line = line.strip()
            # blank lines and lines starting with # are ignored
            if stripped_line == "" or line[0] == '#':
                continue
            elif xcfg_Number_of_particles is None:
                if not line.startswith("Number of particles ="):
                    emsg = ("%d: first line must " +
                            "contain 'Number of particles ='") % p_nl
                    raise StructureFormatError(emsg)
                xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                p_natoms = xcfg_Number_of_particles
            elif line.startswith("A ="):
                xcfg_A = float(line[3:].split(None, 1)[0])
            elif line.startswith("H0("):
                i, j = (int(line[3]) - 1, int(line[5]) - 1)
                xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                xcfg_H0_set[i, j] = True
            elif line.startswith(".NO_VELOCITY."):
                xcfg_NO_VELOCITY = True
            elif line.startswith("entry_count ="):
                xcfg_entry_count = int(line[13:].split(None, 1)[0])
            else:
                m = p_auxiliary_re.match(line)
                if m is None:
                    # NOTE: the line that ends the header has already been
                    # taken from the iterator, so the data loop below
                    # starts with the line after it.
                    break
                idx = int(m.group(1))
                p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
        # check header for consistency
        if not xcfg_H0_set.all():
            emsg = "H0 tensor is not properly defined"
            raise StructureFormatError(emsg)
        # fill in default names for auxiliary indices that were skipped
        p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1
        for i in range(p_auxnum):
            if i not in p_auxiliary:
                p_auxiliary[i] = "aux%d" % i
        sorted_aux_keys = sorted(p_auxiliary.keys())
        if p_auxnum != 0:
            stru.xcfg = {
                'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
            }
        ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
        if ecnt != xcfg_entry_count:
            emsg = ("%d: auxiliary fields are "
                    "not consistent with entry_count") % p_nl
            raise StructureFormatError(emsg)
        # define proper lattice
        stru.lattice.setLatBase(xcfg_H0)
        # here we are inside the data block
        p_element = None
        for line in ilines:
            p_nl += 1
            words = line.split()
            # ignore atom mass
            if len(words) == 1 and isfloat(words[0]):
                continue
            # parse element allowing empty symbol
            elif len(words) <= 1:
                w = line.strip()
                p_element = w[:1].upper() + w[1:].lower()
            elif len(words) == xcfg_entry_count and p_element is not None:
                fields = [float(w) for w in words]
                # Coordinates are scaled by A; without an "A =" record
                # the multiplication below would fail with TypeError.
                if xcfg_A is None:
                    emsg = "%d: length unit A is not defined" % p_nl
                    raise StructureFormatError(emsg)
                xyz = [xcfg_A * xi for xi in fields[:3]]
                stru.addNewAtom(p_element, xyz=xyz)
                a = stru[-1]
                _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary,
                                    no_velocity=xcfg_NO_VELOCITY)
            else:
                emsg = "%d: invalid record" % p_nl
                raise StructureFormatError(emsg)
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        emsg = "%d: file is not in XCFG format" % p_nl
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    return stru
def parseLines(self, lines):
    """Parse list of lines in PDB format.

    lines -- list of strings, one per line of the PDB file

    Return Structure instance or raise StructureFormatError.
    Raise NotImplementedError when the SCALE records contain a nonzero
    origin offset.
    """
    p_nl = 0
    try:
        stru = Structure()
        scale = numpy.identity(3, dtype=float)
        scaleU = numpy.zeros(3, dtype=float)
        # sc is created by the SCALE1 record and last_atom by ATOM/HETATM;
        # None marks that the prerequisite record has not been seen yet.
        sc = None
        last_atom = None
        for line in lines:
            p_nl += 1
            # skip blank lines
            if not line.strip():
                continue
            # make sure line has 80 characters
            if len(line) < 80:
                line = "%-80s" % line
            words = line.split()
            record = words[0]
            # Report records that arrive before the record they depend on
            # as format errors instead of failing on an unbound name.
            if record in ("SCALE2", "SCALE3") and sc is None:
                emsg = ("%d: %s record without preceding SCALE1" %
                        (p_nl, record))
                raise StructureFormatError(emsg)
            if (record in ("SIGATM", "ANISOU", "SIGUIJ") and
                    last_atom is None):
                emsg = ("%d: %s record without preceding atom record" %
                        (p_nl, record))
                raise StructureFormatError(emsg)
            if record == "TITLE":
                continuation = line[8:10]
                if continuation.strip():
                    stru.title += line[10:].rstrip()
                else:
                    stru.title = line[10:].rstrip()
            elif record == "CRYST1":
                a = float(line[7:15])
                b = float(line[15:24])
                c = float(line[24:33])
                alpha = float(line[33:40])
                beta = float(line[40:47])
                gamma = float(line[47:54])
                stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                scale = numpy.transpose(stru.lattice.recbase)
            elif record == "SCALE1":
                sc = numpy.zeros((3, 3), dtype=float)
                sc[0, :] = [float(x) for x in line[10:40].split()]
                scaleU[0] = float(line[45:55])
            elif record == "SCALE2":
                sc[1, :] = [float(x) for x in line[10:40].split()]
                scaleU[1] = float(line[45:55])
            elif record == "SCALE3":
                sc[2, :] = [float(x) for x in line[10:40].split()]
                scaleU[2] = float(line[45:55])
                # the lattice base derived from SCALE must agree with
                # the lattice parameters that were set before
                base = numpy.transpose(numpy.linalg.inv(sc))
                abcABGcryst = numpy.array(stru.lattice.abcABG())
                stru.lattice.setLatBase(base)
                abcABGscale = numpy.array(stru.lattice.abcABG())
                reldiff = numpy.fabs(1.0 - abcABGscale / abcABGcryst)
                if not numpy.all(reldiff < 1.0e-4):
                    emsg = "%d: " % p_nl + \
                        "SCALE and CRYST1 are not consistent."
                    raise StructureFormatError(emsg)
                if numpy.any(scaleU != 0.0):
                    emsg = "Origin offset not yet implemented."
                    raise NotImplementedError(emsg)
            elif record in ("ATOM", "HETATM"):
                name = line[12:16].strip()
                rc = [float(x) for x in line[30:54].split()]
                # occupancy and B factor fall back to defaults when
                # their columns do not hold a number
                try:
                    occupancy = float(line[54:60])
                except ValueError:
                    occupancy = 1.0
                try:
                    B = float(line[60:66])
                    uiso = B / (8 * pi**2)
                except ValueError:
                    uiso = 0.0
                element = line[76:78].strip()
                if element == "":
                    # get element from the first 2 characters of name
                    element = line[12:14].strip()
                    element = element[0].upper() + element[1:].lower()
                stru.addNewAtom(element,
                                occupancy=occupancy, label=name)
                last_atom = stru.getLastAtom()
                last_atom.xyz_cartn = rc
                last_atom.Uisoequiv = uiso
            elif record == "SIGATM":
                sigrc = [float(x) for x in line[30:54].split()]
                sigxyz = numpy.dot(scale, sigrc)
                try:
                    sigo = float(line[54:60])
                except ValueError:
                    sigo = 0.0
                try:
                    sigB = float(line[60:66])
                    sigU = numpy.identity(3) * sigB / (8 * pi**2)
                except ValueError:
                    sigU = numpy.zeros((3, 3), dtype=float)
                last_atom.sigxyz = sigxyz
                last_atom.sigo = sigo
                last_atom.sigU = sigU
            elif record == "ANISOU":
                last_atom.anisotropy = True
                Uij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                Ua = last_atom.U
                for i in range(3):
                    Ua[i, i] = Uij[i]
                Ua[0, 1] = Ua[1, 0] = Uij[3]
                Ua[0, 2] = Ua[2, 0] = Uij[4]
                Ua[1, 2] = Ua[2, 1] = Uij[5]
            elif record == "SIGUIJ":
                sigUij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                for i in range(3):
                    last_atom.sigU[i, i] = sigUij[i]
                last_atom.sigU[0, 1] = last_atom.sigU[1, 0] = sigUij[3]
                last_atom.sigU[0, 2] = last_atom.sigU[2, 0] = sigUij[4]
                last_atom.sigU[1, 2] = last_atom.sigU[2, 1] = sigUij[5]
            elif record in P_pdb.validRecords:
                pass
            else:
                emsg = "%d: invalid record name '%r'" % (p_nl, record)
                raise StructureFormatError(emsg)
    except (ValueError, IndexError):
        emsg = "%d: invalid PDB record" % p_nl
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    return stru
def parseLines(self, lines):
    """Parse list of lines in PDFfit format.

    The header is read up to the 'atoms' record that follows a 'cell'
    record; header lines that are not recognized are appended to
    self.ignored_lines.  Each atom is then read from six consecutive
    lines.

    lines -- list of strings, one per line of the structure file

    Return Structure object or raise StructureFormatError.
    """
    p_nl = 0
    try:
        self.stru = PDFFitStructure()
        stru = self.stru
        cell_line_read = False
        # ignore trailing blank lines
        stop = len(lines)
        while stop > 0 and lines[stop - 1].strip() == "":
            stop -= 1
        ilines = iter(lines[:stop])
        # read header of PDFFit file
        for line in ilines:
            p_nl += 1
            words = line.split()
            if len(words) == 0 or words[0][0] == '#':
                continue
            elif words[0] == 'title':
                stru.title = line.lstrip()[5:].strip()
            elif words[0] == 'scale':
                stru.pdffit['scale'] = float(words[1])
            elif words[0] == 'sharp':
                l1 = line.replace(',', ' ')
                sharp_pars = [float(w) for w in l1.split()[1:]]
                # fewer than 4 values means delta1 is not listed
                if len(sharp_pars) < 4:
                    stru.pdffit['delta2'] = sharp_pars[0]
                    stru.pdffit['sratio'] = sharp_pars[1]
                    stru.pdffit['rcut'] = sharp_pars[2]
                else:
                    stru.pdffit['delta2'] = sharp_pars[0]
                    stru.pdffit['delta1'] = sharp_pars[1]
                    stru.pdffit['sratio'] = sharp_pars[2]
                    stru.pdffit['rcut'] = sharp_pars[3]
            elif words[0] == 'spcgr':
                key = 'spcgr'
                start = line.find(key) + len(key)
                value = line[start:].strip()
                stru.pdffit['spcgr'] = value
            elif words[0] == 'shape':
                self._parse_shape(line)
            elif words[0] == 'cell':
                cell_line_read = True
                l1 = line.replace(',', ' ')
                latpars = [float(w) for w in l1.split()[1:7]]
                stru.lattice = Lattice(*latpars)
            elif words[0] == 'dcell':
                l1 = line.replace(',', ' ')
                stru.pdffit['dcell'] = [float(w) for w in l1.split()[1:7]]
            elif words[0] == 'ncell':
                l1 = line.replace(',', ' ')
                stru.pdffit['ncell'] = [int(w) for w in l1.split()[1:5]]
            elif words[0] == 'format':
                if words[1] != 'pdffit':
                    emsg = "%d: file is not in PDFfit format" % p_nl
                    raise StructureFormatError(emsg)
            elif words[0] == 'atoms' and cell_line_read:
                break
            else:
                self.ignored_lines.append(line)
        # Header reading finished, check if required lines were present.
        if not cell_line_read:
            emsg = "%d: file is not in PDFfit format" % p_nl
            raise StructureFormatError(emsg)
        # Load data from atom entries.
        p_natoms = reduce(lambda x, y: x * y, stru.pdffit['ncell'])
        # we are now inside data block
        for line in ilines:
            p_nl += 1
            wl1 = line.split()
            element = wl1[0][0].upper() + wl1[0][1:].lower()
            xyz = [float(w) for w in wl1[1:4]]
            occ = float(wl1[4])
            stru.addNewAtom(element, xyz=xyz, occupancy=occ)
            a = stru.getLastAtom()
            p_nl += 1
            wl2 = next(ilines).split()
            a.sigxyz = [float(w) for w in wl2[0:3]]
            a.sigo = float(wl2[3])
            # the remaining four lines of the atom record
            p_nl += 1
            wl3 = next(ilines).split()
            p_nl += 1
            wl4 = next(ilines).split()
            p_nl += 1
            wl5 = next(ilines).split()
            p_nl += 1
            wl6 = next(ilines).split()
            U = numpy.zeros((3, 3), dtype=float)
            sigU = numpy.zeros((3, 3), dtype=float)
            U[0, 0] = float(wl3[0])
            U[1, 1] = float(wl3[1])
            U[2, 2] = float(wl3[2])
            sigU[0, 0] = float(wl4[0])
            sigU[1, 1] = float(wl4[1])
            sigU[2, 2] = float(wl4[2])
            U[0, 1] = U[1, 0] = float(wl5[0])
            U[0, 2] = U[2, 0] = float(wl5[1])
            U[1, 2] = U[2, 1] = float(wl5[2])
            sigU[0, 1] = sigU[1, 0] = float(wl6[0])
            sigU[0, 2] = sigU[2, 0] = float(wl6[1])
            sigU[1, 2] = sigU[2, 1] = float(wl6[2])
            a.anisotropy = stru.lattice.isanisotropic(U)
            a.U = U
            a.sigU = sigU
        if len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
        # expand the lattice when ncell describes a supercell
        if stru.pdffit['ncell'][:3] != [1, 1, 1]:
            superlatpars = [latpars[i] * stru.pdffit['ncell'][i]
                            for i in range(3)] + latpars[3:]
            superlattice = Lattice(*superlatpars)
            stru.placeInLattice(superlattice)
            stru.pdffit['ncell'] = [1, 1, 1, p_natoms]
    except (ValueError, IndexError, StopIteration):
        # StopIteration comes from next(ilines) when the input ends in
        # the middle of an atom record.
        emsg = "%d: file is not in PDFfit format" % p_nl
        exc_traceback = sys.exc_info()[2]
        e = StructureFormatError(emsg)
        six.reraise(StructureFormatError, e, exc_traceback)
    return stru
def parseLines(self, lines):
    """Parse list of lines in XYZ format.

    The first line that is neither blank nor starts with a "#" field
    must hold the number of atoms; the line after it is stored as the
    structure title.  Atom records have 4 columns, "element x y z".

    lines -- list of strings, one per line of the structure file

    Return Structure object or raise StructureFormatError.
    """
    tokens = [ln.split() for ln in lines]
    stru = Structure()
    # skip the leading blank and "#" lines
    start = 0
    while start < len(tokens) and (
            len(tokens[start]) == 0 or tokens[start][0] == "#"):
        start += 1
    # the first valid line gives the number of atoms
    header_emsg = ("%d: invalid XYZ format, missing number of atoms" %
                   (start + 1))
    try:
        header = tokens[start]
        count_word = header[0]
        is_count = len(header) == 1 and str(int(count_word)) == count_word
        if not is_count:
            raise StructureFormatError(header_emsg)
        p_natoms = int(count_word)
        stru.title = lines[start + 1].strip()
        start += 2
    except (IndexError, ValueError):
        tb = sys.exc_info()[2]
        six.reraise(StructureFormatError,
                    StructureFormatError(header_emsg), tb)
    # drop trailing blank lines
    stop = len(lines)
    while stop > start and not tokens[stop - 1]:
        stop -= 1
    # nothing more to read for an empty structure
    if p_natoms == 0 or start >= stop:
        return stru
    # the first record fixes the expected number of columns
    nfields = len(tokens[start])
    if nfields != 4:
        raise StructureFormatError(
            "%d: invalid XYZ format, expected 4 columns" % (start + 1))
    # read all record lines, p_nl is the 1-based line number
    try:
        for p_nl, fields in enumerate(tokens[start:], start + 1):
            if not fields:
                continue
            if len(fields) != nfields:
                emsg = ('%d: all lines must have ' +
                        'the same number of columns') % p_nl
                raise StructureFormatError(emsg)
            symbol = fields[0]
            symbol = symbol[0].upper() + symbol[1:].lower()
            stru.addNewAtom(symbol, xyz=[float(f) for f in fields[1:4]])
    except ValueError:
        tb = sys.exc_info()[2]
        emsg = "%d: invalid number format" % p_nl
        six.reraise(StructureFormatError, StructureFormatError(emsg), tb)
    # finally check if all the atoms have been read
    count_mismatch = p_natoms is not None and len(stru) != p_natoms
    if count_mismatch:
        emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
        raise StructureFormatError(emsg)
    return stru