Esempio n. 1
0
    def parseLines(self, lines):
        """Parse list of lines in RAWXYZ format.

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop - 1]) == 0:
            stop -= 1
        # get out for empty structure
        if start >= stop:
            return stru
        # here we have at least one valid record line
        # figure out xyz layout from the first line for plain and raw formats
        floatfields = [isfloat(f) for f in linefields[start]]
        nfields = len(linefields[start])
        if nfields not in (3, 4):
            emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                    (start + 1))
            raise StructureFormatError(emsg)
        if floatfields[:3] == [True, True, True]:
            el_idx, x_idx = (None, 0)
        elif floatfields[:4] == [False, True, True, True]:
            el_idx, x_idx = (0, 1)
        else:
            emsg = "%d: invalid RAWXYZ format" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        try:
            p_nl = start
            for fields in linefields[start:]:
                p_nl += 1
                if fields == []:
                    continue
                elif len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                element = el_idx is not None and fields[el_idx] or ""
                xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
                if len(xyz) == 2:
                    xyz.append(0.0)
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            emsg = "%d: invalid number" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
Esempio n. 2
0
    def parseLines(self, lines):
        """Parse list of lines in DISCUS format.

        Return PDFFitStructure instance or raise StructureFormatError.
        """
        self.lines = lines
        ilines = self._linesIterator()
        self.stru = PDFFitStructure()
        record_parsers = {
            "cell" : self._parse_cell,
            "format" : self._parse_format,
            "generator" : self._parse_not_implemented,
            "molecule" : self._parse_not_implemented,
            "ncell" : self._parse_ncell,
            "spcgr" : self._parse_spcgr,
            "symmetry" : self._parse_not_implemented,
            "title" : self._parse_title,
            "shape" : self._parse_shape,
        }
        try:
            # parse header
            for self.line in ilines:
                words = self.line.split()
                if not words or words[0][0] == '#': continue
                if words[0] == 'atoms':             break
                rp = record_parsers.get(words[0], self._parse_unknown_record)
                rp(words)
            # check if cell has been defined
            if not self.cell_read:
                emsg = "%d: unit cell not defined" % self.nl
                raise StructureFormatError(emsg)
            # parse atoms
            for self.line in ilines:
                words = self.line.replace(',', ' ').split()
                if not words or words[0][0] == '#': continue
                self._parse_atom(words)
            # self consistency check
            exp_natoms = reduce(lambda x,y : x*y, self.stru.pdffit['ncell'])
            # only check if ncell record exists
            if self.ncell_read and exp_natoms != len(self.stru):
                emsg = 'Expected %d atoms, read %d.' % \
                    (exp_natoms, len(self.stru))
                raise StructureFormatError(emsg)
            # take care of superlattice
            if self.stru.pdffit['ncell'][:3] != [1,1,1]:
                latpars = list(self.stru.lattice.abcABG())
                superlatpars = [ latpars[i]*self.stru.pdffit['ncell'][i]
                                 for i in range(3) ] + latpars[3:]
                superlattice = Lattice(*superlatpars)
                self.stru.placeInLattice(superlattice)
                self.stru.pdffit['ncell'] = [1, 1, 1, exp_natoms]
        except (ValueError, IndexError):
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = "%d: file is not in DISCUS format" % self.nl
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return self.stru
Esempio n. 3
0
    def _parse_lattice(self, block):
        """Obtain lattice parameters from a CifBlock.
        This method updates self.stru.lattice.

        block -- instance of CifBlock

        No return value.
        """
        if '_cell_length_a' not in block: return
        # obtain lattice parameters
        try:
            latpars = (
                leading_float(block['_cell_length_a']),
                leading_float(block['_cell_length_b']),
                leading_float(block['_cell_length_c']),
                leading_float(block['_cell_angle_alpha']),
                leading_float(block['_cell_angle_beta']),
                leading_float(block['_cell_angle_gamma']),
            )
        except KeyError as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err)
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        self.stru.lattice = Lattice(*latpars)
        return
Esempio n. 4
0
    def _wrapParseMethod(self, method, *args, **kwargs):
        """A helper evaluator method.  Try the specified parse method with
        each registered structure parser and return the first successful
        resul.  Structure parsers that match structure file extension are
        tried first.

        Set format attribute to the detected file format.
        Return Structure instance, or raise StructureFormatError.
        """
        from diffpy.structure.parsers import getParser
        ofmts = self._getOrderedFormats()
        stru = None
        # try all parsers in sequence
        parsers_emsgs = []
        for fmt in ofmts:
            p = getParser(fmt, **self.pkw)
            try:
                pmethod = getattr(p, method)
                stru = pmethod(*args, **kwargs)
                self.format = fmt
                break
            except StructureFormatError as err:
                parsers_emsgs.append("%s: %s" % (fmt, err))
            except NotImplementedError:
                pass
        if stru is None:
            emsg = "\n".join([
                "Unknown or invalid structure format.",
                "Errors per each tested structure format:"
            ] + parsers_emsgs)
            raise StructureFormatError(emsg)
        self.__dict__.update(p.__dict__)
        return stru
Esempio n. 5
0
 def _parse_format(self, words):
     """Process the format record from DISCUS structure file.
     """
     if words[1] == 'pdffit':
         emsg = "%d: file is not in DISCUS format" % self.nl
         raise StructureFormatError(emsg)
     return
Esempio n. 6
0
    def _parse_space_group_symop_operation_xyz(self, block):
        """Process symmetry operations from a CifBlock.  The method
        updates spacegroup and eau data according to symmetry
        operations defined in _space_group_symop_operation_xyz or
        _symmetry_equiv_pos_as_xyz items in CifBlock.

        block -- instance of CifBlock

        No return value.
        """
        from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
        from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
        from diffpy.structure.spacegroups import FindSpaceGroup
        self.asymmetric_unit = list(self.stru)
        sym_synonyms = ('_space_group_symop_operation_xyz',
                        '_symmetry_equiv_pos_as_xyz')
        sym_loop_name = [n for n in sym_synonyms if n in block]
        # recover explicit list of symmetry operations
        symop_list = []
        if sym_loop_name:
            # sym_loop exists here and we know its cif name
            sym_loop_name = sym_loop_name[0]
            sym_loop = block.GetLoop(sym_loop_name)
            for eqxyz in sym_loop[sym_loop_name]:
                opcif = getSymOp(eqxyz)
                symop_list.append(opcif)
        # determine space group number
        sg_nameHall = (block.get('_space_group_name_Hall', '')
                       or block.get('_symmetry_space_group_name_Hall', ''))
        sg_nameHM = (block.get('_space_group_name_H-M_alt', '')
                     or block.get('_symmetry_space_group_name_H-M', ''))
        self.cif_sgname = (sg_nameHall or sg_nameHM or None)
        sgid = (block.get('_space_group_IT_number', '')
                or block.get('_symmetry_Int_Tables_number', '') or sg_nameHM)
        self.spacegroup = None
        # try to reuse existing space group from symmetry operations
        if symop_list:
            try:
                self.spacegroup = FindSpaceGroup(symop_list)
            except ValueError:
                pass
        # otherwise lookup the space group from its identifier
        if self.spacegroup is None and sgid and IsSpaceGroupIdentifier(sgid):
            self.spacegroup = GetSpaceGroup(sgid)
        # define new spacegroup when symmetry operations were listed, but
        # there is no match to an existing definition
        if symop_list and self.spacegroup is None:
            new_short_name = "CIF " + (sg_nameHall or 'data')
            new_crystal_system = (block.get('_space_group_crystal_system')
                                  or block.get('_symmetry_cell_setting')
                                  or 'TRICLINIC').upper()
            self.spacegroup = SpaceGroup(short_name=new_short_name,
                                         crystal_system=new_crystal_system,
                                         symop_list=symop_list)
        if self.spacegroup is None:
            emsg = "CIF file has unknown space group identifier {!r}."
            raise StructureFormatError(emsg.format(sgid))
        self._expandAsymmetricUnit(block)
        return
Esempio n. 7
0
 def _parse_cell(self, words):
     """Process the cell record from DISCUS structure file.
     """
     # split again on spaces or commas
     words = self.line.replace(',', ' ').split()
     latpars = [ float(w) for w in words[1:7] ]
     try:
         self.stru.lattice.setLatPar(*latpars)
     except ZeroDivisionError:
         emsg = "%d: Invalid lattice parameters - zero cell volume" % \
                 self.nl
         raise StructureFormatError(emsg)
     self.cell_read = True
     return
Esempio n. 8
0
def getParser(format, **kw):
    """Return Parser instance for a given structure format.

    kw   -- keyword arguments passed to the Parser init function.

    Raises StructureFormatError exception when format is not defined.
    """
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    pmod = parser_index[format]['module']
    ns = {}
    import_cmd = 'from diffpy.structure.parsers import %s as pm' % pmod
    exec(import_cmd, ns)
    return ns['pm'].getParser(**kw)
Esempio n. 9
0
 def _parse_shape(self, words):
     """Process the shape record from DISCUS structure file.
     """
     # strip away any commas
     linefixed = " ".join(words).replace(',', ' ')
     wordsfixed = linefixed.split()
     shapetype = wordsfixed[1]
     if shapetype == 'sphere':
         self.stru.pdffit['spdiameter'] = float(words[2])
     elif shapetype == 'stepcut':
         self.stru.pdffit['stepcut'] = float(words[2])
     else:
         emsg = 'Invalid type of particle shape correction %r' % shapetype
         raise StructureFormatError(emsg)
     return
Esempio n. 10
0
    def _parse_shape(self, line):
        """Process shape line from PDFfit file and update self.stru

        line -- line containing data for particle shape correction

        No return value.
        Raise StructureFormatError for invalid record.
        """
        line_nocommas = line.replace(',', ' ')
        words = line_nocommas.split()
        assert words[0] == 'shape'
        shapetype = words[1]
        if shapetype == 'sphere':
            self.stru.pdffit['spdiameter'] = float(words[2])
        elif shapetype == 'stepcut':
            self.stru.pdffit['stepcut'] = float(words[2])
        else:
            emsg = 'Invalid type of particle shape correction %r' % shapetype
            raise StructureFormatError(emsg)
        return
Esempio n. 11
0
    def _parseCifDataSource(self, datasource):
        """\
        Open and process CIF data from the specified `datasource`.


        Parameters
        ----------
        datasource : str or a file-like object
            This is used as an argument to the CifFile class.  The CifFile
            instance is stored in `ciffile` attribute of this Parser.

        Returns
        -------
        Structure
            The Structure object loaded from the specified data source.

        Raises
        ------
        StructureFormatError
            When the data do not constitute a valid CIF format.
        """
        from CifFile import CifFile, StarError
        self.stru = None
        try:
            with _suppressCifParserOutput():
                # Use `grammar` option to digest values with curly-brackets.
                # Ref: https://bitbucket.org/jamesrhester/pycifrw/issues/19
                self.ciffile = CifFile(datasource, grammar='auto')
                for blockname in self.ciffile.keys():
                    self._parseCifBlock(blockname)
                    # stop after reading the first structure
                    if self.stru is not None:
                        break
        except (StarError, ValueError, IndexError) as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err).strip()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return self.stru
Esempio n. 12
0
    def toLines(self, stru):
        """Convert Structure stru to a list of lines in XCFG atomeye format.

        Return list of strings.
        """
        if len(stru) == 0:
            emsg = "cannot convert empty structure to XCFG format"
            raise StructureFormatError(emsg)
        lines = []
        lines.append("Number of particles = %i" % len(stru))
        # figure out length unit A
        allxyz = numpy.array([a.xyz for a in stru])
        lo_xyz = allxyz.min(axis=0)
        hi_xyz = allxyz.max(axis=0)
        max_range_xyz = (hi_xyz-lo_xyz).max()
        if numpy.allclose(stru.lattice.abcABG(), (1, 1, 1, 90, 90, 90)):
            max_range_xyz += self.cluster_boundary
        # range of CFG coordinates must be less than 1
        p_A = numpy.ceil(max_range_xyz + 1.0e-13)
        # atomeye draws rubbish when boxsize is less than 3.5
        hi_ucvect = max([numpy.sqrt(numpy.dot(v,v)) for v in stru.lattice.base])
        if hi_ucvect*p_A < 3.5:
            p_A = numpy.ceil(3.5 / hi_ucvect)
        lines.append("A = %.8g Angstrom" % p_A)
        # how much do we need to shift the coordinates?
        p_dxyz = numpy.zeros(3, dtype=float)
        for i in range(3):
            if lo_xyz[i]/p_A < 0.0 or hi_xyz[i]/p_A >= 1.0 \
            or (lo_xyz[i] == hi_xyz[i] and lo_xyz[i] == 0.0) :
                p_dxyz[i] = 0.5 - (hi_xyz[i]+lo_xyz[i])/2.0/p_A
        # H0 tensor
        for i in range(3):
            for j in range(3):
                lines.append("H0(%i,%i) = %.8g A" %
                             (i + 1, j + 1, stru.lattice.base[i, j]))
        # get out for empty structure
        if len(stru) == 0: return lines
        a_first = stru[0]
        p_NO_VELOCITY = "v" not in a_first.__dict__
        if p_NO_VELOCITY:
            lines.append(".NO_VELOCITY.")
        # build a p_auxiliaries list of (aux_name,atom_expression) tuples
        # if stru came from xcfg file, it would store original auxiliaries in
        # xcfg dictionary
        try:
            p_auxiliaries = [ (aux, "a."+aux)
                for aux in stru.xcfg['auxiliaries'] ]
        except AttributeError:
            p_auxiliaries = []
        # add occupancy if any atom has nonunit occupancy
        for a in stru:
            if a.occupancy != 1.0:
                p_auxiliaries.append(('occupancy', 'a.occupancy'))
                break
        # add temperature factor with as many terms as needed
        # check whether all temperature factors are zero or isotropic
        p_allUzero = True
        p_allUiso = True
        for a in stru:
            if p_allUzero and numpy.any(a.U != 0.0):
                p_allUzero = False
            if not numpy.all(a.U == a.U[0,0]*numpy.identity(3)):
                p_allUiso = False
                # here p_allUzero must be false
                break
        if p_allUzero:
            pass
        elif p_allUiso:
            p_auxiliaries.append(('Uiso', 'uflat[0]'))
        else:
            p_auxiliaries.extend([('U11', 'uflat[0]'),
                                  ('U22', 'uflat[4]'),
                                  ('U33', 'uflat[8]')])
            # check if there are off-diagonal elements
            allU = numpy.array([a.U for a in stru])
            if numpy.any(allU[:,0,1] != 0.0):
                p_auxiliaries.append(('U12', 'uflat[1]'))
            if numpy.any(allU[:,0,2] != 0.0):
                p_auxiliaries.append(('U13', 'uflat[2]'))
            if numpy.any(allU[:,1,2] != 0.0):
                p_auxiliaries.append(('U23', 'uflat[5]'))
        # count entries
        p_entry_count = (3 if p_NO_VELOCITY else 6) + len(p_auxiliaries)
        lines.append("entry_count = %d" % p_entry_count)
        # add auxiliaries
        for i in range(len(p_auxiliaries)):
            lines.append("auxiliary[%d] = %s [au]" % (i, p_auxiliaries[i][0]))
        # now define entry format efmt for representing atom properties
        fmwords = ["{pos[0]:.8g}", "{pos[1]:.8g}", "{pos[2]:.8g}"]
        if not p_NO_VELOCITY:
            fmwords += ["{v[0]:.8g}", "{v[1]:.8g}", "{v[2]:.8g}"]
        fmwords += (('{' + e + ':.8g}') for p, e in p_auxiliaries)
        efmt = ' '.join(fmwords)
        # we are ready to output atoms:
        lines.append("")
        p_element = None
        for a in stru:
            if a.element != p_element:
                p_element = a.element
                lines.append("%.4f" % AtomicMass.get(p_element, 0.0))
                lines.append(p_element)
            pos = a.xyz / p_A + p_dxyz
            v = None if p_NO_VELOCITY else a.v
            uflat = numpy.ravel(a.U)
            entry = efmt.format(pos=pos, v=v, uflat=uflat, a=a)
            lines.append(entry)
        return lines
Esempio n. 13
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3,3), dtype=float)
        xcfg_H0_set = numpy.zeros((3,3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        stru = Structure()
        # ignore trailing blank lines
        stop = len(lines)
        for line in reversed(lines):
            if line.strip():
                break
            stop -= 1
        # iterator over the valid data lines
        ilines = iter(lines[:stop])
        try:
            # read XCFG header
            for line in ilines:
                p_nl += 1
                stripped_line = line.strip()
                # blank lines and lines starting with # are ignored
                if stripped_line == "" or line[0] == '#':
                    continue
                elif xcfg_Number_of_particles is None:
                    if line.find("Number of particles =") != 0:
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                    p_natoms = xcfg_Number_of_particles
                elif line.find("A =") == 0:
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.find("H0(") == 0:
                    i, j = (int(line[3]) - 1, int(line[5]) - 1)
                    xcfg_H0[i,j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i,j] = True
                elif line.find(".NO_VELOCITY.") == 0:
                    xcfg_NO_VELOCITY = True
                elif line.find("entry_count =") == 0:
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                elif p_auxiliary_re.match(line):
                    m = p_auxiliary_re.match(line)
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
                else:
                    break
            # check header for consistency
            if numpy.any(xcfg_H0_set == False):
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys())+1
            for i in range(p_auxnum):
                if not i in p_auxiliary:
                    p_auxiliary[i] = "aux%d" % i
            sorted_aux_keys = sorted(p_auxiliary.keys())
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries' : [ p_auxiliary[k]
                                      for k in sorted_aux_keys ]
                }
            ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
            if ecnt != xcfg_entry_count:
                emsg = ("%d: auxiliary fields are "
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # here we are inside the data block
            p_element = None
            for line in ilines:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    fields = [float(w) for w in words]
                    xyz = [xcfg_A * xi for xi in fields[:3]]
                    stru.addNewAtom(p_element, xyz=xyz)
                    a = stru[-1]
                    _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary,
                                        no_velocity=xcfg_NO_VELOCITY)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
Esempio n. 14
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure instance or raise StructureFormatError.
        """
        try:
            stru = Structure()
            scale = numpy.identity(3, dtype=float)
            scaleU = numpy.zeros(3, dtype=float)
            p_nl = 0
            for line in lines:
                p_nl += 1
                # skip blank lines
                if not line.strip():    continue
                # make sure line has 80 characters
                if len(line) < 80:
                    line = "%-80s" % line
                words = line.split()
                record = words[0]
                if record == "TITLE":
                    continuation = line[8:10]
                    if continuation.strip():
                        stru.title += line[10:].rstrip()
                    else:
                        stru.title = line[10:].rstrip()
                elif record == "CRYST1":
                    a = float(line[7:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                    scale = numpy.transpose(stru.lattice.recbase)
                elif record == "SCALE1":
                    sc = numpy.zeros((3,3), dtype=float)
                    sc[0,:] = [float(x) for x in line[10:40].split()]
                    scaleU[0] = float(line[45:55])
                elif record == "SCALE2":
                    sc[1,:] = [float(x) for x in line[10:40].split()]
                    scaleU[1] = float(line[45:55])
                elif record == "SCALE3":
                    sc[2,:] = [float(x) for x in line[10:40].split()]
                    scaleU[2] = float(line[45:55])
                    base = numpy.transpose(numpy.linalg.inv(sc))
                    abcABGcryst = numpy.array(stru.lattice.abcABG())
                    stru.lattice.setLatBase(base)
                    abcABGscale = numpy.array(stru.lattice.abcABG())
                    reldiff = numpy.fabs(1.0 - abcABGscale/abcABGcryst)
                    if not numpy.all(reldiff < 1.0e-4):
                        emsg = "%d: " % p_nl + \
                                "SCALE and CRYST1 are not consistent."
                        raise StructureFormatError(emsg)
                    if numpy.any(scaleU != 0.0):
                        emsg = "Origin offset not yet implemented."
                        raise NotImplementedError(emsg)
                elif record in ("ATOM", "HETATM"):
                    name = line[12:16].strip()
                    rc = [float(x) for x in line[30:54].split()]
                    try:
                        occupancy = float(line[54:60])
                    except ValueError:
                        occupancy = 1.0
                    try:
                        B = float(line[60:66])
                        uiso = B/(8*pi**2)
                    except ValueError:
                        uiso = 0.0
                    element = line[76:78].strip()
                    if element == "":
                        # get element from the first 2 characters of name
                        element = line[12:14].strip()
                        element = element[0].upper() + element[1:].lower()
                    stru.addNewAtom(element,
                            occupancy=occupancy, label=name)
                    last_atom = stru.getLastAtom()
                    last_atom.xyz_cartn = rc
                    last_atom.Uisoequiv = uiso
                elif record == "SIGATM":
                    sigrc = [float(x) for x in line[30:54].split()]
                    sigxyz = numpy.dot(scale, sigrc)
                    try:
                        sigo = float(line[54:60])
                    except ValueError:
                        sigo = 0.0
                    try:
                        sigB = float(line[60:66])
                        sigU = numpy.identity(3)*sigB/(8*pi**2)
                    except ValueError:
                        sigU = numpy.zeros((3,3), dtype=float)
                    last_atom.sigxyz = sigxyz
                    last_atom.sigo = sigo
                    last_atom.sigU = sigU
                elif record == "ANISOU":
                    last_atom.anisotropy = True
                    Uij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    Ua = last_atom.U
                    for i in range(3):
                        Ua[i,i] = Uij[i]
                    Ua[0,1] = Ua[1,0] = Uij[3]
                    Ua[0,2] = Ua[2,0] = Uij[4]
                    Ua[1,2] = Ua[2,1] = Uij[5]
                elif record == "SIGUIJ":
                    sigUij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    for i in range(3):
                        last_atom.sigU[i,i] = sigUij[i]
                    last_atom.sigU[0,1] = last_atom.sigU[1,0] = sigUij[3]
                    last_atom.sigU[0,2] = last_atom.sigU[2,0] = sigUij[4]
                    last_atom.sigU[1,2] = last_atom.sigU[2,1] = sigUij[5]
                elif record in P_pdb.validRecords:
                    pass
                else:
                    emsg = "%d: invalid record name '%r'" % (p_nl, record)
                    raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: invalid PDB record" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
Esempio n. 15
0
    def parseLines(self, lines):
        """Parse list of lines in PDFfit format.

        Return Structure object or raise StructureFormatError.
        """
        p_nl = 0
        try:
            self.stru = PDFFitStructure()
            stru = self.stru
            cell_line_read = False
            stop = len(lines)
            while stop > 0 and lines[stop - 1].strip() == "":
                stop -= 1
            ilines = iter(lines[:stop])
            # read header of PDFFit file
            for l in ilines:
                p_nl += 1
                words = l.split()
                if len(words) == 0 or words[0][0] == '#':
                    continue
                elif words[0] == 'title':
                    stru.title = l.lstrip()[5:].strip()
                elif words[0] == 'scale':
                    stru.pdffit['scale'] = float(words[1])
                elif words[0] == 'sharp':
                    l1 = l.replace(',', ' ')
                    sharp_pars = [float(w) for w in l1.split()[1:]]
                    if len(sharp_pars) < 4:
                        stru.pdffit['delta2'] = sharp_pars[0]
                        stru.pdffit['sratio'] = sharp_pars[1]
                        stru.pdffit['rcut'] = sharp_pars[2]
                    else:
                        stru.pdffit['delta2'] = sharp_pars[0]
                        stru.pdffit['delta1'] = sharp_pars[1]
                        stru.pdffit['sratio'] = sharp_pars[2]
                        stru.pdffit['rcut'] = sharp_pars[3]
                elif words[0] == 'spcgr':
                    key = 'spcgr'
                    start = l.find(key) + len(key)
                    value = l[start:].strip()
                    stru.pdffit['spcgr'] = value
                elif words[0] == 'shape':
                    self._parse_shape(l)
                elif words[0] == 'cell':
                    cell_line_read = True
                    l1 = l.replace(',', ' ')
                    latpars = [float(w) for w in l1.split()[1:7]]
                    stru.lattice = Lattice(*latpars)
                elif words[0] == 'dcell':
                    l1 = l.replace(',', ' ')
                    stru.pdffit['dcell'] = [float(w) for w in l1.split()[1:7]]
                elif words[0] == 'ncell':
                    l1 = l.replace(',', ' ')
                    stru.pdffit['ncell'] = [int(w) for w in l1.split()[1:5]]
                elif words[0] == 'format':
                    if words[1] != 'pdffit':
                        emsg = "%d: file is not in PDFfit format" % p_nl
                        raise StructureFormatError(emsg)
                elif words[0] == 'atoms' and cell_line_read:
                    break
                else:
                    self.ignored_lines.append(l)
            # Header reading finished, check if required lines were present.
            if not cell_line_read:
                emsg = "%d: file is not in PDFfit format" % p_nl
                raise StructureFormatError(emsg)
            # Load data from atom entries.
            p_natoms = reduce(lambda x, y: x * y, stru.pdffit['ncell'])
            # we are now inside data block
            for l in ilines:
                p_nl += 1
                wl1 = l.split()
                element = wl1[0][0].upper() + wl1[0][1:].lower()
                xyz = [float(w) for w in wl1[1:4]]
                occ = float(wl1[4])
                stru.addNewAtom(element, xyz=xyz, occupancy=occ)
                a = stru.getLastAtom()
                p_nl += 1
                wl2 = next(ilines).split()
                a.sigxyz = [float(w) for w in wl2[0:3]]
                a.sigo = float(wl2[3])
                p_nl += 1
                wl3 = next(ilines).split()
                p_nl += 1
                wl4 = next(ilines).split()
                p_nl += 1
                wl5 = next(ilines).split()
                p_nl += 1
                wl6 = next(ilines).split()
                U = numpy.zeros((3, 3), dtype=float)
                sigU = numpy.zeros((3, 3), dtype=float)
                U[0, 0] = float(wl3[0])
                U[1, 1] = float(wl3[1])
                U[2, 2] = float(wl3[2])
                sigU[0, 0] = float(wl4[0])
                sigU[1, 1] = float(wl4[1])
                sigU[2, 2] = float(wl4[2])
                U[0, 1] = U[1, 0] = float(wl5[0])
                U[0, 2] = U[2, 0] = float(wl5[1])
                U[1, 2] = U[2, 1] = float(wl5[2])
                sigU[0, 1] = sigU[1, 0] = float(wl6[0])
                sigU[0, 2] = sigU[2, 0] = float(wl6[1])
                sigU[1, 2] = sigU[2, 1] = float(wl6[2])
                a.anisotropy = stru.lattice.isanisotropic(U)
                a.U = U
                a.sigU = sigU
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
            if stru.pdffit['ncell'][:3] != [1, 1, 1]:
                superlatpars = [
                    latpars[i] * stru.pdffit['ncell'][i] for i in range(3)
                ] + latpars[3:]
                superlattice = Lattice(*superlatpars)
                stru.placeInLattice(superlattice)
                stru.pdffit['ncell'] = [1, 1, 1, p_natoms]
        except (ValueError, IndexError):
            emsg = "%d: file is not in PDFfit format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
Esempio n. 16
0
    def parseLines(self, lines):
        """Parse list of lines in XYZ format.

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # first valid line gives number of atoms
        try:
            lfs = linefields[start]
            w1 = linefields[start][0]
            if len(lfs) == 1 and str(int(w1)) == w1:
                p_natoms = int(w1)
                stru.title = lines[start + 1].strip()
                start += 2
            else:
                emsg = ("%d: invalid XYZ format, missing number of atoms" %
                        (start + 1))
                raise StructureFormatError(emsg)
        except (IndexError, ValueError):
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop - 1]) == 0:
            stop -= 1
        # get out for empty structure
        if p_natoms == 0 or start >= stop:
            return stru
        # here we have at least one valid record line
        nfields = len(linefields[start])
        if nfields != 4:
            emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        try:
            p_nl = start
            for fields in linefields[start:]:
                p_nl += 1
                if fields == []:
                    continue
                elif len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                element = fields[0]
                element = element[0].upper() + element[1:].lower()
                xyz = [float(f) for f in fields[1:4]]
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = "%d: invalid number format" % p_nl
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        # finally check if all the atoms have been read
        if p_natoms is not None and len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
        return stru