Beispiel #1
0
    def parseLines(self, lines):
        """Parse list of lines in RAWXYZ format.

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop-1]) == 0:
            stop -= 1
        # get out for empty structure
        if start >= stop:
            return stru
        # here we have at least one valid record line
        # figure out xyz layout from the first line for plain and raw formats
        floatfields = [ isfloat(f) for f in linefields[start] ]
        nfields = len(linefields[start])
        if nfields not in (3, 4):
            emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                    (start + 1))
            raise StructureFormatError(emsg)
        if floatfields[:3] == [True, True, True]:
            el_idx, x_idx = (None, 0)
        elif floatfields[:4] == [False, True, True, True]:
            el_idx, x_idx = (0, 1)
        else:
            emsg = "%d: invalid RAWXYZ format" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        try:
            p_nl = start
            for fields in linefields[start:] :
                p_nl += 1
                if fields == []:
                    continue
                elif len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError, emsg
                element = el_idx is not None and fields[el_idx] or ""
                xyz = [ float(f) for f in fields[x_idx:x_idx+3] ]
                if len(xyz) == 2:
                    xyz.append(0.0)
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            emsg = "%d: invalid number" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
Beispiel #2
0
    def parseLines(self, lines):
        """Parse list of lines in DISCUS format.

        Return PDFFitStructure instance or raise StructureFormatError.
        """
        self.lines = lines
        ilines = self._linesIterator()
        self.stru = PDFFitStructure()
        record_parsers = {
            "cell": self._parse_cell,
            "format": self._parse_format,
            "generator": self._parse_not_implemented,
            "molecule": self._parse_not_implemented,
            "ncell": self._parse_ncell,
            "spcgr": self._parse_spcgr,
            "symmetry": self._parse_not_implemented,
            "title": self._parse_title,
            "shape": self._parse_shape,
        }
        try:
            # parse header
            for self.line in ilines:
                words = self.line.split()
                if not words or words[0][0] == '#': continue
                if words[0] == 'atoms': break
                rp = record_parsers.get(words[0], self._parse_unknown_record)
                rp(words)
            # check if cell has been defined
            if not self.cell_read:
                emsg = "%d: unit cell not defined" % self.nl
                raise StructureFormatError(emsg)
            # parse atoms
            for self.line in ilines:
                words = self.line.split()
                if not words or words[0][0] == '#': continue
                self._parse_atom(words)
            # self consistency check
            exp_natoms = reduce(lambda x, y: x * y, self.stru.pdffit['ncell'])
            # only check if ncell record exists
            if self.ncell_read and exp_natoms != len(self.stru):
                emsg = 'Expected %d atoms, read %d.' % \
                    (exp_natoms, len(self.stru))
                raise StructureFormatError(emsg)
            # take care of superlattice
            if self.stru.pdffit['ncell'][:3] != [1, 1, 1]:
                latpars = list(self.stru.lattice.abcABG())
                superlatpars = [
                    latpars[i] * self.stru.pdffit['ncell'][i] for i in range(3)
                ] + latpars[3:]
                superlattice = Lattice(*superlatpars)
                self.stru.placeInLattice(superlattice)
                self.stru.pdffit['ncell'] = [1, 1, 1, exp_natoms]
        except (ValueError, IndexError):
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = "%d: file is not in DISCUS format" % self.nl
            raise StructureFormatError, emsg, exc_traceback
        return self.stru
Beispiel #3
0
 def _parse_format(self, words):
     """Process the format record from DISCUS structure file.
     """
     if words[1] == 'pdffit':
         emsg = "%d: file is not in DISCUS format" % self.nl
         raise StructureFormatError(emsg)
     return
Beispiel #4
0
def getParser(format):
    """Return Parser instance for a given structure format.
    Raises StructureFormatError exception when format is not defined.
    """
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    pmod = parser_index[format]['module']
    import_cmd = 'from diffpy.Structure.Parsers import %s as pm' % pmod
    exec(import_cmd)
    return pm.getParser()
Beispiel #5
0
 def _parse_cell(self, words):
     """Process the cell record from DISCUS structure file.
     """
     # split again on spaces or commas
     words = self.line.replace(',', ' ').split()
     latpars = [ float(w) for w in words[1:7] ]
     try:
         self.stru.lattice.setLatPar(*latpars)
     except ZeroDivisionError:
         emsg = "%d: Invalid lattice parameters - zero cell volume" % \
                 self.nl
         raise StructureFormatError(emsg)
     self.cell_read = True
     return
Beispiel #6
0
def getParser(format, **kw):
    """Return Parser instance for a given structure format.

    kw   -- keyword arguments passed to the Parser init function.

    Raises StructureFormatError exception when format is not defined.
    """
    if format not in parser_index:
        emsg = "no parser for '%s' format" % format
        raise StructureFormatError(emsg)
    pmod = parser_index[format]['module']
    pm = None
    import_cmd = 'from diffpy.Structure.Parsers import %s as pm' % pmod
    exec(import_cmd)
    return pm.getParser(**kw)
Beispiel #7
0
    def parseLines(self, lines):
        """Parse list of lines in PDFfit format.

        Return Structure object or raise StructureFormatError.
        """
        p_nl = 0
        rlist = []
        try:
            self.stru = PDFFitStructure()
            stru = self.stru
            cell_line_read = False
            stop = len(lines)
            while stop > 0 and lines[stop - 1].strip() == "":
                stop -= 1
            ilines = iter(lines[:stop])
            # read header of PDFFit file
            for l in ilines:
                p_nl += 1
                words = l.split()
                if len(words) == 0 or words[0][0] == '#':
                    continue
                elif words[0] == 'title':
                    stru.title = l.lstrip()[5:].strip()
                elif words[0] == 'scale':
                    stru.pdffit['scale'] = float(words[1])
                elif words[0] == 'sharp':
                    l1 = l.replace(',', ' ')
                    sharp_pars = [float(w) for w in l1.split()[1:]]
                    if len(sharp_pars) < 4:
                        stru.pdffit['delta2'] = sharp_pars[0]
                        stru.pdffit['sratio'] = sharp_pars[1]
                        stru.pdffit['rcut'] = sharp_pars[2]
                    else:
                        stru.pdffit['delta2'] = sharp_pars[0]
                        stru.pdffit['delta1'] = sharp_pars[1]
                        stru.pdffit['sratio'] = sharp_pars[2]
                        stru.pdffit['rcut'] = sharp_pars[3]
                elif words[0] == 'spcgr':
                    key = 'spcgr'
                    start = l.find(key) + len(key)
                    value = l[start:].strip()
                    stru.pdffit['spcgr'] = value
                elif words[0] == 'shape':
                    self._parse_shape(l)
                elif words[0] == 'cell':
                    cell_line_read = True
                    l1 = l.replace(',', ' ')
                    latpars = [float(w) for w in l1.split()[1:7]]
                    stru.lattice = Lattice(*latpars)
                elif words[0] == 'dcell':
                    l1 = l.replace(',', ' ')
                    stru.pdffit['dcell'] = [float(w) for w in l1.split()[1:7]]
                elif words[0] == 'ncell':
                    l1 = l.replace(',', ' ')
                    stru.pdffit['ncell'] = [int(w) for w in l1.split()[1:5]]
                elif words[0] == 'format':
                    if words[1] != 'pdffit':
                        emsg = "%d: file is not in PDFfit format" % p_nl
                        raise StructureFormatError(emsg)
                elif words[0] == 'atoms' and cell_line_read:
                    break
                else:
                    self.ignored_lines.append(l)
            # Header reading finished, check if required lines were present.
            if not cell_line_read:
                emsg = "%d: file is not in PDFfit format" % p_nl
                raise StructureFormatError(emsg)
            # Load data from atom entries.
            p_natoms = reduce(lambda x, y: x * y, stru.pdffit['ncell'])
            # we are now inside data block
            for l in ilines:
                p_nl += 1
                wl1 = l.split()
                element = wl1[0][0].upper() + wl1[0][1:].lower()
                xyz = [float(w) for w in wl1[1:4]]
                occ = float(wl1[4])
                stru.addNewAtom(element, xyz=xyz, occupancy=occ)
                a = stru.getLastAtom()
                p_nl += 1
                wl2 = ilines.next().split()
                a.sigxyz = [float(w) for w in wl2[0:3]]
                a.sigo = float(wl2[3])
                p_nl += 1
                wl3 = ilines.next().split()
                p_nl += 1
                wl4 = ilines.next().split()
                p_nl += 1
                wl5 = ilines.next().split()
                p_nl += 1
                wl6 = ilines.next().split()
                a.sigU = numpy.zeros((3, 3), dtype=float)
                a.U11 = float(wl3[0])
                a.U22 = float(wl3[1])
                a.U33 = float(wl3[2])
                a.sigU[0, 0] = float(wl4[0])
                a.sigU[1, 1] = float(wl4[1])
                a.sigU[2, 2] = float(wl4[2])
                a.U12 = float(wl5[0])
                a.U13 = float(wl5[1])
                a.U23 = float(wl5[2])
                a.sigU[0, 1] = a.sigU[1, 0] = float(wl6[0])
                a.sigU[0, 2] = a.sigU[2, 0] = float(wl6[1])
                a.sigU[1, 2] = a.sigU[2, 1] = float(wl6[2])
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
            if stru.pdffit['ncell'][:3] != [1, 1, 1]:
                superlatpars = [
                    latpars[i] * stru.pdffit['ncell'][i] for i in range(3)
                ] + latpars[3:]
                superlattice = Lattice(*superlatpars)
                stru.placeInLattice(superlattice)
                stru.pdffit['ncell'] = [1, 1, 1, p_natoms]
        except (ValueError, IndexError):
            emsg = "%d: file is not in PDFfit format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
Beispiel #8
0
    def toLines(self, stru):
        """Convert Structure stru to a list of lines in XCFG atomeye format.

        Return list of strings.
        """
        if len(stru) == 0:
            emsg = "cannot convert empty structure to XCFG format"
            raise StructureFormatError(emsg)
        lines = []
        lines.append("Number of particles = %i" % len(stru))
        # figure out length unit A
        allxyz = numpy.array([a.xyz for a in stru])
        lo_xyz = allxyz.min(axis=0)
        hi_xyz = allxyz.max(axis=0)
        max_range_xyz = (hi_xyz - lo_xyz).max()
        if numpy.allclose(stru.lattice.abcABG(), (1, 1, 1, 90, 90, 90)):
            max_range_xyz += self.cluster_boundary
        # range of CFG coordinates must be less than 1
        p_A = numpy.ceil(max_range_xyz + 1.0e-13)
        # atomeye draws rubbish when boxsize is less than 3.5
        hi_ucvect = max(
            [numpy.sqrt(numpy.dot(v, v)) for v in stru.lattice.base])
        if hi_ucvect * p_A < 3.5:
            p_A = numpy.ceil(3.5 / hi_ucvect)
        lines.append("A = %.8g Angstrom" % p_A)
        # how much do we need to shift the coordinates?
        p_dxyz = numpy.zeros(3, dtype=float)
        for i in range(3):
            if lo_xyz[i]/p_A < 0.0 or hi_xyz[i]/p_A >= 1.0 \
            or (lo_xyz[i] == hi_xyz[i] and lo_xyz[i] == 0.0) :
                p_dxyz[i] = 0.5 - (hi_xyz[i] + lo_xyz[i]) / 2.0 / p_A
        # H0 tensor
        for i in range(3):
            for j in range(3):
                lines.append("H0(%i,%i) = %.8g A" %
                             (i + 1, j + 1, stru.lattice.base[i, j]))
        # get out for empty structure
        if len(stru) == 0: return lines
        a_first = stru[0]
        p_NO_VELOCITY = "v" not in a_first.__dict__
        if p_NO_VELOCITY:
            lines.append(".NO_VELOCITY.")
        # build a p_auxiliaries list of (aux_name,atom_expression) tuples
        # if stru came from xcfg file, it would store original auxiliaries in
        # xcfg dictionary
        try:
            p_auxiliaries = [(aux, "a." + aux)
                             for aux in stru.xcfg['auxiliaries']]
        except AttributeError:
            p_auxiliaries = []
        # add occupancy if any atom has nonunit occupancy
        for a in stru:
            if a.occupancy != 1.0:
                p_auxiliaries.append(('occupancy', 'a.occupancy'))
                break
        # add temperature factor with as many terms as needed
        # check whether all temperature factors are zero or isotropic
        p_allUzero = True
        p_allUiso = True
        for a in stru:
            if p_allUzero and numpy.any(a.U != 0.0):
                p_allUzero = False
            if not numpy.all(a.U == a.U[0, 0] * numpy.identity(3)):
                p_allUiso = False
                # here p_allUzero must be false
                break
        if p_allUzero:
            pass
        elif p_allUiso:
            p_auxiliaries.append(('Uiso', 'uflat[0]'))
        else:
            p_auxiliaries.extend([('U11', 'uflat[0]'), ('U22', 'uflat[4]'),
                                  ('U33', 'uflat[8]')])
            # check if there are off-diagonal elements
            allU = numpy.array([a.U for a in stru])
            if numpy.any(allU[:, 0, 1] != 0.0):
                p_auxiliaries.append(('U12', 'uflat[1]'))
            if numpy.any(allU[:, 0, 2] != 0.0):
                p_auxiliaries.append(('U13', 'uflat[2]'))
            if numpy.any(allU[:, 1, 2] != 0.0):
                p_auxiliaries.append(('U23', 'uflat[5]'))
        # count entries
        p_entry_count = (3 if p_NO_VELOCITY else 6) + len(p_auxiliaries)
        lines.append("entry_count = %d" % p_entry_count)
        # add auxiliaries
        for i in range(len(p_auxiliaries)):
            lines.append("auxiliary[%d] = %s [au]" % (i, p_auxiliaries[i][0]))
        # now define entry format efmt for representing atom properties
        fmwords = ["{pos[0]:.8g}", "{pos[1]:.8g}", "{pos[2]:.8g}"]
        if not p_NO_VELOCITY:
            fmwords += ["{v[0]:.8g}", "{v[1]:.8g}", "{v[2]:.8g}"]
        fmwords += (('{' + e + ':.8g}') for p, e in p_auxiliaries)
        efmt = ' '.join(fmwords)
        # we are ready to output atoms:
        lines.append("")
        p_element = None
        for a in stru:
            if a.element != p_element:
                p_element = a.element
                lines.append("%.4f" % AtomicMass.get(p_element, 0.0))
                lines.append(p_element)
            pos = a.xyz / p_A + p_dxyz
            v = None if p_NO_VELOCITY else a.v
            uflat = numpy.ravel(a.U)
            entry = efmt.format(pos=pos, v=v, uflat=uflat, a=a)
            lines.append(entry)
        return lines
Beispiel #9
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3, 3), dtype=float)
        xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        stru = Structure()
        # ignore trailing blank lines
        stop = len(lines)
        for line in reversed(lines):
            if line.strip():
                break
            stop -= 1
        # iterator over the valid data lines
        ilines = iter(lines[:stop])
        try:
            # read XCFG header
            for line in ilines:
                p_nl += 1
                stripped_line = line.strip()
                # blank lines and lines starting with # are ignored
                if stripped_line == "" or line[0] == '#':
                    continue
                elif xcfg_Number_of_particles is None:
                    if line.find("Number of particles =") != 0:
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                    p_natoms = xcfg_Number_of_particles
                elif line.find("A =") == 0:
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.find("H0(") == 0:
                    i, j = (int(line[3]) - 1, int(line[5]) - 1)
                    xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i, j] = True
                elif line.find(".NO_VELOCITY.") == 0:
                    xcfg_NO_VELOCITY = True
                elif line.find("entry_count =") == 0:
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                elif p_auxiliary_re.match(line):
                    m = p_auxiliary_re.match(line)
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
                else:
                    break
            # check header for consistency
            if numpy.any(xcfg_H0_set == False):
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1
            for i in range(p_auxnum):
                if not i in p_auxiliary:
                    p_auxiliary[i] = "aux%d" % i
            sorted_aux_keys = sorted(p_auxiliary.keys())
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
                }
            ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
            if ecnt != xcfg_entry_count:
                emsg = ("%d: auxiliary fields are "
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # here we are inside the data block
            p_element = None
            for line in ilines:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    fields = [float(w) for w in words]
                    xyz = [xcfg_A * xi for xi in fields[:3]]
                    stru.addNewAtom(p_element, xyz=xyz)
                    a = stru[-1]
                    _assign_auxiliaries(a,
                                        fields,
                                        auxiliaries=p_auxiliary,
                                        no_velocity=xcfg_NO_VELOCITY)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
    def parseLines(self, lines):
        """Parse list of lines in atoms format.

        Return Structure object or raise StructureFormatError.
        """

        comlist = ["#", "%", "!", "*"]
        atoms = []
        title = ""
        anext = False
        sg = None
        structure = BRAtomsStructure()
        meta = structure.bratoms
        pdict = dict.fromkeys(self.plist)


        # Count the lines
        ln = 0
        try:

            for line in lines:
                ln += 1

                # Strip comments from the line
                for c in comlist:
                    idx = line.find(c)
                    if idx != -1:
                        line = line[:idx]

                # Move on if there is not a line
                if not line: continue

                # Move on if there was only white space in the line
                sline = line.split()
                if not sline: continue

                # Check if we have atoms following
                if sline[0].startswith("atom"):
                    anext = True
                    continue

                # Check for title
                if sline[0].startswith("title"):
                    if title: title += "\n"
                    title += line[5:]
                    continue

                # Get rid of pesky "=" and "," signs
                while "=" in sline: sline.remove("=")
                while "," in sline: sline.remove(",")

                # space group
                if sline and sline[0].startswith("space"):
                    meta["space"] = line[5:].strip()
                    continue

                # output
                if sline and sline[0].startswith("output"):
                    meta["output"] = line[6:].strip()
                    continue

                # shift
                if sline and sline[0].startswith("shift"):
                    meta["shift"] = line[5:].strip()
                    continue

                # Check for other metadata
                while sline and sline[0].strip() in meta:
                    key = sline.pop(0).strip()
                    if key == "central": key = "core"
                    meta[key] = sline.pop(0).strip()

                # Check for lattice information.
                while sline and sline[0].strip() in self.plist:
                    key = sline.pop(0).strip()
                    pdict[key] = float(sline.pop(0))

                # Check for atom information
                if sline and anext:

                    elraw = sline.pop(0).strip()
                    el = elraw[:1].upper() + elraw[1:].lower()
                    x = float(sline.pop(0))
                    y = float(sline.pop(0))
                    z = float(sline.pop(0))

                    tag = ""
                    if sline:
                        tag = sline.pop(0).strip()
                    occ = 1.0
                    if sline:
                        occ = float(sline.pop(0))

                    a = Atom( atype = el,
                        xyz = [x,y,z],
                        name = tag,
                        occupancy = occ)

                    atoms.append(a)

        except (ValueError, IndexError), e:
            emsg = "%d: file is not in Atoms format" % ln
            raise StructureFormatError(emsg)
class P_bratoms(StructureParser):
    """Parser for Bruce Ravel's Atoms structure format.
    """

    plist = ["a", "b", "c", "alpha", "beta", "gamma"]

    def __init__(self):
        StructureParser.__init__(self)
        self.format = "bratoms"
        return

    def parseLines(self, lines):
        """Parse list of lines in atoms format.

        Return Structure object or raise StructureFormatError.
        """

        comlist = ["#", "%", "!", "*"]
        atoms = []
        title = ""
        anext = False
        sg = None
        structure = BRAtomsStructure()
        meta = structure.bratoms
        pdict = dict.fromkeys(self.plist)


        # Count the lines
        ln = 0
        try:

            for line in lines:
                ln += 1

                # Strip comments from the line
                for c in comlist:
                    idx = line.find(c)
                    if idx != -1:
                        line = line[:idx]

                # Move on if there is not a line
                if not line: continue

                # Move on if there was only white space in the line
                sline = line.split()
                if not sline: continue

                # Check if we have atoms following
                if sline[0].startswith("atom"):
                    anext = True
                    continue

                # Check for title
                if sline[0].startswith("title"):
                    if title: title += "\n"
                    title += line[5:]
                    continue

                # Get rid of pesky "=" and "," signs
                while "=" in sline: sline.remove("=")
                while "," in sline: sline.remove(",")

                # space group
                if sline and sline[0].startswith("space"):
                    meta["space"] = line[5:].strip()
                    continue

                # output
                if sline and sline[0].startswith("output"):
                    meta["output"] = line[6:].strip()
                    continue

                # shift
                if sline and sline[0].startswith("shift"):
                    meta["shift"] = line[5:].strip()
                    continue

                # Check for other metadata
                while sline and sline[0].strip() in meta:
                    key = sline.pop(0).strip()
                    if key == "central": key = "core"
                    meta[key] = sline.pop(0).strip()

                # Check for lattice information.
                while sline and sline[0].strip() in self.plist:
                    key = sline.pop(0).strip()
                    pdict[key] = float(sline.pop(0))

                # Check for atom information
                if sline and anext:

                    elraw = sline.pop(0).strip()
                    el = elraw[:1].upper() + elraw[1:].lower()
                    x = float(sline.pop(0))
                    y = float(sline.pop(0))
                    z = float(sline.pop(0))

                    tag = ""
                    if sline:
                        tag = sline.pop(0).strip()
                    occ = 1.0
                    if sline:
                        occ = float(sline.pop(0))

                    a = Atom( atype = el,
                        xyz = [x,y,z],
                        name = tag,
                        occupancy = occ)

                    atoms.append(a)

        except (ValueError, IndexError), e:
            emsg = "%d: file is not in Atoms format" % ln
            raise StructureFormatError(emsg)

        # Make sure we have atoms.
        if len(atoms) == 0:
            raise StructureFormatError("File contains no atoms")

        # Make sure we have unit cell parameters
        if pdict["a"] is None:
            emsg = "Missing definition of cell parameter"
            raise StructureFormatError(emsg)

        # Fill in optional information if it was missing.
        if pdict["alpha"] is None:
            pdict["alpha"] = 90.0
        if pdict["beta"] is None:
            pdict["beta"] = pdict["alpha"]
        if pdict["gamma"] is None:
            pdict["gamma"] = pdict["alpha"]
        if pdict["b"] is None:
            pdict["b"] = pdict["a"]
        if pdict["c"] is None:
            pdict["c"] = pdict["a"]

        if meta['core'] is None:
            meta['core'] = atoms[0].element

        lat = Lattice(**pdict)
        structure.title = title
        structure.lattice = lat
        structure.extend(atoms)
        return structure
Beispiel #12
0
    def parseLines(self, lines):
        """Parse list of lines in XYZ format.

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # first valid line gives number of atoms
        try:
            lfs = linefields[start]
            w1 = linefields[start][0]
            if len(lfs) == 1 and str(int(w1)) == w1:
                p_natoms = int(w1)
                #try to get lattice vectors from description line
                try:
                    latticeVecs = list(map(float, linefields[start+1]))
                    assert len(latticeVecs)==9, "Expect 9 numbers for the 3 basis vectors"
                    reshaped = [latticeVecs[0:3], latticeVecs[3:6], latticeVecs[6:9]]
                    stru.lattice = Lattice(base=reshaped) 
                    needsDescription = True
                except:
                    import traceback as tb
                    import warnings
                    warnings.warn("Failed to parse lattice vectors: \n{}".format(tb.format_exc()))
                    needsDescription = False
                    stru.description = lines[start+1].strip()
                start += 2
            else:
                emsg = ("%d: invalid XYZ format, missing number of atoms" %
                        (start + 1))
                raise StructureFormatError(emsg)
        except (IndexError, ValueError):
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            raise StructureFormatError(emsg).with_traceback(exc_traceback)
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop-1]) == 0:
            stop -= 1
        # get out for empty structure
        if p_natoms == 0 or start >= stop:
            return stru
        # here we have at least one valid record line
        nfields = len(linefields[start])
        if nfields != 4 and nfields != 5:
            emsg = "%d: invalid XYZ format, expected 4 or 5 columns" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        try:
            p_nl = start
            for fields in linefields[start:] :
                p_nl += 1
                if fields == []:
                    continue
                elif len(fields) != 4 and len(fields) !=5:
                    emsg = ('%d: all lines must have ' +
                            'a symbol, position, and optionally charge') % p_nl
                    raise StructureFormatError(emsg)
                symbol = fields[0]
                symbol = symbol[0].upper() + symbol[1:].lower()
                xyz = [ float(f) for f in fields[1:4] ]                 
                if len(fields)==5:
                    charge = float(fields[4])
                else:
                    charge = 0.0
                stru.addNewAtom(symbol, xyz=xyz)
                stru.getLastAtom().charge=charge
        except ValueError:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = "%d: invalid number format" % p_nl
            raise StructureFormatError(emsg).with_traceback(exc_traceback)
        # finally check if all the atoms have been read
        if p_natoms is not None and len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
        # if needsDescription:
        #     stru.generateDescription()
        return stru
Beispiel #13
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3,3), dtype=float)
        xcfg_H0_set = numpy.zeros((3,3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        xcfg_auxiliary = []
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        try:
            stru = Structure()
            # ignore trailing blank lines
            stop = len(lines)
            while stop>0 and lines[stop-1].strip() == "":
                stop -= 1
            ilines = iter(lines[:stop])
            # read XCFG header
            for line in ilines:
                p_nl += 1
                stripped_line = line.strip()
                # blank lines and lines starting with # are ignored
                if stripped_line == "" or line[0] == '#':
                    continue
                elif xcfg_Number_of_particles is None:
                    if line.find("Number of particles =") != 0:
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                    p_natoms = xcfg_Number_of_particles
                elif line.find("A =") == 0:
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.find("H0(") == 0:
                    i, j = ( int(line[3])-1 ,  int(line[5])-1 )
                    xcfg_H0[i,j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i,j] = True
                elif line.find(".NO_VELOCITY.") == 0:
                    xcfg_NO_VELOCITY = True
                elif line.find("entry_count =") == 0:
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                elif p_auxiliary_re.match(line):
                    m = p_auxiliary_re.match(line)
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
                else:
                    break
            # check header for consistency
            if numpy.any(xcfg_H0_set == False):
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys())+1
            for i in range(p_auxnum):
                if not i in p_auxiliary:
                    p_auxiliary[i] = "aux%d" % i
            sorted_aux_keys = p_auxiliary.keys()
            sorted_aux_keys.sort()
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries' : [ p_auxiliary[k]
                                      for k in sorted_aux_keys ]
                }
            if 6-3*xcfg_NO_VELOCITY+len(p_auxiliary) != xcfg_entry_count:
                emsg = ("%d: auxiliary fields " +
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # build p_assign_atom function to assign entries to proper fields
            p_exprs = [ "a.xyz[0]=fields[0]",
                        "a.xyz[1]=fields[1]",
                        "a.xyz[2]=fields[2]" ]
            if not xcfg_NO_VELOCITY:
                p_exprs += [  "a.v=numpy.zeros(3, dtype=float)",
                              "a.v[0]=fields[3]",
                              "a.v[1]=fields[4]",
                              "a.v[2]=fields[5]" ]
            for idx in sorted_aux_keys:
                prop = p_auxiliary[idx]
                col = idx + 6 - 3*xcfg_NO_VELOCITY
                if prop == "Uiso":
                    p_exprs.append("a.U[0,0]=a.U[1,1]=a.U[2,2]=" +
                        "fields[%d]" % col)
                elif re.match(r"^U\d\d$", prop) \
                and 1<=int(prop[1])<=3 and 1<=int(prop[2])<=3 :
                    i, j = int(prop[1])-1, int(prop[2])-1
                    if i==j:
                        p_exprs.append("a.U[%i,%i]=fields[%d]" % (i, j, col) )
                    else:
                        p_exprs.append("a.U[%i,%i]=a.U[%i,%i]=fields[%d]" % \
                                (i, j, j, i, col) )
                else:
                    p_exprs.append( "a.__dict__[%r]=fields[%d]" % \
                            (prop, col) )
            p_assign_expr = "pass; " + "; ".join(p_exprs[3:])
            exec "def p_assign_atom(a, fields) : %s" % p_assign_expr
            # here we are inside data
            p_element = None
            p_nl -= 1
            for line in lines[p_nl:stop]:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    fields = [ float(w) for w in words ]
                    stru.addNewAtom(p_element, fields[:3])
                    a = stru.getLastAtom()
                    a.xyz *= xcfg_A
                    p_assign_atom(a, fields)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
Beispiel #14
0
    def parseLines(self, lines):
        """Parse list of lines in XYZ format.

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # first valid line gives number of atoms
        try:
            lfs = linefields[start]
            w1 = linefields[start][0]
            if len(lfs) == 1 and str(int(w1)) == w1:
                p_natoms = int(w1)
                stru.title = lines[start+1].strip()
                start += 2
            else:
                emsg = ("%d: invalid XYZ format, missing number of atoms" %
                        (start + 1))
                raise StructureFormatError(emsg)
        except (IndexError, ValueError):
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            raise StructureFormatError, emsg, exc_traceback
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop-1]) == 0:
            stop -= 1
        # get out for empty structure
        if p_natoms == 0 or start >= stop:
            return stru
        # here we have at least one valid record line
        nfields = len(linefields[start])
        if nfields != 4:
            emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        try:
            p_nl = start
            for fields in linefields[start:] :
                p_nl += 1
                if fields == []:
                    continue
                elif len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                element = fields[0]
                element = element[0].upper() + element[1:].lower()
                xyz = [ float(f) for f in fields[1:4] ]
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = "%d: invalid number format" % p_nl
            raise StructureFormatError, emsg, exc_traceback
        # finally check if all the atoms have been read
        if p_natoms is not None and len(stru) != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
            raise StructureFormatError(emsg)
        return stru
Beispiel #15
0
            p = getParser(fmt)
            try:
                pmethod = getattr(p, method)
                stru = pmethod(*args, **kwargs)
                self.format = fmt
                break
            except StructureFormatError, err:
                parsers_emsgs.append("%s: %s" % (fmt, err))
            except NotImplementedError:
                pass
        if stru is None:
            emsg = "\n".join([
                "Unknown or invalid structure format.",
                "Errors per each tested structure format:"
            ] + parsers_emsgs)
            raise StructureFormatError(emsg)
        self.__dict__.update(p.__dict__)
        return stru

    # End of parseLines


# End of class P_auto

# Routines


def getParser():
    return P_auto()

Beispiel #16
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure instance or raise StructureFormatError.
        """
        try:
            stru = Structure()
            scale = numpy.identity(3, dtype=float)
            scaleU = numpy.zeros(3, dtype=float)
            p_nl = 0
            for line in lines:
                p_nl += 1
                # skip blank lines
                if not line.strip(): continue
                # make sure line has 80 characters
                if len(line) < 80:
                    line = "%-80s" % line
                words = line.split()
                record = words[0]
                if record == "TITLE":
                    continuation = line[8:10]
                    if continuation.strip():
                        stru.title += line[10:].rstrip()
                    else:
                        stru.title = line[10:].rstrip()
                elif record == "CRYST1":
                    a = float(line[7:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                    scale = numpy.transpose(stru.lattice.recbase)
                elif record == "SCALE1":
                    sc = numpy.zeros((3, 3), dtype=float)
                    sc[0, :] = [float(x) for x in line[10:40].split()]
                    scaleU[0] = float(line[45:55])
                elif record == "SCALE2":
                    sc[1, :] = [float(x) for x in line[10:40].split()]
                    scaleU[1] = float(line[45:55])
                elif record == "SCALE3":
                    sc[2, :] = [float(x) for x in line[10:40].split()]
                    scaleU[2] = float(line[45:55])
                    base = numpy.transpose(numpy.linalg.inv(sc))
                    abcABGcryst = numpy.array(stru.lattice.abcABG())
                    stru.lattice.setLatBase(base)
                    abcABGscale = numpy.array(stru.lattice.abcABG())
                    reldiff = numpy.fabs(1.0 - abcABGscale / abcABGcryst)
                    if not numpy.all(reldiff < 1.0e-4):
                        emsg = "%d: " % p_nl + \
                                "SCALE and CRYST1 are not consistent."
                        raise StructureFormatError(emsg)
                    if numpy.any(scaleU != 0.0):
                        emsg = "Origin offset not yet implemented."
                        raise NotImplementedError(emsg)
                elif record in ("ATOM", "HETATM"):
                    name = line[12:16].strip()
                    rc = [float(x) for x in line[30:54].split()]
                    try:
                        occupancy = float(line[54:60])
                    except ValueError:
                        occupancy = 1.0
                    try:
                        B = float(line[60:66])
                        uiso = B / (8 * pi**2)
                    except ValueError:
                        uiso = 0.0
                    element = line[76:78].strip()
                    if element == "":
                        # get element from the first 2 characters of name
                        element = line[12:14].strip()
                        element = element[0].upper() + element[1:].lower()
                    stru.addNewAtom(element, occupancy=occupancy, label=name)
                    last_atom = stru.getLastAtom()
                    last_atom.xyz_cartn = rc
                    last_atom.Uisoequiv = uiso
                elif record == "SIGATM":
                    sigrc = [float(x) for x in line[30:54].split()]
                    sigxyz = numpy.dot(scale, sigrc)
                    try:
                        sigo = float(line[54:60])
                    except ValueError:
                        sigo = 0.0
                    try:
                        sigB = float(line[60:66])
                        sigU = numpy.identity(3) * sigB / (8 * pi**2)
                    except ValueError:
                        sigU = numpy.zeros((3, 3), dtype=float)
                    last_atom.sigxyz = sigxyz
                    last_atom.sigo = sigo
                    last_atom.sigU = sigU
                elif record == "ANISOU":
                    last_atom.anisotropy = True
                    Uij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                    Ua = last_atom.U
                    for i in range(3):
                        Ua[i, i] = Uij[i]
                    Ua[0, 1] = Ua[1, 0] = Uij[3]
                    Ua[0, 2] = Ua[2, 0] = Uij[4]
                    Ua[1, 2] = Ua[2, 1] = Uij[5]
                elif record == "SIGUIJ":
                    sigUij = [float(x) * 1.0e-4 for x in line[28:70].split()]
                    for i in range(3):
                        last_atom.sigU[i, i] = sigUij[i]
                    last_atom.sigU[0, 1] = last_atom.sigU[1, 0] = sigUij[3]
                    last_atom.sigU[0, 2] = last_atom.sigU[2, 0] = sigUij[4]
                    last_atom.sigU[1, 2] = last_atom.sigU[2, 1] = sigUij[5]
                elif record in P_pdb.validRecords:
                    pass
                else:
                    emsg = "%d: invalid record name '%r'" % (p_nl, record)
                    raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: invalid PDB record" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru