예제 #1
0
    def parseLines(self, lines):
        """Parse list of lines in RAWXYZ format.

        Every record line must have the same layout, which is detected
        from the first record: either three numeric columns "x y z" or
        four columns "element x y z".  Blank lines are ignored.  Leading
        lines whose first whitespace-separated token is "#" are skipped.

        lines -- list of strings, one per line of the input

        Return Structure object or raise StructureFormatError.
        """
        linefields = [l.split() for l in lines]
        # prepare output structure
        stru = Structure()
        # find first valid record
        start = 0
        for field in linefields:
            if len(field) == 0 or field[0] == "#":
                start += 1
            else:
                break
        # find the last valid record
        stop = len(lines)
        while stop > start and len(linefields[stop - 1]) == 0:
            stop -= 1
        # get out for empty structure
        if start >= stop:
            return stru
        # here we have at least one valid record line
        # figure out xyz layout from the first line for plain and raw formats
        floatfields = [isfloat(f) for f in linefields[start]]
        nfields = len(linefields[start])
        if nfields not in (3, 4):
            emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                    (start + 1))
            raise StructureFormatError(emsg)
        if floatfields[:3] == [True, True, True]:
            el_idx, x_idx = (None, 0)
        elif floatfields[:4] == [False, True, True, True]:
            el_idx, x_idx = (0, 1)
        else:
            emsg = "%d: invalid RAWXYZ format" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines
        # p_nl is the 1-based number of the line being processed, it is
        # defined before the try block so the handler can always report it
        p_nl = start
        try:
            # lines past `stop` are blank, there is no need to visit them
            for fields in linefields[start:stop]:
                p_nl += 1
                if not fields:
                    continue
                if len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                element = "" if el_idx is None else fields[el_idx]
                # the layout checks above guarantee that this slice always
                # holds exactly three items
                xyz = [float(f) for f in fields[x_idx:x_idx + 3]]
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            # float() failed, report the offending line and keep the
            # original traceback
            emsg = "%d: invalid number" % p_nl
            exc_traceback = sys.exc_info()[2]
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
예제 #2
0
    def parseLines(self, lines):
        """Build a Structure from text lines in RAWXYZ format.

        The column layout is taken from the first record line and is
        either "x y z" or "element x y z".  All following non-blank lines
        must have the same number of columns.

        lines -- list of strings, one per input line

        Return Structure object or raise StructureFormatError.
        """
        stru = Structure()
        tokens = [text.split() for text in lines]
        # advance to the first line that is neither blank nor starts
        # with a standalone "#" token
        start = 0
        while start < len(tokens):
            head = tokens[start]
            if head and head[0] != "#":
                break
            start += 1
        # step back over any trailing blank lines
        stop = len(lines)
        while stop > start and not tokens[stop - 1]:
            stop -= 1
        # nothing to read, return empty structure
        if stop <= start:
            return stru
        # detect the record layout from the first record line
        first = tokens[start]
        numeric = [isfloat(w) for w in first]
        nfields = len(first)
        if nfields != 3 and nfields != 4:
            emsg = ("%d: invalid RAWXYZ format, expected 3 or 4 columns" %
                    (start + 1))
            raise StructureFormatError(emsg)
        if numeric[:3] == [True, True, True]:
            el_idx = None
            x_idx = 0
        elif numeric[:4] == [False, True, True, True]:
            el_idx = 0
            x_idx = 1
        else:
            emsg = "%d: invalid RAWXYZ format" % (start + 1)
            raise StructureFormatError(emsg)
        # convert the records, p_nl tracks the 1-based line number
        try:
            p_nl = start
            for p_nl, fields in enumerate(tokens[start:], start + 1):
                if not fields:
                    continue
                if len(fields) != nfields:
                    emsg = ('%d: all lines must have ' +
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                if el_idx is None:
                    element = ""
                else:
                    element = fields[el_idx] or ""
                xyz = list(map(float, fields[x_idx:x_idx + 3]))
                if len(xyz) == 2:
                    xyz.append(0.0)
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            emsg = "%d: invalid number" % p_nl
            exc_traceback = sys.exc_info()[2]
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
예제 #3
0
class P_cif(StructureParser):
    """Simple parser for CIF structure format.
    Reads Structure from the first block containing _atom_site_label key.
    Following blocks, if any, are ignored.

    Data members:

    format      -- structure format name
    ciffile     -- instance of CifFile from PyCifRW
    stru        -- Structure instance used for cif input or output

    Data members used for input only:

    spacegroup  -- instance of SpaceGroup used for symmetry expansion
    eps         -- resolution in fractional coordinates for non-equal
                   positions.  Use for expansion of asymmetric unit.
    eau         -- instance of ExpandAsymmetricUnit from SymmetryUtilities
    asymmetric_unit -- list of atom instances for the original asymmetric
                   unit in the CIF file
    labelindex  -- dictionary mapping unique atom label to index of atom
                   in self.asymmetric_unit
    cif_sgname  -- space group name obtained by looking up the value of
                   _space_group_name_Hall, _symmetry_space_group_name_Hall,
                   _space_group_name_H-M_alt, _symmetry_space_group_name_H-M
                   items.  None when none of them is defined.
    """

    # static data and methods ------------------------------------------------

    # dictionary set of class methods for translating CIF values
    # to Atom attributes

    _atom_setters = dict.fromkeys((
        '_tr_ignore',
        '_tr_atom_site_label',
        '_tr_atom_site_type_symbol',
        '_tr_atom_site_fract_x',
        '_tr_atom_site_fract_y',
        '_tr_atom_site_fract_z',
        '_tr_atom_site_cartn_x',
        '_tr_atom_site_cartn_y',
        '_tr_atom_site_cartn_z',
        '_tr_atom_site_U_iso_or_equiv',
        '_tr_atom_site_B_iso_or_equiv',
        '_tr_atom_site_adp_type', '_tr_atom_site_thermal_displace_type',
        '_tr_atom_site_occupancy',
        '_tr_atom_site_aniso_U_11',
        '_tr_atom_site_aniso_U_22',
        '_tr_atom_site_aniso_U_33',
        '_tr_atom_site_aniso_U_12',
        '_tr_atom_site_aniso_U_13',
        '_tr_atom_site_aniso_U_23',
        '_tr_atom_site_aniso_B_11',
        '_tr_atom_site_aniso_B_22',
        '_tr_atom_site_aniso_B_33',
        '_tr_atom_site_aniso_B_12',
        '_tr_atom_site_aniso_B_13',
        '_tr_atom_site_aniso_B_23',
        ))
    # make _atom_setters case insensitive: both the exact method name and
    # its lower-case form map to the exact method name
    for k in list(_atom_setters.keys()):
        _atom_setters[k] = _atom_setters[k.lower()] = k
    del k

    # factor applied to B displacement values to store them as U
    BtoU = 1.0/(8 * numpy.pi**2)

    # Each _tr_* setter below takes an Atom instance `a` and the raw CIF
    # `value` of the corresponding data item and stores it in `a`.

    @staticmethod
    def _tr_ignore(a, value):
        """Setter for CIF items that have no translation; does nothing."""
        return

    @staticmethod
    def _tr_atom_site_label(a, value):
        a.label = str(value)
        # set element when not specified by _atom_site_type_symbol
        if not a.element:
            P_cif._tr_atom_site_type_symbol(a, value)

    # 3 regexp groups for nucleon number, atom symbol, and oxidation state
    _psymb = re.compile(r'(\d+-)?([a-zA-Z]+)(\d[+-])?')

    @staticmethod
    def _tr_atom_site_type_symbol(a, value):
        # use the whole text matched by _psymb, i.e., drop anything that
        # follows it; fall back to the raw value when there is no match
        rx = P_cif._psymb.match(value)
        smbl = str(rx.group(0) if rx else value)
        # capitalize the first character, lower-case the rest
        a.element = smbl[:1].upper() + smbl[1:].lower()

    @staticmethod
    def _tr_atom_site_fract_x(a, value):
        a.xyz[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_y(a, value):
        a.xyz[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_z(a, value):
        a.xyz[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_x(a, value):
        a.xyz_cartn[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_y(a, value):
        a.xyz_cartn[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_z(a, value):
        a.xyz_cartn[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_U_iso_or_equiv(a, value):
        a.Uisoequiv = leading_float(value)

    @staticmethod
    def _tr_atom_site_B_iso_or_equiv(a, value):
        a.Uisoequiv = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_adp_type(a, value):
        # any type other than "Uiso" or "Biso" is treated as anisotropic
        a.anisotropy = value not in ("Uiso", "Biso")
    # _atom_site_thermal_displace_type is handled by the same setter
    _tr_atom_site_thermal_displace_type = _tr_atom_site_adp_type

    @staticmethod
    def _tr_atom_site_occupancy(a, value):
        # NOTE(review): the second argument is presumably the fallback
        # used by leading_float for non-numeric values -- confirm.
        a.occupancy = leading_float(value, 1.0)

    @staticmethod
    def _tr_atom_site_aniso_U_11(a, value):
        a.U11 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_22(a, value):
        a.U22 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_33(a, value):
        a.U33 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_12(a, value):
        a.U12 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_13(a, value):
        a.U13 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_23(a, value):
        a.U23 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_11(a, value):
        a.U11 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_22(a, value):
        a.U22 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_33(a, value):
        a.U33 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_12(a, value):
        a.U12 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_13(a, value):
        a.U13 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_23(a, value):
        a.U23 = P_cif.BtoU * leading_float(value)


    @staticmethod
    def _get_atom_setters(cifloop):
        """Find translators of CifLoop items to data in Atom instance.
        Static method.

        cifloop -- instance of CifLoop

        Return a list of setter functions in the order of cifloop.keys().
        Items without a dedicated setter are mapped to _tr_ignore.
        """
        rv = []
        for p in cifloop.keys():
            # CIF item names start with underscore, e.g., "_atom_site_label"
            lcname = "_tr" + p.lower()
            fncname = P_cif._atom_setters.get(lcname, '_tr_ignore')
            rv.append(getattr(P_cif, fncname))
        return rv

    # normal methods ---------------------------------------------------------

    def __init__(self, eps=None):
        """Initialize the parser for CIF structure files.

        eps  -- fractional coordinates cutoff for duplicate positions.
                When None use the default for ExpandAsymmetricUnit.
        """
        StructureParser.__init__(self)
        self.format = "cif"
        self.ciffile = None
        self.stru = None
        self.spacegroup = None
        self.eps = eps
        self.eau = None
        self.asymmetric_unit = None
        self.labelindex = {}
        self.cif_sgname = None


    def parse(self, s):
        """Create Structure instance from a string in CIF format.

        s    -- string with the content of a CIF file

        Return Structure instance or raise StructureFormatError.
        """
        self.ciffile = None
        self.filename = ''
        fp = six.StringIO(s)
        rv = self._parseCifDataSource(fp)
        return rv


    def parseLines(self, lines):
        """Parse list of lines in CIF format.

        lines -- list of strings stripped of line terminator

        Return Structure instance or raise StructureFormatError.
        """
        s = "\n".join(lines) + '\n'
        return self.parse(s)


    def parseFile(self, filename):
        """Create Structure from an existing CIF file.

        filename  -- path to structure file

        Return Structure object.
        Raise StructureFormatError or IOError.
        """
        self.ciffile = None
        self.filename = filename
        fileurl = _fixIfWindowsPath(filename)
        rv = self._parseCifDataSource(fileurl)
        # all good here
        return rv


    def _parseCifDataSource(self, datasource):
        """\
        Open and process CIF data from the specified `datasource`.


        Parameters
        ----------
        datasource : str or a file-like object
            This is used as an argument to the CifFile class.  The CifFile
            instance is stored in `ciffile` attribute of this Parser.

        Returns
        -------
        Structure
            The Structure object loaded from the specified data source.
            This is None when no block contains `_atom_site_label`.

        Raises
        ------
        StructureFormatError
            When the data do not constitute a valid CIF format.
        """
        from CifFile import CifFile, StarError
        self.stru = None
        try:
            with _suppressCifParserOutput():
                self.ciffile = CifFile(datasource)
                for blockname in self.ciffile.keys():
                    self._parseCifBlock(blockname)
                    # stop after reading the first structure
                    if self.stru is not None:
                        break
        except (StarError, ValueError, IndexError) as err:
            # translate to StructureFormatError, keep the original traceback
            exc_traceback = sys.exc_info()[2]
            emsg = str(err).strip()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return self.stru


    def _parseCifBlock(self, blockname):
        """Translate CIF file block, skip blocks without _atom_site_label.
        Updates data members stru, eau.

        blockname  -- name of top level block in self.ciffile

        No return value.
        """
        block = self.ciffile[blockname]
        if '_atom_site_label' not in block:
            return
        # here block contains structure, initialize output data
        self.stru = Structure()
        self.labelindex.clear()
        # execute specialized block parsers
        self._parse_lattice(block)
        self._parse_atom_site_label(block)
        self._parse_atom_site_aniso_label(block)
        self._parse_space_group_symop_operation_xyz(block)
        return


    def _parse_lattice(self, block):
        """Obtain lattice parameters from a CifBlock.
        This method updates self.stru.lattice.

        block -- instance of CifBlock

        No return value.  Do nothing when block has no _cell_length_a.
        Raise StructureFormatError when _cell_length_a is present, but
        some other cell parameter is missing.
        """
        if '_cell_length_a' not in block:
            return
        # obtain lattice parameters
        try:
            latpars = (
                leading_float(block['_cell_length_a']),
                leading_float(block['_cell_length_b']),
                leading_float(block['_cell_length_c']),
                leading_float(block['_cell_angle_alpha']),
                leading_float(block['_cell_angle_beta']),
                leading_float(block['_cell_angle_gamma']),
            )
        except KeyError as err:
            exc_traceback = sys.exc_info()[2]
            emsg = str(err)
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        self.stru.lattice = Lattice(*latpars)
        return


    def _parse_atom_site_label(self, block):
        """Obtain atoms in asymmetric unit from a CifBlock.
        This method inserts Atom instances to self.stru and
        updates labelindex dictionary.

        block -- instance of CifBlock

        No return value.
        """
        # process _atom_site_label
        atom_site_loop = block.GetLoop('_atom_site_label')
        # get a list of setters for atom_site values
        prop_setters = P_cif._get_atom_setters(atom_site_loop)
        # index of the _atom_site_label item for the labelindex dictionary
        ilb = atom_site_loop.keys().index('_atom_site_label')
        # loop through the values and pass them to the setters
        sitedatalist = zip(*atom_site_loop.values())
        for values in sitedatalist:
            curlabel = values[ilb]
            # skip entries that have invalid label
            if curlabel == '?':
                continue
            self.labelindex[curlabel] = len(self.stru)
            self.stru.addNewAtom()
            a = self.stru.getLastAtom()
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return


    def _parse_atom_site_aniso_label(self, block):
        """Obtain value of anisotropic thermal displacements from a CifBlock.
        This method updates U members of Atom instances in self.stru.
        The labelindex dictionary has to be defined beforehand.

        block -- instance of CifBlock

        No return value.  Do nothing when block has no
        _atom_site_aniso_label.  Raise KeyError when a label other than
        '?' does not exist in labelindex.
        """
        if '_atom_site_aniso_label' not in block:
            return
        # was anisotropy processed in the _atom_site_label loop?
        isotropy_done = _hasAtomSiteADPType(block)
        # something to do here:
        adp_loop = block.GetLoop('_atom_site_aniso_label')
        # index of the _atom_site_label column
        ilb = adp_loop.keys().index('_atom_site_aniso_label')
        # get a list of setters for this loop
        prop_setters = P_cif._get_atom_setters(adp_loop)
        sitedatalist = zip(*adp_loop.values())
        for values in sitedatalist:
            curlabel = values[ilb]
            # skip entries that have invalid label; such entries are also
            # skipped in _parse_atom_site_label so they have no labelindex
            if curlabel == '?':
                continue
            idx = self.labelindex[curlabel]
            a = self.stru[idx]
            if not isotropy_done:
                a.anisotropy = True
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return


    def _parse_space_group_symop_operation_xyz(self, block):
        """Process symmetry operations from a CifBlock.  The method
        updates spacegroup and eau data according to symmetry
        operations defined in _space_group_symop_operation_xyz or
        _symmetry_equiv_pos_as_xyz items in CifBlock.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when the space group cannot be
        determined from symmetry operations nor from its identifier.
        """
        from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
        from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
        from diffpy.structure.spacegroups import FindSpaceGroup
        self.asymmetric_unit = list(self.stru)
        sym_synonyms = ('_space_group_symop_operation_xyz',
                        '_symmetry_equiv_pos_as_xyz')
        sym_loop_names = [n for n in sym_synonyms if n in block]
        # recover explicit list of symmetry operations
        symop_list = []
        if sym_loop_names:
            # sym_loop exists here and we know its cif name
            sym_loop_name = sym_loop_names[0]
            sym_loop = block.GetLoop(sym_loop_name)
            symop_list = [getSymOp(eqxyz) for eqxyz in sym_loop[sym_loop_name]]
        # determine space group number
        sg_nameHall = (block.get('_space_group_name_Hall', '') or
                block.get('_symmetry_space_group_name_Hall', ''))
        sg_nameHM = (block.get('_space_group_name_H-M_alt', '') or
                block.get('_symmetry_space_group_name_H-M', ''))
        self.cif_sgname = (sg_nameHall or sg_nameHM or None)
        # prefer the integer space group number, fall back to the H-M name
        sgid = (int(block.get('_space_group_IT_number', '0')) or
                int(block.get('_symmetry_Int_Tables_number', '0')) or
                sg_nameHM)
        self.spacegroup = None
        # try to reuse existing space group from symmetry operations
        if symop_list:
            try:
                self.spacegroup = FindSpaceGroup(symop_list)
            except ValueError:
                # no match here, continue with the other lookups below
                pass
        # otherwise lookup the space group from its identifier
        if self.spacegroup is None and sgid and IsSpaceGroupIdentifier(sgid):
            self.spacegroup = GetSpaceGroup(sgid)
        # define new spacegroup when symmetry operations were listed, but
        # there is no match to an existing definition
        if symop_list and self.spacegroup is None:
            new_short_name = "CIF " + (sg_nameHall or 'data')
            new_crystal_system = (
                    block.get('_space_group_crystal_system') or
                    block.get('_symmetry_cell_setting') or
                    'TRICLINIC' ).upper()
            self.spacegroup = SpaceGroup(
                    short_name=new_short_name,
                    crystal_system=new_crystal_system,
                    symop_list=symop_list)
        if self.spacegroup is None:
            emsg = "CIF file has unknown space group identifier {!r}."
            raise StructureFormatError(emsg.format(sgid))
        self._expandAsymmetricUnit(block)
        return


    def _expandAsymmetricUnit(self, block):
        """Perform symmetry expansion of self.stru using self.spacegroup.

        This method updates data in stru and eau.

        Parameters
        ----------
        block : CifBlock
            The top-level block containing crystal structure data.
        """
        from diffpy.structure.symmetryutilities import ExpandAsymmetricUnit
        corepos = [a.xyz for a in self.stru]
        coreUijs = [a.U for a in self.stru]
        self.eau = ExpandAsymmetricUnit(self.spacegroup, corepos, coreUijs,
                                        eps=self.eps)
        # setup anisotropy according to symmetry requirements
        # was isotropy flag already processed
        isotropy_done = (_hasAtomSiteADPType(block) or
                         '_atom_site_aniso_label' in block)
        if not isotropy_done:
            for ca, uisotropy in zip(self.stru, self.eau.Uisotropy):
                ca.anisotropy = not uisotropy
        # build a flat list of new atoms, all equivalents of a core atom
        # are kept together in the order of the asymmetric unit
        expanded = []
        for i, ca in enumerate(self.stru):
            for j in range(self.eau.multiplicity[i]):
                a = Atom(ca)
                a.xyz = self.eau.expandedpos[i][j]
                # the first equivalent keeps the original label
                if j > 0:
                    a.label += '_' + str(j + 1)
                if a.anisotropy:
                    a.U = self.eau.expandedUijs[i][j]
                expanded.append(a)
        # insert new atoms where they belong
        self.stru[:] = expanded
        return

    # conversion to CIF ------------------------------------------------------

    def toLines(self, stru):
        """Convert Structure stru to a list of lines in basic CIF format.

        The structure is always written with P1 symmetry.  Site labels
        are generated from element symbol and a running per-element count.

        stru -- Structure instance to be converted

        Return list of strings.
        """
        import time
        lines = []
        # may be replaced with filtered Structure.title
        # for now, we can add the title as a comment
        if stru.title.strip():
            title_lines = stru.title.split('\n')
            lines.extend([ "# " + line.strip() for line in title_lines ])
            lines.append("")
        lines.append("data_3D")
        iso_date =  "%04i-%02i-%02i" % time.gmtime()[:3]
        lines.extend([
            "%-31s %s" % ("_audit_creation_date", iso_date),
            "%-31s %s" % ("_audit_creation_method", "P_cif.py"),
            "",
            "%-31s %s" % ("_symmetry_space_group_name_H-M", "'P1'"),
            "%-31s %s" % ("_symmetry_Int_Tables_number", "1"),
            "%-31s %s" % ("_symmetry_cell_setting", "triclinic"),
            "" ])
        # there should be no need to specify equivalent positions for P1
        # _symmetry_equiv_pos_as_xyz x,y,z
        lines.extend([
            "%-31s %.6g" % ("_cell_length_a", stru.lattice.a),
            "%-31s %.6g" % ("_cell_length_b", stru.lattice.b),
            "%-31s %.6g" % ("_cell_length_c", stru.lattice.c),
            "%-31s %.6g" % ("_cell_angle_alpha", stru.lattice.alpha),
            "%-31s %.6g" % ("_cell_angle_beta", stru.lattice.beta),
            "%-31s %.6g" % ("_cell_angle_gamma", stru.lattice.gamma),
            "" ])
        # build a list of site labels and adp (displacement factor) types
        element_count = {}
        a_site_label = []
        a_adp_type = []
        for a in stru:
            cnt = element_count[a.element] = element_count.get(a.element,0)+1
            a_site_label.append( "%s%i" % (a.element, cnt) )
            # U proportional to identity matrix means isotropic displacement
            if numpy.all(a.U == a.U[0,0]*numpy.identity(3)):
                a_adp_type.append("Uiso")
            else:
                a_adp_type.append("Uani")
        # list all atoms
        lines.extend([
            "loop_",
            "  _atom_site_label",
            "  _atom_site_type_symbol",
            "  _atom_site_fract_x",
            "  _atom_site_fract_y",
            "  _atom_site_fract_z",
            "  _atom_site_U_iso_or_equiv",
            "  _atom_site_adp_type",
            "  _atom_site_occupancy" ])
        for a, label, adp_type in zip(stru, a_site_label, a_adp_type):
            line = "  %-5s %-3s %11.6f %11.6f %11.6f %11.6f %-5s %.4f" % (
                    label, a.element, a.xyz[0], a.xyz[1], a.xyz[2],
                    a.Uisoequiv, adp_type, a.occupancy  )
            lines.append(line)
        # find anisotropic atoms
        idx_aniso = [ i for i, adp_type in enumerate(a_adp_type)
                      if adp_type != "Uiso" ]
        if idx_aniso:
            lines.extend([
                "loop_",
                "  _atom_site_aniso_label",
                "  _atom_site_aniso_U_11",
                "  _atom_site_aniso_U_22",
                "  _atom_site_aniso_U_33",
                "  _atom_site_aniso_U_12",
                "  _atom_site_aniso_U_13",
                "  _atom_site_aniso_U_23" ])
            for i in idx_aniso:
                a = stru[i]
                line = "  %-5s %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f" % (
                        a_site_label[i], a.U[0,0], a.U[1,1], a.U[2,2],
                        a.U[0,1], a.U[0,2], a.U[1,2] )
                lines.append(line)
        return lines
예제 #4
0
    def parseLines(self, lines):
        """Parse list of lines in XCFG format.

        The input consists of a header with "Number of particles =",
        "A =", "H0(i,j) =", ".NO_VELOCITY.", "entry_count =" and
        "auxiliary[n] =" items followed by a data block with element
        symbols and per-atom records.  Blank lines and lines starting
        with "#" are ignored in the header.

        lines -- list of strings, one per line of the input

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3, 3), dtype=float)
        xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        # p_nl is the 1-based number of the line being processed
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        stru = Structure()
        # ignore trailing blank lines
        stop = len(lines)
        for line in reversed(lines):
            if line.strip():
                break
            stop -= 1
        # iterator over the valid data lines
        ilines = iter(lines[:stop])
        try:
            # read XCFG header
            # NOTE(review): the line that terminates this loop is consumed
            # from `ilines` and is never seen by the data loop below.
            # Presumably it is the atom-mass line, which the data loop
            # would ignore anyway -- confirm against the XCFG format.
            for line in ilines:
                p_nl += 1
                # blank lines and lines starting with # are ignored
                if not line.strip() or line.startswith('#'):
                    continue
                if xcfg_Number_of_particles is None:
                    if not line.startswith("Number of particles ="):
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                elif line.startswith("A ="):
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.startswith("H0("):
                    # tensor indices are single digits at fixed positions
                    # as in "H0(1,2) = value"
                    i, j = (int(line[3]) - 1, int(line[5]) - 1)
                    xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i, j] = True
                elif line.startswith(".NO_VELOCITY."):
                    xcfg_NO_VELOCITY = True
                elif line.startswith("entry_count ="):
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                else:
                    m = p_auxiliary_re.match(line)
                    if m is None:
                        # not a header item, the header is finished
                        break
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
            # check header for consistency
            if not xcfg_H0_set.all():
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            # assign default names to auxiliaries that were not declared
            p_auxnum = max(p_auxiliary) + 1 if p_auxiliary else 0
            for i in range(p_auxnum):
                p_auxiliary.setdefault(i, "aux%d" % i)
            sorted_aux_keys = sorted(p_auxiliary.keys())
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries' : [ p_auxiliary[k]
                                      for k in sorted_aux_keys ]
                }
            # each record has 3 coordinates, 3 velocities unless disabled,
            # and one value per auxiliary
            ecnt = len(p_auxiliary) + (3 if xcfg_NO_VELOCITY else 6)
            if ecnt != xcfg_entry_count:
                emsg = ("%d: auxiliary fields are "
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # here we are inside the data block
            p_element = None
            for line in ilines:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    # coordinates cannot be scaled without the A factor,
                    # report it as a format error rather than TypeError
                    if xcfg_A is None:
                        emsg = "%d: header does not define 'A ='" % p_nl
                        raise StructureFormatError(emsg)
                    fields = [float(w) for w in words]
                    xyz = [xcfg_A * xi for xi in fields[:3]]
                    stru.addNewAtom(p_element, xyz=xyz)
                    a = stru[-1]
                    _assign_auxiliaries(a, fields, auxiliaries=p_auxiliary,
                                        no_velocity=xcfg_NO_VELOCITY)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != xcfg_Number_of_particles:
                emsg = "expected %d atoms, read %d" % (
                        xcfg_Number_of_particles, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            # failed number conversion or too short line, keep the
            # original traceback
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_traceback = sys.exc_info()[2]
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
예제 #5
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        lines -- list of strings, one per line of the PDB file

        Return Structure instance or raise StructureFormatError.
        Raise NotImplementedError when the SCALEn records define
        a nonzero origin offset.
        """
        # defined before the try block so that the error handler below
        # can always report a line number
        p_nl = 0
        try:
            stru = Structure()
            scale = numpy.identity(3, dtype=float)
            scaleU = numpy.zeros(3, dtype=float)
            # SCALE matrix; it gets allocated by the SCALE1 record
            sc = None
            # the most recent ATOM or HETATM entry; the SIGATM, ANISOU
            # and SIGUIJ records update it
            last_atom = None
            for line in lines:
                p_nl += 1
                # skip blank lines
                if not line.strip():
                    continue
                # make sure line has 80 characters
                if len(line) < 80:
                    line = "%-80s" % line
                words = line.split()
                record = words[0]
                # report records that depend on a missing earlier record
                # as format errors instead of failing on unbound names
                if record in ("SCALE2", "SCALE3") and sc is None:
                    emsg = ("%d: %s record without preceding SCALE1" %
                            (p_nl, record))
                    raise StructureFormatError(emsg)
                if (record in ("SIGATM", "ANISOU", "SIGUIJ") and
                        last_atom is None):
                    emsg = ("%d: %s record without preceding ATOM record" %
                            (p_nl, record))
                    raise StructureFormatError(emsg)
                if record == "TITLE":
                    # nonblank columns 9-10 mark a continuation line
                    continuation = line[8:10]
                    if continuation.strip():
                        stru.title += line[10:].rstrip()
                    else:
                        stru.title = line[10:].rstrip()
                elif record == "CRYST1":
                    a = float(line[7:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                    scale = numpy.transpose(stru.lattice.recbase)
                elif record == "SCALE1":
                    sc = numpy.zeros((3,3), dtype=float)
                    sc[0,:] = [float(x) for x in line[10:40].split()]
                    scaleU[0] = float(line[45:55])
                elif record == "SCALE2":
                    sc[1,:] = [float(x) for x in line[10:40].split()]
                    scaleU[1] = float(line[45:55])
                elif record == "SCALE3":
                    sc[2,:] = [float(x) for x in line[10:40].split()]
                    scaleU[2] = float(line[45:55])
                    # the SCALE matrix is complete here, so use it to
                    # define the lattice and verify it agrees with the
                    # lattice parameters that were in effect before
                    base = numpy.transpose(numpy.linalg.inv(sc))
                    abcABGcryst = numpy.array(stru.lattice.abcABG())
                    stru.lattice.setLatBase(base)
                    abcABGscale = numpy.array(stru.lattice.abcABG())
                    reldiff = numpy.fabs(1.0 - abcABGscale/abcABGcryst)
                    if not numpy.all(reldiff < 1.0e-4):
                        emsg = "%d: " % p_nl + \
                                "SCALE and CRYST1 are not consistent."
                        raise StructureFormatError(emsg)
                    if numpy.any(scaleU != 0.0):
                        emsg = "Origin offset not yet implemented."
                        raise NotImplementedError(emsg)
                elif record in ("ATOM", "HETATM"):
                    name = line[12:16].strip()
                    rc = [float(x) for x in line[30:54].split()]
                    # blank or malformed occupancy defaults to 1
                    try:
                        occupancy = float(line[54:60])
                    except ValueError:
                        occupancy = 1.0
                    # blank or malformed B-factor gives zero Uiso
                    try:
                        B = float(line[60:66])
                        uiso = B/(8*pi**2)
                    except ValueError:
                        uiso = 0.0
                    element = line[76:78].strip()
                    if element == "":
                        # get element from the first 2 characters of name
                        element = line[12:14].strip()
                        element = element[0].upper() + element[1:].lower()
                    stru.addNewAtom(element,
                            occupancy=occupancy, label=name)
                    last_atom = stru.getLastAtom()
                    last_atom.xyz_cartn = rc
                    last_atom.Uisoequiv = uiso
                elif record == "SIGATM":
                    sigrc = [float(x) for x in line[30:54].split()]
                    sigxyz = numpy.dot(scale, sigrc)
                    try:
                        sigo = float(line[54:60])
                    except ValueError:
                        sigo = 0.0
                    try:
                        sigB = float(line[60:66])
                        sigU = numpy.identity(3)*sigB/(8*pi**2)
                    except ValueError:
                        sigU = numpy.zeros((3,3), dtype=float)
                    last_atom.sigxyz = sigxyz
                    last_atom.sigo = sigo
                    last_atom.sigU = sigU
                elif record == "ANISOU":
                    last_atom.anisotropy = True
                    # the file values are multiplied by 1e-4 and taken
                    # in the order U11 U22 U33 U12 U13 U23
                    Uij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    Ua = last_atom.U
                    for i in range(3):
                        Ua[i,i] = Uij[i]
                    Ua[0,1] = Ua[1,0] = Uij[3]
                    Ua[0,2] = Ua[2,0] = Uij[4]
                    Ua[1,2] = Ua[2,1] = Uij[5]
                elif record == "SIGUIJ":
                    sigUij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    for i in range(3):
                        last_atom.sigU[i,i] = sigUij[i]
                    last_atom.sigU[0,1] = last_atom.sigU[1,0] = sigUij[3]
                    last_atom.sigU[0,2] = last_atom.sigU[2,0] = sigUij[4]
                    last_atom.sigU[1,2] = last_atom.sigU[2,1] = sigUij[5]
                elif record in P_pdb.validRecords:
                    # known record type that carries no structure data
                    pass
                else:
                    emsg = "%d: invalid record name '%r'" % (p_nl, record)
                    raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            # conversion and slicing failures mean a malformed record;
            # re-raise as StructureFormatError keeping the traceback
            emsg = "%d: invalid PDB record" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return stru
예제 #6
0
    def parseLines(self, lines):
        """Parse list of lines in XYZ format.

        lines -- list of strings, one per line of the XYZ file

        The first record must hold just the integer number of atoms,
        the line after it becomes the structure title and the remaining
        nonblank lines must each have 4 columns: element, x, y, z.

        Return Structure object or raise StructureFormatError.
        """
        stru = Structure()
        linefields = [ln.split() for ln in lines]
        # count the leading blank lines and lines starting with a "#" word
        start = 0
        for words in linefields:
            if words and words[0] != "#":
                break
            start += 1
        # the first valid line gives the number of atoms
        try:
            header = linefields[start]
            count_word = header[0]
            # accept only a lone integer in its canonical spelling
            if not (len(header) == 1 and str(int(count_word)) == count_word):
                emsg = ("%d: invalid XYZ format, missing number of atoms" %
                        (start + 1))
                raise StructureFormatError(emsg)
            p_natoms = int(count_word)
            stru.title = lines[start + 1].strip()
            start += 2
        except (IndexError, ValueError):
            exc_traceback = sys.exc_info()[2]
            emsg = ("%d: invalid XYZ format, missing number of atoms" %
                    (start + 1))
            six.reraise(StructureFormatError,
                        StructureFormatError(emsg), exc_traceback)
        # trim the trailing blank lines to find the last valid record
        stop = len(lines)
        while stop > start and not linefields[stop - 1]:
            stop -= 1
        # nothing more to do for an empty structure
        if p_natoms == 0 or start >= stop:
            return stru
        # there is at least one record line from here on
        nfields = len(linefields[start])
        if nfields != 4:
            emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
            raise StructureFormatError(emsg)
        # read all record lines, p_nl is the 1-based line number
        try:
            for p_nl, fields in enumerate(linefields[start:], start + 1):
                if not fields:
                    continue
                if len(fields) != nfields:
                    emsg = ("%d: all lines must have "
                            "the same number of columns") % p_nl
                    raise StructureFormatError(emsg)
                symbol = fields[0]
                # normalize capitalization of the element symbol
                element = symbol[0].upper() + symbol[1:].lower()
                xyz = list(map(float, fields[1:4]))
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            exc_traceback = sys.exc_info()[2]
            emsg = "%d: invalid number format" % p_nl
            six.reraise(StructureFormatError,
                        StructureFormatError(emsg), exc_traceback)
        # finally verify that the declared number of atoms was read
        natoms_read = len(stru)
        if p_natoms is not None and natoms_read != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, natoms_read)
            raise StructureFormatError(emsg)
        return stru
예제 #7
0
class P_cif(StructureParser):
    """Simple parser for CIF structure format.
    Reads Structure from the first block containing _atom_site_label key.
    Following blocks, if any, are ignored.

    Data members:

    format      -- structure format name
    ciffile     -- instance of CifFile from PyCifRW
    stru        -- Structure instance used for cif input or output

    Data members used for input only:

    spacegroup  -- instance of SpaceGroup used for symmetry expansion
    eps         -- resolution in fractional coordinates for non-equal
                   positions.  Use for expansion of asymmetric unit.
    eau         -- instance of ExpandAsymmetricUnit from SymmetryUtilities
    asymmetric_unit -- list of atom instances for the original asymmetric
                   unit in the CIF file
    labelindex  -- dictionary mapping unique atom label to index of atom
                   in self.asymmetric_unit
    cif_sgname  -- space group name obtained by looking up the value of
                   _space_group_name_Hall, _symmetry_space_group_name_Hall,
                   _space_group_name_H-M_alt, _symmetry_space_group_name_H-M
                   items.  None when neither is defined.
    """

    # static data and methods ------------------------------------------------

    # dictionary set of class methods for translating CIF values
    # to Atom attributes

    _atom_setters = dict.fromkeys((
        '_tr_ignore',
        '_tr_atom_site_label',
        '_tr_atom_site_type_symbol',
        '_tr_atom_site_fract_x',
        '_tr_atom_site_fract_y',
        '_tr_atom_site_fract_z',
        '_tr_atom_site_cartn_x',
        '_tr_atom_site_cartn_y',
        '_tr_atom_site_cartn_z',
        '_tr_atom_site_U_iso_or_equiv',
        '_tr_atom_site_B_iso_or_equiv',
        '_tr_atom_site_adp_type', '_tr_atom_site_thermal_displace_type',
        '_tr_atom_site_occupancy',
        '_tr_atom_site_aniso_U_11',
        '_tr_atom_site_aniso_U_22',
        '_tr_atom_site_aniso_U_33',
        '_tr_atom_site_aniso_U_12',
        '_tr_atom_site_aniso_U_13',
        '_tr_atom_site_aniso_U_23',
        '_tr_atom_site_aniso_B_11',
        '_tr_atom_site_aniso_B_22',
        '_tr_atom_site_aniso_B_33',
        '_tr_atom_site_aniso_B_12',
        '_tr_atom_site_aniso_B_13',
        '_tr_atom_site_aniso_B_23',
        ))
    # make _atom_setters case insensitive: both the exact and the
    # lower-case spelling map to the exact name of the setter method
    for k in list(_atom_setters.keys()):
        _atom_setters[k] = _atom_setters[k.lower()] = k
    del k

    # factor for converting B displacement parameters to U
    BtoU = 1.0/(8 * numpy.pi**2)

    # Every setter below takes the Atom instance `a` to be updated
    # and the raw CIF `value` of the corresponding item.

    @staticmethod
    def _tr_ignore(a, value):
        """Setter for CIF items that have no Atom equivalent, do nothing."""
        return

    @staticmethod
    def _tr_atom_site_label(a, value):
        """Set atom label and also its element if not yet defined."""
        a.label = str(value)
        # set element when not specified by _atom_site_type_symbol
        if not a.element:
            P_cif._tr_atom_site_type_symbol(a, value)

    # 3 regexp groups for nucleon number, atom symbol, and oxidation state
    _psymb = re.compile(r'(\d+-)?([a-zA-Z]+)(\d[+-])?')

    @staticmethod
    def _tr_atom_site_type_symbol(a, value):
        """Set atom element from the leading part of value matching _psymb.

        The whole matched text is used, i.e., any nucleon number and
        oxidation state are kept while trailing characters are dropped.
        The complete value is used when it does not match.
        """
        rx = P_cif._psymb.match(value)
        smbl = str(rx.group(0) if rx else value)
        a.element = smbl[:1].upper() + smbl[1:].lower()

    @staticmethod
    def _tr_atom_site_fract_x(a, value):
        a.xyz[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_y(a, value):
        a.xyz[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_fract_z(a, value):
        a.xyz[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_x(a, value):
        a.xyz_cartn[0] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_y(a, value):
        a.xyz_cartn[1] = leading_float(value)

    @staticmethod
    def _tr_atom_site_cartn_z(a, value):
        a.xyz_cartn[2] = leading_float(value)

    @staticmethod
    def _tr_atom_site_U_iso_or_equiv(a, value):
        a.Uisoequiv = leading_float(value)

    @staticmethod
    def _tr_atom_site_B_iso_or_equiv(a, value):
        a.Uisoequiv = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_adp_type(a, value):
        """Flag atom as anisotropic unless value is "Uiso" or "Biso"."""
        a.anisotropy = value not in ("Uiso", "Biso")
    _tr_atom_site_thermal_displace_type = _tr_atom_site_adp_type

    @staticmethod
    def _tr_atom_site_occupancy(a, value):
        # NOTE(review): 1.0 is presumably the fallback used by
        # leading_float for values without a number - confirm.
        a.occupancy = leading_float(value, 1.0)

    @staticmethod
    def _tr_atom_site_aniso_U_11(a, value):
        a.U11 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_22(a, value):
        a.U22 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_33(a, value):
        a.U33 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_12(a, value):
        a.U12 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_13(a, value):
        a.U13 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_U_23(a, value):
        a.U23 = leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_11(a, value):
        a.U11 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_22(a, value):
        a.U22 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_33(a, value):
        a.U33 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_12(a, value):
        a.U12 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_13(a, value):
        a.U13 = P_cif.BtoU * leading_float(value)

    @staticmethod
    def _tr_atom_site_aniso_B_23(a, value):
        a.U23 = P_cif.BtoU * leading_float(value)


    @staticmethod
    def _get_atom_setters(cifloop):
        """Find translators of CifLoop items to data in Atom instance.
        Static method.

        cifloop -- instance of CifLoop

        Return a list of setter functions in the order of cifloop.keys().
        Items without a dedicated setter are mapped to _tr_ignore.
        """
        rv = []
        for p in cifloop.keys():
            lcname = "_tr" + p.lower()
            fncname = P_cif._atom_setters.get(lcname, '_tr_ignore')
            f = getattr(P_cif, fncname)
            rv.append(f)
        return rv

    # normal methods ---------------------------------------------------------

    def __init__(self, eps=None):
        """Initialize the parser for CIF structure files.

        eps  -- fractional coordinates cutoff for duplicate positions.
                When None use the default for ExpandAsymmetricUnit.
        """
        StructureParser.__init__(self)
        self.format = "cif"
        self.ciffile = None
        self.stru = None
        self.spacegroup = None
        self.eps = eps
        self.eau = None
        self.asymmetric_unit = None
        self.labelindex = {}
        self.cif_sgname = None


    def parse(self, s):
        """Create Structure instance from a string in CIF format.

        s -- string with the content of a CIF file

        Return Structure instance or raise StructureFormatError.
        """
        self.ciffile = None
        self.filename = ''
        fp = six.StringIO(s)
        rv = self._parseCifDataSource(fp)
        return rv


    def parseLines(self, lines):
        """Parse list of lines in CIF format.

        lines -- list of strings stripped of line terminator

        Return Structure instance or raise StructureFormatError.
        """
        s = "\n".join(lines) + '\n'
        return self.parse(s)


    def parseFile(self, filename):
        """Create Structure from an existing CIF file.

        filename  -- path to structure file

        Return Structure object.
        Raise StructureFormatError or IOError.
        """
        self.ciffile = None
        self.filename = filename
        fileurl = _fixIfWindowsPath(filename)
        rv = self._parseCifDataSource(fileurl)
        # all good here
        return rv


    def _parseCifDataSource(self, datasource):
        """\
        Open and process CIF data from the specified `datasource`.


        Parameters
        ----------
        datasource : str or a file-like object
            This is used as an argument to the CifFile class.  The CifFile
            instance is stored in `ciffile` attribute of this Parser.

        Returns
        -------
        Structure
            The Structure object loaded from the specified data source.
            This is None when no block contains _atom_site_label.

        Raises
        ------
        StructureFormatError
            When the data do not constitute a valid CIF format.
        """
        from CifFile import CifFile, StarError
        self.stru = None
        try:
            with _suppressCifParserOutput():
                self.ciffile = CifFile(datasource)
                for blockname in self.ciffile.keys():
                    self._parseCifBlock(blockname)
                    # stop after reading the first structure
                    if self.stru is not None:
                        break
        except (StarError, ValueError, IndexError) as err:
            # translate parser failures to StructureFormatError
            # while keeping the original traceback
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err).strip()
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        return self.stru


    def _parseCifBlock(self, blockname):
        """Translate CIF file block, skip blocks without _atom_site_label.
        Updates data members stru, eau.

        blockname  -- name of top level block in self.ciffile

        No return value.
        """
        block = self.ciffile[blockname]
        if '_atom_site_label' not in block:
            return
        # here block contains structure, initialize output data
        self.stru = Structure()
        self.labelindex.clear()
        # execute specialized block parsers
        self._parse_lattice(block)
        self._parse_atom_site_label(block)
        self._parse_atom_site_aniso_label(block)
        self._parse_space_group_symop_operation_xyz(block)
        return


    def _parse_lattice(self, block):
        """Obtain lattice parameters from a CifBlock.
        This method updates self.stru.lattice.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when _cell_length_a is present,
        but some other lattice parameter is missing.
        """
        if '_cell_length_a' not in block:
            return
        # obtain lattice parameters
        try:
            latpars = (
                leading_float(block['_cell_length_a']),
                leading_float(block['_cell_length_b']),
                leading_float(block['_cell_length_c']),
                leading_float(block['_cell_angle_alpha']),
                leading_float(block['_cell_angle_beta']),
                leading_float(block['_cell_angle_gamma']),
            )
        except KeyError as err:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            emsg = str(err)
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, exc_traceback)
        self.stru.lattice = Lattice(*latpars)
        return


    def _parse_atom_site_label(self, block):
        """Obtain atoms in asymmetric unit from a CifBlock.
        This method inserts Atom instances to self.stru and
        updates labelindex dictionary.

        block -- instance of CifBlock

        No return value.
        """
        # process _atom_site_label
        atom_site_loop = block.GetLoop('_atom_site_label')
        # get a list of setters for atom_site values
        prop_setters = P_cif._get_atom_setters(atom_site_loop)
        # index of the _atom_site_label item for the labelindex dictionary
        ilb = atom_site_loop.keys().index('_atom_site_label')
        # loop through the values and pass them to the setters
        sitedatalist = zip(*atom_site_loop.values())
        for values in sitedatalist:
            curlabel = values[ilb]
            # skip entries that have invalid label
            if curlabel == '?':
                continue
            self.labelindex[curlabel] = len(self.stru)
            self.stru.addNewAtom()
            a = self.stru.getLastAtom()
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return


    def _parse_atom_site_aniso_label(self, block):
        """Obtain value of anisotropic thermal displacements from a CifBlock.
        This method updates U members of Atom instances in self.stru.
        The labelindex dictionary has to be defined beforehand.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when some _atom_site_aniso_label
        does not refer to any atom from the _atom_site_label loop.
        """
        if '_atom_site_aniso_label' not in block:
            return
        # was anisotropy set in the _atom_site_label loop?
        atom_site_loop = block.GetLoop('_atom_site_label')
        anisotropy_already_set = (
            '_atom_site_adp_type' in atom_site_loop or
            '_atom_site_thermal_displace_type' in atom_site_loop)
        # something to do here:
        adp_loop = block.GetLoop('_atom_site_aniso_label')
        # index of the _atom_site_label column
        ilb = adp_loop.keys().index('_atom_site_aniso_label')
        # get a list of setters for this loop
        prop_setters = P_cif._get_atom_setters(adp_loop)
        sitedatalist = zip(*adp_loop.values())
        for values in sitedatalist:
            lb = values[ilb]
            # skip entries that have invalid label in the same way
            # as it is done for the _atom_site_label loop
            if lb == '?':
                continue
            # report dangling labels as format error rather than KeyError,
            # which would not get translated in _parseCifDataSource
            if lb not in self.labelindex:
                emsg = ("_atom_site_aniso_label {!r} does not match "
                        "any _atom_site_label.").format(lb)
                raise StructureFormatError(emsg)
            a = self.stru[self.labelindex[lb]]
            if not anisotropy_already_set:
                a.anisotropy = True
            for fset, val in zip(prop_setters, values):
                fset(a, val)
        return


    def _parse_space_group_symop_operation_xyz(self, block):
        """Process symmetry operations from a CifBlock.  The method
        updates spacegroup and eau data according to symmetry
        operations defined in _space_group_symop_operation_xyz or
        _symmetry_equiv_pos_as_xyz items in CifBlock.

        block -- instance of CifBlock

        No return value.
        Raise StructureFormatError when space group cannot be determined.
        """
        from diffpy.structure.spacegroups import IsSpaceGroupIdentifier
        from diffpy.structure.spacegroups import SpaceGroup, GetSpaceGroup
        self.asymmetric_unit = list(self.stru)
        sym_synonyms = ('_space_group_symop_operation_xyz',
                        '_symmetry_equiv_pos_as_xyz')
        sym_loop_name = [n for n in sym_synonyms if n in block]
        # recover explicit list of symmetry operations
        symop_list = []
        if sym_loop_name:
            # sym_loop exists here and we know its cif name
            sym_loop_name = sym_loop_name[0]
            sym_loop = block.GetLoop(sym_loop_name)
            for eqxyz in sym_loop[sym_loop_name]:
                opcif = getSymOp(eqxyz)
                symop_list.append(opcif)
        # determine space group number
        sg_nameHall = (block.get('_space_group_name_Hall', '') or
                block.get('_symmetry_space_group_name_Hall', ''))
        sg_nameHM = (block.get('_space_group_name_H-M_alt', '') or
                block.get('_symmetry_space_group_name_H-M', ''))
        self.cif_sgname = (sg_nameHall or sg_nameHM or None)
        # prefer space group number, fall back to the H-M symbol
        sgid = (int(block.get('_space_group_IT_number', '0')) or
                int(block.get('_symmetry_Int_Tables_number', '0')) or
                sg_nameHM)
        # try to reuse existing space group
        self.spacegroup = None
        if sgid and IsSpaceGroupIdentifier(sgid):
            sgstd = GetSpaceGroup(sgid)
            oprep_std = [str(op) for op in sgstd.iter_symops()]
            oprep_std.sort()
            oprep_cif = [str(op) for op in symop_list]
            oprep_cif.sort()
            # make sure symmetry operations have the same order
            if oprep_std == oprep_cif:
                self.spacegroup = copy.copy(sgstd)
                self.spacegroup.symop_list = symop_list
            # use standard definition when symmetry operations were not listed
            elif not symop_list:
                self.spacegroup = sgstd
        # define new spacegroup when symmetry operations were listed, but
        # there is no match to an existing definition
        if symop_list and self.spacegroup is None:
            new_short_name = "CIF " + (sg_nameHall or 'data')
            new_crystal_system = (
                    block.get('_space_group_crystal_system') or
                    block.get('_symmetry_cell_setting') or
                    'TRICLINIC' ).upper()
            self.spacegroup = SpaceGroup(
                    short_name=new_short_name,
                    crystal_system=new_crystal_system,
                    symop_list=symop_list)
        if self.spacegroup is None:
            emsg = "CIF file has unknown space group identifier {!r}."
            raise StructureFormatError(emsg.format(sgid))
        self._expandAsymmetricUnit()
        return


    def _expandAsymmetricUnit(self):
        """Perform symmetry expansion of self.stru using self.spacegroup.
        This method updates data in stru and eau.

        No return value.
        """
        from diffpy.structure.symmetryutilities import ExpandAsymmetricUnit
        # positions and displacement parameters in the asymmetric unit
        corepos = [a.xyz for a in self.stru]
        coreUijs = [a.U for a in self.stru]
        self.eau = ExpandAsymmetricUnit(self.spacegroup, corepos, coreUijs,
                                        eps=self.eps)
        # build a flat list of new atoms, where the symmetry equivalents
        # of each core atom are grouped together in the core atom order
        newatoms = []
        for i, ca in enumerate(self.stru):
            for j in range(self.eau.multiplicity[i]):
                a = Atom(ca)
                a.xyz = self.eau.expandedpos[i][j]
                # keep the original label for the first equivalent and
                # append position number to all others
                if j > 0:
                    a.label += '_' + str(j + 1)
                if a.anisotropy:
                    a.U = self.eau.expandedUijs[i][j]
                newatoms.append(a)
        # replace the asymmetric unit with the expanded atoms
        self.stru[:] = newatoms
        return

    # conversion to CIF ------------------------------------------------------

    def toLines(self, stru):
        """Convert Structure stru to a list of lines in basic CIF format.
        The structure is written in P1 symmetry with site labels built
        from the element symbol and its running count.

        stru -- Structure instance to be converted

        Return list of strings.
        """
        import time
        lines = []
        # may be replaced with filtered Structure.title
        # for now, we can add the title as a comment
        if stru.title.strip() != "":
            title_lines = stru.title.split('\n')
            lines.extend([ "# " + line.strip() for line in title_lines ])
            lines.append("")
        lines.append("data_3D")
        iso_date =  "%04i-%02i-%02i" % time.gmtime()[:3]
        lines.extend([
            "%-31s %s" % ("_audit_creation_date", iso_date),
            "%-31s %s" % ("_audit_creation_method", "P_cif.py"),
            "",
            "%-31s %s" % ("_symmetry_space_group_name_H-M", "'P1'"),
            "%-31s %s" % ("_symmetry_Int_Tables_number", "1"),
            "%-31s %s" % ("_symmetry_cell_setting", "triclinic"),
            "" ])
        # there should be no need to specify equivalent positions for P1
        # _symmetry_equiv_pos_as_xyz x,y,z
        lines.extend([
            "%-31s %.6g" % ("_cell_length_a", stru.lattice.a),
            "%-31s %.6g" % ("_cell_length_b", stru.lattice.b),
            "%-31s %.6g" % ("_cell_length_c", stru.lattice.c),
            "%-31s %.6g" % ("_cell_angle_alpha", stru.lattice.alpha),
            "%-31s %.6g" % ("_cell_angle_beta", stru.lattice.beta),
            "%-31s %.6g" % ("_cell_angle_gamma", stru.lattice.gamma),
            "" ])
        # build a list of site labels and adp (displacement factor) types
        element_count = {}
        a_site_label = []
        a_adp_type = []
        for a in stru:
            cnt = element_count[a.element] = element_count.get(a.element,0)+1
            a_site_label.append( "%s%i" % (a.element, cnt) )
            # isotropic atoms have U equal to a multiple of unit matrix
            if numpy.all(a.U == a.U[0,0]*numpy.identity(3)):
                a_adp_type.append("Uiso")
            else:
                a_adp_type.append("Uani")
        # list all atoms
        lines.extend([
            "loop_",
            "  _atom_site_label",
            "  _atom_site_type_symbol",
            "  _atom_site_fract_x",
            "  _atom_site_fract_y",
            "  _atom_site_fract_z",
            "  _atom_site_U_iso_or_equiv",
            "  _atom_site_adp_type",
            "  _atom_site_occupancy" ])
        for a, label, adp_type in zip(stru, a_site_label, a_adp_type):
            line = "  %-5s %-3s %11.6f %11.6f %11.6f %11.6f %-5s %.4f" % (
                    label, a.element, a.xyz[0], a.xyz[1], a.xyz[2],
                    a.Uisoequiv, adp_type, a.occupancy  )
            lines.append(line)
        # find anisotropic atoms
        aniso_sites = [ (label, a)
                for a, label, adp_type in zip(stru, a_site_label, a_adp_type)
                if adp_type != "Uiso" ]
        if aniso_sites:
            lines.extend([
                "loop_",
                "  _atom_site_aniso_label",
                "  _atom_site_aniso_U_11",
                "  _atom_site_aniso_U_22",
                "  _atom_site_aniso_U_33",
                "  _atom_site_aniso_U_12",
                "  _atom_site_aniso_U_13",
                "  _atom_site_aniso_U_23" ])
            for label, a in aniso_sites:
                line = "  %-5s %9.6f %9.6f %9.6f %9.6f %9.6f %9.6f" % (
                        label, a.U[0,0], a.U[1,1], a.U[2,2],
                        a.U[0,1], a.U[0,2], a.U[1,2] )
                lines.append(line)
        return lines
예제 #8
0
    def parseLines(self, lines):
        """Parse list of lines in XYZ format.

        The first line that is neither blank nor led by a "#" word must
        contain a single integer, the number of atoms.  The line after it
        is stored as the structure title and every following non-blank
        line is read as a 4-column "element x y z" record.

        lines -- list of text lines to be parsed

        Return Structure object or raise StructureFormatError.
        """
        words_per_line = [text.split() for text in lines]
        # prepare output structure
        stru = Structure()
        # skip leading blank lines and lines whose first word is "#"
        start = 0
        for words in words_per_line:
            if words and words[0] != "#":
                break
            start += 1
        # the first valid line gives number of atoms, the next one the title
        header_emsg = ("%d: invalid XYZ format, missing number of atoms" %
                       (start + 1))
        try:
            header = words_per_line[start]
            count_word = header[0]
            # the str(int()) round trip accepts only plain integer text
            if len(header) != 1 or str(int(count_word)) != count_word:
                raise StructureFormatError(header_emsg)
            p_natoms = int(count_word)
            stru.title = lines[start + 1].strip()
        except (IndexError, ValueError):
            # a missing or malformed header becomes a format error that
            # carries the traceback of the original exception
            e = StructureFormatError(header_emsg)
            six.reraise(StructureFormatError, e, sys.exc_info()[2])
        # move past the atom-count and title lines
        start += 2
        # ignore trailing blank lines when looking for the last record
        stop = len(lines)
        while stop > start and not words_per_line[stop - 1]:
            stop -= 1
        # get out for empty structure
        if p_natoms == 0 or start >= stop:
            return stru
        # here we have at least one valid record line
        nfields = len(words_per_line[start])
        if nfields != 4:
            emsg = "%d: invalid XYZ format, expected 4 columns" % (start + 1)
            raise StructureFormatError(emsg)
        # now try to read all record lines, p_nl is the 1-based line number
        try:
            for p_nl, words in enumerate(words_per_line[start:], start + 1):
                if not words:
                    continue
                if len(words) != nfields:
                    emsg = ('%d: all lines must have '
                            'the same number of columns') % p_nl
                    raise StructureFormatError(emsg)
                # normalize element symbol to capitalized form, e.g. "Na"
                symbol = words[0]
                element = symbol[0].upper() + symbol[1:].lower()
                xyz = [float(w) for w in words[1:4]]
                stru.addNewAtom(element, xyz=xyz)
        except ValueError:
            emsg = "%d: invalid number format" % p_nl
            e = StructureFormatError(emsg)
            six.reraise(StructureFormatError, e, sys.exc_info()[2])
        # finally check if all the atoms have been read
        natoms_read = len(stru)
        if natoms_read != p_natoms:
            emsg = "expected %d atoms, read %d" % (p_natoms, natoms_read)
            raise StructureFormatError(emsg)
        return stru