Ejemplo n.º 1
0
def test_opt_scflog_merge():
    """Merge a parsed optimisation output with its SCF log and compare
    the result against the stored reference JSON."""
    plugins.load_plugin_classes(
        [CrystalOutputPlugin, CrystalSCFLogPlugin], "parsers")

    out_path = os.path.join(file_folder, "scf_and_opt.crystal.out")
    parsed_out = plugins.parse(out_path)
    scflog_path = os.path.join(file_folder, "scf_and_opt.crystal.scflog")
    parsed_scflog = plugins.parse(scflog_path)
    merged = edict.merge([parsed_out, parsed_scflog], list_of_dicts=True)

    # To regenerate the reference file, write ``merged`` to
    # "opt_merge_scflog.crystal.out.json" in ``file_folder`` using:
    #     json.dump(merged, f, indent=1, default=plugins.encode)

    reference_path = os.path.join(file_folder,
                                  "opt_merge_scflog.crystal.out.json")
    reference = ejson.to_dict(reference_path)
    difference = edict.diff(merged, reference, np_allclose=True)
    assert difference == {}
    def validate(self):
        """Validate the inputs and cache derived data on the context.

        Stores the merged settings as ``self.ctx.settings`` and the
        converted structure as ``self.ctx.structdict``.

        Raises
        ------
        ValidationError
            if any entry of the structure's ``pbc`` is falsy

        """
        # only fully periodic (3D) structures are accepted
        pbc_flags = self.inputs.structure.pbc
        if not all(pbc_flags):
            raise ValidationError(
                "the structure must be 3D (i.e. have all dimensions pbc=True)")

        # user supplied settings are optional; they override the defaults
        if "settings" in self.inputs:
            user_settings = self.inputs.settings.get_dict()
        else:
            user_settings = {}
        merged_settings = edict.merge(
            [self._settings_defaults, user_settings], overwrite=True)

        validate_with_dict(merged_settings, self._settings_schema)

        self.ctx.settings = AttributeDict(merged_settings)
        self.ctx.structdict = structure_to_dict(self.inputs.structure)
Ejemplo n.º 3
0
def format_config_yaml(file_obj, errormsg_only=False):
    """Read a YAML config, merge the defaults into each run and validate it.

    The top-level document is validated against ``_config_schema``. Each
    entry of ``runs`` is then merged on top of the defaults (global
    defaults overridden by the file's own ``defaults`` section) and
    validated against ``_run_schema``. Remote sections without a hostname
    are replaced by ``None``, as is an ``input`` section whose fields are
    all ``None``.

    Parameters
    ----------
    file_obj : str or file_like
        path to the config file, or an object the YAML loader can read
    errormsg_only: bool
        only return the human readable message part of the jsonschema.ValidationError

    Returns
    -------
    list of dict
        the merged and validated runs, in file order

    Raises
    ------
    ValidationError
        if the top-level config or a run fails schema validation,
        or if the run ids are not unique

    """
    logger.info("reading config: {}".format(file_obj))

    # NOTE(review): ``basestring`` is a Python 2 builtin; presumably a
    # compatibility alias is provided elsewhere in this module - confirm.
    if isinstance(file_obj, basestring):
        file_obj = pathlib.Path(file_obj)

    ryaml = YAML()
    dct = ryaml.load(file_obj)

    logger.info("validating & formatting config: {}".format(file_obj))

    try:
        validate(dct, _config_schema)
    except ValidationError as err:
        if errormsg_only:
            err = err.message
        raise ValidationError("error in top-level config: {0}".format(err))

    runs = []
    defaults = edict.merge(
        [_global_defaults, dct.get('defaults', {})], overwrite=True)

    for i, run in enumerate(dct['runs']):

        new_run = edict.merge([defaults, run], overwrite=True)
        try:
            validate(new_run, _run_schema)
        except ValidationError as err:
            if errormsg_only:
                err = err.message
            # runs are reported 1-based in the error message
            raise ValidationError("error in run #{0} config: {1}".format(
                i + 1, err))

        run_input = new_run["input"]
        if run_input is not None:
            # a remote without a hostname is treated as "no remote"
            if (run_input["remote"] is not None
                    and run_input["remote"]["hostname"] is None):
                run_input["remote"] = None
            # drop the whole input section if nothing in it is set
            all_none = True
            for field in ["remote", "path", "scripts", "files", "variables"]:
                if run_input[field] is not None:
                    all_none = False
            if all_none:
                new_run["input"] = None

        # Guard against an explicit ``remote: null`` in the same way as for
        # the input section; indexing None here would raise a TypeError.
        output_remote = new_run["output"]["remote"]
        if output_remote is not None and output_remote["hostname"] is None:
            new_run["output"]["remote"] = None

        runs.append(new_run)

    ids = edict.filter_keys(runs, ['id'], list_of_dicts=True)
    ids = edict.combine_lists(ids)['id']
    if len(set(ids)) != len(ids):
        raise ValidationError("the run ids are not unique: {}".format(ids))

    return runs
Ejemplo n.º 4
0
def _is_atom_table_row(lines, lineno):
    """Return True if ``lines[lineno]`` exists, is not blank and does not start with a letter."""
    if lineno >= len(lines):
        return False
    stripped = lines[lineno].strip()
    return bool(stripped) and not stripped[0].isalpha()


def parse_geometry_section(data, initial_lineno, line, lines):
    """Parse a section of geometry related variables.

    ``data`` is updated in place and nothing is returned. Only the section
    whose header matches ``line`` is read; a line that matches no known
    header leaves ``data`` unchanged.

    Parameters
    ----------
    data: dict
        existing data to add the geometry data to
    initial_lineno: int
        index of ``line`` within ``lines``
    line: str
        the current line
    lines: list[str]
        all lines being parsed

    Raises
    ------
    IOError
        if a header or units line does not have the expected layout

    Notes
    -----

    For initial and 'FINAL OPTIMIZED GEOMETRY' only::

        DIRECT LATTICE VECTORS CARTESIAN COMPONENTS (ANGSTROM)
                X                    Y                    Z
        0.355114561000E+01   0.000000000000E+00   0.000000000000E+00
        0.000000000000E+00   0.355114561000E+01   0.000000000000E+00
        0.000000000000E+00   0.000000000000E+00   0.535521437000E+01


        CARTESIAN COORDINATES - PRIMITIVE CELL
        *******************************************************************************
        *      ATOM          X(ANGSTROM)         Y(ANGSTROM)         Z(ANGSTROM)
        *******************************************************************************
            1    26 FE    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00
            2    26 FE    1.775572805000E+00  1.775572805000E+00  0.000000000000E+00
            3    16 S    -1.110223024625E-16  1.775572805000E+00  1.393426779074E+00
            4    16 S     1.775572805000E+00  7.885127240037E-16 -1.393426779074E+00

    For initial, final and optimisation steps:

    Primitive cell::

        PRIMITIVE CELL - CENTRING CODE 1/0 VOLUME=    36.099581 - DENSITY  6.801 g/cm^3
                A              B              C           ALPHA      BETA       GAMMA
            2.94439264     2.94439264     4.16400000    90.000000  90.000000  90.000000
        *******************************************************************************
        ATOMS IN THE ASYMMETRIC UNIT    4 - ATOMS IN THE UNIT CELL:    4
            ATOM                 X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  28 NI    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    Crystallographic cell (only if the geometry is not originally primitive)::

        CRYSTALLOGRAPHIC CELL (VOLUME=         74.61846100)
                A              B              C           ALPHA      BETA       GAMMA
            4.21000000     4.21000000     4.21000000    90.000000  90.000000  90.000000

        COORDINATES IN THE CRYSTALLOGRAPHIC CELL
            ATOM                 X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  12 MG    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    An atom table ends at the first blank line, the first line starting with
    a letter, or the end of ``lines``.

    """
    cell_keys = ["a", "b", "c", "alpha", "beta", "gamma"]

    # check that units are correct (probably not needed)
    if fnmatch(line, "LATTICE PARAMETERS*(*)"):
        if not ("ANGSTROM" in line and "DEGREES" in line):
            raise IOError(
                "was expecting lattice parameters in angstroms and degrees on line:"
                " {0}, got: {1}".format(initial_lineno, line))
        return

    # each tuple is (cell header pattern, key in ``data``, atom table header pattern)
    for pattern, field, pattern2 in [
        ("PRIMITIVE*CELL*", "primitive_cell", "ATOMS IN THE ASYMMETRIC UNIT*"),
        (
            "CRYSTALLOGRAPHIC*CELL*",
            "crystallographic_cell",
            "COORDINATES IN THE CRYSTALLOGRAPHIC CELL",
        ),
    ]:
        if fnmatch(line, pattern):
            # cell header: the column titles follow, then the six parameters
            if not fnmatch(lines[initial_lineno + 1].strip(),
                           "A*B*C*ALPHA*BETA*GAMMA"):
                raise IOError("was expecting A B C ALPHA BETA GAMMA on line:"
                              " {0}, got: {1}".format(
                                  initial_lineno + 1,
                                  lines[initial_lineno + 1]))
            data[field] = edict.merge([
                data.get(field, {}),
                {
                    "cell_parameters":
                    dict(
                        zip(
                            cell_keys,
                            split_numbers(lines[initial_lineno + 2]),
                        ))
                },
            ])
        elif fnmatch(line, pattern2):
            # atom table: the column titles tell which directions are periodic
            column_titles = lines[initial_lineno + 1].strip()
            periodic = [True, True, True]
            if not fnmatch(column_titles, "ATOM*X/A*Y/B*Z/C"):
                # for 2d (slab) can get z in angstrom (and similar for 1d)
                if fnmatch(column_titles, "ATOM*X/A*Y/B*Z(ANGSTROM)*"):
                    periodic = [True, True, False]
                elif fnmatch(column_titles,
                             "ATOM*X/A*Y(ANGSTROM)*Z(ANGSTROM)*"):
                    periodic = [True, False, False]
                elif fnmatch(column_titles,
                             "ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)*"):
                    periodic = [False, False, False]
                    # no periodic direction: a fixed 500 x 500 x 500 cell
                    # with right angles is stored as the cell parameters
                    cell_params = dict(
                        zip(
                            cell_keys,
                            [500.0, 500.0, 500.0, 90.0, 90.0, 90.0],
                        ))
                    data[field] = edict.merge([
                        data.get(field, {}), {
                            "cell_parameters": cell_params
                        }
                    ])
                else:
                    raise IOError(
                        "was expecting ATOM X Y Z (in units of ANGSTROM or fractional) on line:"
                        " {0}, got: {1}".format(initial_lineno + 1,
                                                lines[initial_lineno + 1]))
            if not all(periodic) and "cell_parameters" not in data.get(
                    field, {}):
                raise IOError(
                    "require cell parameters to have been set for non-periodic directions in line"
                    " #{0} : {1}".format(initial_lineno + 1,
                                         lines[initial_lineno + 1]))
            a, b, c, alpha, beta, gamma = [None] * 6
            if not all(periodic):
                cell = data[field]["cell_parameters"]
                a, b, c, alpha, beta, gamma = [cell[p] for p in cell_keys]

            # the rows start after the column titles and a separator line
            curr_lineno = initial_lineno + 3
            atom_data = {
                "ids": [],
                "assymetric": [],
                "atomic_numbers": [],
                "symbols": [],
            }
            atom_data["pbc"] = periodic
            # bounded so that a table running to the end of the input does
            # not raise an IndexError
            while _is_atom_table_row(lines, curr_lineno):
                fields = lines[curr_lineno].strip().split()
                atom_data["ids"].append(fields[0])
                atom_data["assymetric"].append(bool(strtobool(fields[1])))
                atom_data["atomic_numbers"].append(int(fields[2]))
                atom_data["symbols"].append(fields[3].lower().capitalize())
                if all(periodic):
                    atom_data.setdefault("fcoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])
                elif periodic == [True, True, False
                                  ] and alpha == 90 and beta == 90:
                    # z is given in angstrom: divide by c to get a
                    # fractional coordinate
                    atom_data.setdefault("fcoords", []).append([
                        float(fields[4]),
                        float(fields[5]),
                        float(fields[6]) / c
                    ])
                elif periodic == [False, False, False]:
                    atom_data.setdefault("ccoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])

                # TODO other periodic types (1D)
                curr_lineno += 1

            data[field] = edict.merge([data.get(field, {}), atom_data])

    # TODO These coordinates are present in initial and final optimized sections,
    # but DON'T work with lattice parameters
    if fnmatch(line, "CARTESIAN COORDINATES - PRIMITIVE CELL*"):
        if not fnmatch(
                lines[initial_lineno + 2].strip(),
                "*ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)",
        ):
            raise IOError(
                "was expecting ATOM X(ANGSTROM) Y(ANGSTROM) Z(ANGSTROM) on line:"
                " {0}, got: {1}".format(initial_lineno + 2,
                                        lines[initial_lineno + 2]))

        # the rows start after separator, column titles and separator lines
        curr_lineno = initial_lineno + 4
        atom_data = {
            "ids": [],
            "atomic_numbers": [],
            "symbols": [],
            "ccoords": []
        }
        # bounded so that a table running to the end of the input does not
        # raise an IndexError
        while _is_atom_table_row(lines, curr_lineno):
            fields = lines[curr_lineno].strip().split()
            if len(fields) < 6:
                raise IOError("was expecting ID ANUM SYMBOL X Y Z on line:"
                              " {0}, got: {1}".format(curr_lineno,
                                                      lines[curr_lineno]))
            atom_data["ids"].append(fields[0])
            atom_data["atomic_numbers"].append(int(fields[1]))
            atom_data["symbols"].append(fields[2].lower().capitalize())
            atom_data["ccoords"].append(
                [float(fields[3]),
                 float(fields[4]),
                 float(fields[5])])
            curr_lineno += 1
        data["primitive_cell"] = edict.merge(
            [data.get("primitive_cell", {}), atom_data])

    elif fnmatch(line, "DIRECT LATTICE VECTORS CARTESIAN COMPONENTS*"):
        if "ANGSTROM" not in line:
            raise IOError("was expecting lattice vectors in angstroms on line:"
                          " {0}, got: {1}".format(initial_lineno, line))
        if not fnmatch(lines[initial_lineno + 1].strip(), "X*Y*Z"):
            raise IOError("was expecting X Y Z on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        # NOTE(review): this guard inspects (and creates)
        # "crystallographic_cell", yet the vectors are stored under
        # "primitive_cell" below. Left unchanged because callers may rely on
        # the resulting keys - confirm which cell was intended.
        if "crystallographic_cell" not in data:
            data["crystallographic_cell"] = {}
        if "cell_vectors" in data["crystallographic_cell"]:
            raise IOError("found multiple cell vectors on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        vectors = {
            "a": split_numbers(lines[initial_lineno + 2]),
            "b": split_numbers(lines[initial_lineno + 3]),
            "c": split_numbers(lines[initial_lineno + 4]),
        }

        # create the primitive cell entry if it has not been parsed yet,
        # rather than failing with a KeyError
        data.setdefault("primitive_cell", {})["cell_vectors"] = vectors
Ejemplo n.º 5
0
def get_schema(eltypes=None, trtypes=None):
    """Build the validation schema for a document of elements and transforms.

    Parameters
    ----------
    eltypes: list or None
        element schemas to allow (if None, allow all)
    trtypes: list or None
        transform schemas to allow (if None, allow all)

    Returns
    -------
    dict
        an object schema requiring an ``elements`` array and a
        ``transforms`` array; every element must itself carry a
        ``transforms`` array

    Notes
    -----
    When more than one schema is allowed they are attached under ``oneOf``.
    A single transform schema is used directly as the ``items`` schema, and
    a single element schema is merged into the element ``items`` schema.

    """
    # look up the element schemas first, then the transform schemas
    if eltypes is None:
        element_schemas = list(_element_schema.values())
    else:
        element_schemas = [_element_schema[name] for name in eltypes]
    if trtypes is None:
        transform_schemas = list(_transform_schema.values())
    else:
        transform_schemas = [_transform_schema[name] for name in trtypes]

    # item schema for the top-level transforms and for each element's own
    # transforms
    if len(transform_schemas) > 1:
        top_level_items = {"oneOf": transform_schemas}
        per_element_items = {"oneOf": transform_schemas}
    else:
        top_level_items = per_element_items = transform_schemas[0]

    # item schema for the entries of the elements array
    element_items = {
        'type': 'object',
        'required': ['transforms'],
        'properties': {
            'transforms': {
                'type': 'array',
                'items': per_element_items,
            },
        },
    }
    if len(element_schemas) > 1:
        element_items["oneOf"] = element_schemas
    else:
        element_items = edict.merge(
            [element_items, element_schemas[0]], append=True)

    return {
        'type': 'object',
        'required': ['elements', 'transforms'],
        'properties': {
            'elements': {
                'type': 'array',
                'items': element_items,
            },
            'transforms': {
                'type': 'array',
                'items': top_level_items,
            },
        },
    }