def test_opt_scflog_merge():
    """Merge the parsed stdout and SCF log, then compare with the reference JSON."""
    plugins.load_plugin_classes(
        [CrystalOutputPlugin, CrystalSCFLogPlugin], "parsers")

    stdout_path = os.path.join(file_folder, "scf_and_opt.crystal.out")
    parsed_stdout = plugins.parse(stdout_path)
    scflog_path = os.path.join(file_folder, "scf_and_opt.crystal.scflog")
    parsed_scflog = plugins.parse(scflog_path)

    merged = edict.merge([parsed_stdout, parsed_scflog], list_of_dicts=True)

    reference_path = os.path.join(file_folder,
                                  "opt_merge_scflog.crystal.out.json")
    # Disabled snippet that dumps the merged result to the reference file:
    # with open(reference_path, "w") as f:
    #     json.dump(merged, f, indent=1, default=plugins.encode)
    reference = ejson.to_dict(reference_path)

    # an empty diff means the merged data matches the stored reference
    differences = edict.diff(merged, reference, np_allclose=True)
    assert differences == {}
def validate(self):
    """Check the inputs and cache derived values on the context.

    The user settings (if supplied) are merged over ``_settings_defaults``,
    checked against ``_settings_schema`` and stored as ``ctx.settings``;
    the structure is converted with ``structure_to_dict`` and stored as
    ``ctx.structdict``.

    Raises
    ------
    ValidationError
        if the input structure is not periodic in every dimension

    """
    structure = self.inputs.structure

    # only allow 3d structures
    if not all(structure.pbc):
        raise ValidationError(
            "the structure must be 3D (i.e. have all dimensions pbc=True)")

    if "settings" in self.inputs:
        user_settings = self.inputs.settings.get_dict()
    else:
        user_settings = {}

    # user supplied values take precedence over the defaults
    merged_settings = edict.merge(
        [self._settings_defaults, user_settings], overwrite=True)
    validate_with_dict(merged_settings, self._settings_schema)

    self.ctx.settings = AttributeDict(merged_settings)
    self.ctx.structdict = structure_to_dict(self.inputs.structure)
def format_config_yaml(file_obj, errormsg_only=False):
    """Load a YAML config and return its list of validated runs.

    The top-level config is validated, the ``defaults`` section is merged
    over the global defaults and then into every run, each run is validated
    against the run schema, and empty ``input`` / ``remote`` sections are
    collapsed to ``None``.

    Parameters
    ----------
    file_obj : str or file_like
    errormsg_only: bool
        only return the human readable message part of the
        jsonschema.ValidationError

    Returns
    -------
    list
        the formatted run configurations

    Raises
    ------
    ValidationError
        if the config or a run fails validation, or the run ids are not unique

    """
    logger.info("reading config: {}".format(file_obj))
    if isinstance(file_obj, basestring):
        file_obj = pathlib.Path(file_obj)
    config = YAML().load(file_obj)

    logger.info("validating & formatting config: {}".format(file_obj))
    try:
        validate(config, _config_schema)
    except ValidationError as err:
        detail = err.message if errormsg_only else err
        raise ValidationError(
            "error in top-level config: {0}".format(detail))

    # config level defaults take precedence over the global ones
    defaults = edict.merge(
        [_global_defaults, config.get('defaults', {})], overwrite=True)

    input_fields = ["remote", "path", "scripts", "files", "variables"]
    runs = []
    for run_number, run in enumerate(config['runs'], start=1):
        new_run = edict.merge([defaults, run], overwrite=True)
        try:
            validate(new_run, _run_schema)
        except ValidationError as err:
            detail = err.message if errormsg_only else err
            raise ValidationError("error in run #{0} config: {1}".format(
                run_number, detail))

        run_input = new_run["input"]
        if run_input is not None:
            # a remote section without a hostname counts as not set
            remote = run_input["remote"]
            if remote is not None and remote["hostname"] is None:
                run_input["remote"] = None
            # drop the whole input section when none of its fields are set
            if all([run_input[name] is None for name in input_fields]):
                new_run["input"] = None

        if new_run["output"]["remote"]["hostname"] is None:
            new_run["output"]["remote"] = None

        runs.append(new_run)

    id_dicts = edict.filter_keys(runs, ['id'], list_of_dicts=True)
    ids = edict.combine_lists(id_dicts)['id']
    if len(set(ids)) != len(ids):
        raise ValidationError("the run ids are not unique: {}".format(ids))

    return runs
def parse_geometry_section(data, initial_lineno, line, lines):
    """Parse a section of geometry related variables.

    ``line`` is matched against the known section headers; when one matches,
    the rows that follow it are read from ``lines`` (relative to
    ``initial_lineno``) and merged into ``data`` in place.
    Nothing is returned.

    Parameters
    ----------
    data: dict
        existing data to add the geometry data to
    initial_lineno: int
        position of the current line; following rows are read from
        ``lines[initial_lineno + 1]`` onwards
    line: str
        the current line
    lines: list[str]
        the lines being parsed

    Raises
    ------
    IOError
        if the units, the column titles or a coordinate row following a
        recognised header are not in the expected format

    Notes
    -----
    For initial and 'FINAL OPTIMIZED GEOMETRY' only::

        DIRECT LATTICE VECTORS CARTESIAN COMPONENTS (ANGSTROM)
                X                    Y                    Z
        0.355114561000E+01   0.000000000000E+00   0.000000000000E+00
        0.000000000000E+00   0.355114561000E+01   0.000000000000E+00
        0.000000000000E+00   0.000000000000E+00   0.535521437000E+01

        CARTESIAN COORDINATES - PRIMITIVE CELL
        *******************************************************************************
        *      ATOM          X(ANGSTROM)         Y(ANGSTROM)         Z(ANGSTROM)
        *******************************************************************************
            1    26 FE    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00
            2    26 FE    1.775572805000E+00  1.775572805000E+00  0.000000000000E+00
            3    16 S    -1.110223024625E-16  1.775572805000E+00  1.393426779074E+00
            4    16 S     1.775572805000E+00  7.885127240037E-16 -1.393426779074E+00

    For initial, final and optimisation steps:

    Primitive cell::

        PRIMITIVE CELL - CENTRING CODE 1/0 VOLUME=    36.099581 - DENSITY  6.801 g/cm^3
                A              B              C           ALPHA      BETA       GAMMA
            2.94439264     2.94439264     4.16400000    90.000000  90.000000  90.000000
        *******************************************************************************
        ATOMS IN THE ASYMMETRIC UNIT    4 - ATOMS IN THE UNIT CELL:    4
            ATOM              X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  28 NI    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    Crystallographic cell (only if the geometry is not originally primitive)::

        CRYSTALLOGRAPHIC CELL (VOLUME=         74.61846100)
                A              B              C           ALPHA      BETA       GAMMA
            4.21000000     4.21000000     4.21000000    90.000000  90.000000  90.000000

        COORDINATES IN THE CRYSTALLOGRAPHIC CELL
            ATOM              X/A                 Y/B                 Z/C
        *******************************************************************************
            1 T  12 MG    0.000000000000E+00  0.000000000000E+00  0.000000000000E+00

    """
    cell_keys = ["a", "b", "c", "alpha", "beta", "gamma"]

    # check that units are correct (probably not needed)
    if fnmatch(line, "LATTICE PARAMETERS*(*)"):
        if not ("ANGSTROM" in line and "DEGREES" in line):
            raise IOError(
                "was expecting lattice parameters in angstroms and degrees on line:"
                " {0}, got: {1}".format(initial_lineno, line))
        return

    # (cell header pattern, key in data, atom table header pattern)
    for pattern, field, pattern2 in [
        ("PRIMITIVE*CELL*", "primitive_cell", "ATOMS IN THE ASYMMETRIC UNIT*"),
        (
            "CRYSTALLOGRAPHIC*CELL*",
            "crystallographic_cell",
            "COORDINATES IN THE CRYSTALLOGRAPHIC CELL",
        ),
    ]:
        if fnmatch(line, pattern):
            # cell header: next row holds the titles, the one after the values
            if not fnmatch(lines[initial_lineno + 1].strip(),
                           "A*B*C*ALPHA*BETA*GAMMA"):
                raise IOError("was expecting A B C ALPHA BETA GAMMA on line:"
                              " {0}, got: {1}".format(
                                  initial_lineno + 1,
                                  lines[initial_lineno + 1]))
            data[field] = edict.merge([
                data.get(field, {}),
                {
                    "cell_parameters":
                    dict(
                        zip(
                            cell_keys,
                            split_numbers(lines[initial_lineno + 2]),
                        ))
                },
            ])
        elif fnmatch(line, pattern2):
            # atom table: the column titles tell which directions are periodic
            header = lines[initial_lineno + 1].strip()
            periodic = [True, True, True]
            if not fnmatch(header, "ATOM*X/A*Y/B*Z/C"):
                # for 2d (slab) can get z in angstrom (and similar for 1d)
                if fnmatch(header, "ATOM*X/A*Y/B*Z(ANGSTROM)*"):
                    periodic = [True, True, False]
                elif fnmatch(header, "ATOM*X/A*Y(ANGSTROM)*Z(ANGSTROM)*"):
                    periodic = [True, False, False]
                elif fnmatch(header,
                             "ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)*"):
                    periodic = [False, False, False]
                    # no periodic direction: store a fixed placeholder cell
                    cell_params = dict(
                        zip(
                            cell_keys,
                            [500.0, 500.0, 500.0, 90.0, 90.0, 90.0],
                        ))
                    data[field] = edict.merge([
                        data.get(field, {}), {
                            "cell_parameters": cell_params
                        }
                    ])
                else:
                    raise IOError(
                        "was expecting ATOM X Y Z (in units of ANGSTROM or fractional) on line:"
                        " {0}, got: {1}".format(initial_lineno + 1,
                                                lines[initial_lineno + 1]))
            if not all(periodic) and "cell_parameters" not in data.get(
                    field, {}):
                raise IOError(
                    "require cell parameters to have been set for non-periodic directions in line"
                    " #{0} : {1}".format(initial_lineno + 1,
                                         lines[initial_lineno + 1]))
            a, b, c, alpha, beta, gamma = [None] * 6
            if not all(periodic):
                cell = data[field]["cell_parameters"]
                a, b, c, alpha, beta, gamma = [cell[p] for p in cell_keys]

            curr_lineno = initial_lineno + 3
            atom_data = {
                "ids": [],
                "assymetric": [],
                "atomic_numbers": [],
                "symbols": [],
            }
            atom_data["pbc"] = periodic
            # rows continue until a blank line or a line starting with a letter
            while (lines[curr_lineno].strip()
                   and not lines[curr_lineno].strip()[0].isalpha()):
                fields = lines[curr_lineno].strip().split()
                atom_data["ids"].append(fields[0])
                atom_data["assymetric"].append(bool(strtobool(fields[1])))
                atom_data["atomic_numbers"].append(int(fields[2]))
                atom_data["symbols"].append(fields[3].lower().capitalize())
                if all(periodic):
                    atom_data.setdefault("fcoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])
                elif (periodic == [True, True, False] and alpha == 90
                      and beta == 90):
                    # z is given in angstrom: divide by c to get a fraction
                    atom_data.setdefault("fcoords", []).append([
                        float(fields[4]),
                        float(fields[5]),
                        float(fields[6]) / c
                    ])
                elif periodic == [False, False, False]:
                    atom_data.setdefault("ccoords", []).append(
                        [float(fields[4]),
                         float(fields[5]),
                         float(fields[6])])
                # TODO other periodic types (1D)
                curr_lineno += 1
            data[field] = edict.merge([data.get(field, {}), atom_data])

    # TODO These coordinates are present in initial and final optimized sections,
    # but DON'T work with lattice parameters
    if fnmatch(line, "CARTESIAN COORDINATES - PRIMITIVE CELL*"):
        if not fnmatch(
                lines[initial_lineno + 2].strip(),
                "*ATOM*X(ANGSTROM)*Y(ANGSTROM)*Z(ANGSTROM)",
        ):
            raise IOError(
                "was expecting ATOM X(ANGSTROM) Y(ANGSTROM) Z(ANGSTROM) on line:"
                " {0}, got: {1}".format(initial_lineno + 2,
                                        lines[initial_lineno + 2]))
        curr_lineno = initial_lineno + 4
        atom_data = {
            "ids": [],
            "atomic_numbers": [],
            "symbols": [],
            "ccoords": []
        }
        while (lines[curr_lineno].strip()
               and not lines[curr_lineno].strip()[0].isalpha()):
            fields = lines[curr_lineno].strip().split()
            if len(fields) < 6:
                raise IOError("was expecting ID ANUM SYMBOL X Y Z on line:"
                              " {0}, got: {1}".format(curr_lineno,
                                                      lines[curr_lineno]))
            atom_data["ids"].append(fields[0])
            atom_data["atomic_numbers"].append(int(fields[1]))
            atom_data["symbols"].append(fields[2].lower().capitalize())
            atom_data["ccoords"].append(
                [float(fields[3]),
                 float(fields[4]),
                 float(fields[5])])
            curr_lineno += 1
        data["primitive_cell"] = edict.merge(
            [data.get("primitive_cell", {}), atom_data])

    elif fnmatch(line, "DIRECT LATTICE VECTORS CARTESIAN COMPONENTS*"):
        if "ANGSTROM" not in line:
            raise IOError("was expecting lattice vectors in angstroms on line:"
                          " {0}, got: {1}".format(initial_lineno, line))
        if not fnmatch(lines[initial_lineno + 1].strip(), "X*Y*Z"):
            raise IOError("was expecting X Y Z on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        # NOTE(review): this guard inspects "crystallographic_cell", whereas
        # the vectors are stored under "primitive_cell" below. It is kept
        # unchanged so the parsed output keeps its current shape; confirm
        # which key the duplicate check was meant to protect.
        if "crystallographic_cell" not in data:
            data["crystallographic_cell"] = {}
        if "cell_vectors" in data["crystallographic_cell"]:
            raise IOError("found multiple cell vectors on line:"
                          " {0}, got: {1}".format(initial_lineno + 1,
                                                  lines[initial_lineno + 1]))
        vectors = {
            "a": split_numbers(lines[initial_lineno + 2]),
            "b": split_numbers(lines[initial_lineno + 3]),
            "c": split_numbers(lines[initial_lineno + 4]),
        }

        # create the primitive cell entry when no earlier section did, rather
        # than failing with a KeyError
        data.setdefault("primitive_cell", {})["cell_vectors"] = vectors
def get_schema(eltypes=None, trtypes=None):
    """Assemble the validation schema for elements and transforms.

    Parameters
    ----------
    eltypes: list or None
        element schemas to allow (if None, allow all)
    trtypes: list or None
        transform schemas to allow (if None, allow all)

    Returns
    -------
    dict
        schema requiring ``elements`` and ``transforms`` arrays, restricted
        to the selected element and transform schemas

    """
    if eltypes is None:
        element_options = list(_element_schema.values())
    else:
        element_options = [_element_schema[name] for name in eltypes]

    if trtypes is None:
        transform_options = list(_transform_schema.values())
    else:
        transform_options = [_transform_schema[name] for name in trtypes]

    # key paths into the schema skeleton built below
    top_transform_items = ("properties", "transforms", "items")
    element_items = ("properties", "elements", "items")
    element_transforms = element_items + ("properties", "transforms")

    # skeleton: the empty 'items' entries are filled in below
    schema = {
        'type': 'object',
        'required': ['elements', 'transforms'],
        'properties': {
            'elements': {
                'type': 'array',
                'items': {
                    'type': 'object',
                    'required': ['transforms'],
                    'properties': {
                        'transforms': {
                            'type': 'array',
                            'items': {},
                        },
                    },
                },
            },
            'transforms': {
                'type': 'array',
                'items': {},
            },
        },
    }

    if len(transform_options) > 1:
        # several transform schemas: each item must match exactly one
        for path in (top_transform_items, element_transforms + ("items",)):
            edict.indexes(schema, list(path))["oneOf"] = transform_options
    else:
        # a single transform schema is used directly as the item schema
        only_transform = transform_options[0]
        schema["properties"]["transforms"]["items"] = only_transform
        edict.indexes(schema,
                      list(element_transforms))["items"] = only_transform

    if len(element_options) > 1:
        edict.indexes(schema, list(element_items))["oneOf"] = element_options
    else:
        # a single element schema is merged into the generic element item
        elements = schema["properties"]["elements"]
        elements["items"] = edict.merge(
            [elements["items"], element_options[0]], append=True)

    return schema