def parse_output(
    self, outfiles: Dict[str, str], input_model: "AtomicInput"
) -> "AtomicResult":  # lgtm: [py/similar-function]
    """Build an AtomicResult from the Turbomole output files.

    ``outfiles`` must contain "stdout"; it is popped and the remaining
    entries are forwarded to ``harvest`` as keyword arguments. The value
    returned is the "CURRENT <DRIVER>" qcvar, with a Decimal converted to
    float.
    """
    stdout = outfiles.pop("stdout")
    qcvars, gradient, hessian = harvest(input_model.molecule, stdout, **outfiles)

    # Register whichever derivative arrays the harvester produced.
    for label, array in (("CURRENT GRADIENT", gradient), ("CURRENT HESSIAN", hessian)):
        if array is not None:
            qcvars[label] = array

    result_key = f"CURRENT {input_model.driver.upper()}"
    return_result = qcvars[result_key]
    if isinstance(return_result, Decimal):
        return_result = float(return_result)

    build_out(qcvars)
    properties = build_atomicproperties(qcvars)

    # Start from the input record and layer the computed fields on top.
    output_data = input_model.dict()
    output_data["extras"]["outfiles"] = outfiles
    output_data.update(
        properties=properties,
        provenance=Provenance(creator="Turbomole", version=self.get_version(), routine="turbomole"),
        return_result=return_result,
        stdout=stdout,
        success=True,
    )
    return AtomicResult(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Build an AtomicResult from GAMESS standard output.

    Args:
        outfiles: Output files keyed by name; only "stdout" is read.
        input_model: The input that produced the output; its fields are the
            base of the returned record.

    Returns:
        An AtomicResult whose ``return_result`` is the "CURRENT <DRIVER>"
        qcvar and whose properties are filled from the recognised qcvars.

    Raises:
        KeyError: If "stdout" or the "CURRENT <DRIVER>" qcvar is missing.
    """
    # gamessmol, if it exists, is dinky, just a clue to geometry of gamess results
    qcvars, gamessgrad, gamessmol = harvest(input_model.molecule, outfiles["stdout"])

    if gamessgrad is not None:
        qcvars["CURRENT GRADIENT"] = gamessgrad

    qcvars = unnp(qcvars, flat=True)

    output_data = {
        "schema_name": "qcschema_output",
        "molecule": gamessmol,
        "schema_version": 1,
        "extras": {},
        "properties": {"nuclear_repulsion_energy": gamessmol.nuclear_repulsion_energy()},
        "return_result": qcvars[f"CURRENT {input_model.driver.upper()}"],
        "stdout": outfiles["stdout"],
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    output_data["extras"]["qcvars"] = {
        k.upper(): float(v) if isinstance(v, Decimal) else v
        for k, v in qcel.util.unnp(qcvars, flat=True).items()
    }

    # copy qcvars into schema where possible
    qcvars_to_properties = {
        "DFT XC ENERGY": "scf_xc_energy",
        "ONE-ELECTRON ENERGY": "scf_one_electron_energy",
        "TWO-ELECTRON ENERGY": "scf_two_electron_energy",
        "SCF TOTAL ENERGY": "scf_total_energy",
        "MP2 CORRELATION ENERGY": "mp2_correlation_energy",
        "MP2 TOTAL ENERGY": "mp2_total_energy",
        "CCSD CORRELATION ENERGY": "ccsd_correlation_energy",
        "CCSD TOTAL ENERGY": "ccsd_total_energy",
        "CCSD(T) CORRELATION ENERGY": "ccsd_prt_pr_correlation_energy",
        "CCSD(T) TOTAL ENERGY": "ccsd_prt_pr_total_energy",
    }
    for qcvar, prop in qcvars_to_properties.items():
        if qcvar in qcvars:
            output_data["properties"][prop] = qcvars[qcvar]

    # All three components are read below, so all three must be present.
    # The previous set-intersection test passed when only one was present
    # and then failed with a KeyError on the missing ones.
    dipole_keys = ("SCF DIPOLE X", "SCF DIPOLE Y", "SCF DIPOLE Z")
    if all(key in qcvars for key in dipole_keys):
        conv = Decimal(qcel.constants.conversion_factor("debye", "e * bohr"))
        output_data["properties"]["scf_dipole_moment"] = [qcvars[key] * conv for key in dipole_keys]

    output_data["success"] = True

    return AtomicResult(**{**input_model.dict(), **output_data})
def harvest_as_atomic_result(input_model: OptimizationInput, nwout: str) -> List[AtomicResult]:
    """Turn every step of a geometry relaxation into its own AtomicResult

    Args:
        input_model: Input specification for the relaxation
        nwout: Standard out from the NWChem simulation
    Returns:
        A list with one gradient-driver result per parsed step
    """
    # Parse the output; the error entry returned by the harvester is not used here
    step_qcvars, step_mols, step_grads, version, _error = harvest_output(nwout)
    spec = input_model.input_specification

    results = []
    for qcvars, nwgrad, out_mol in zip(step_qcvars, step_grads, step_mols):
        if nwgrad is not None:
            # The method-specific label drops the first four characters of the method name
            qcvars[f"{spec.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
            qcvars["CURRENT GRADIENT"] = nwgrad

        # Get the formatted properties
        build_out(qcvars)
        atprop = build_atomicproperties(qcvars)

        # Each step gets its own copy of the extras so the qcvars do not leak between steps
        step_extras = input_model.extras.copy()
        provenance = Provenance(creator="NWChem", version=version, routine="nwchem_opt")

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        step_extras["qcvars"] = {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in unnp(qcvars, flat=True).items()
        }

        results.append(
            AtomicResult(
                schema_version=1,
                molecule=out_mol,
                driver="gradient",
                extras=step_extras,
                model=spec.model,
                keywords=spec.keywords,
                properties=atprop,
                provenance=provenance,
                return_result=nwgrad,
                success=True,
            )
        )
    return results
def parse_output(
    self, outfiles: Dict[str, str], input_model: "AtomicInput"
) -> "AtomicResult":  # lgtm: [py/similar-function]
    """Build an AtomicResult from the NWChem output files.

    "stdout" is required and is popped from ``outfiles``; every remaining
    file is forwarded to ``harvest`` as a keyword argument.
    """
    stdout = outfiles.pop("stdout")

    # Read the NWChem stdout file and, if needed, the hess or grad files
    qcvars, nwhess, nwgrad, nwmol, version, errorTMP = harvest(input_model.molecule, stdout, **outfiles)
    for label, array in (("CURRENT GRADIENT", nwgrad), ("CURRENT HESSIAN", nwhess)):
        if array is not None:
            qcvars[label] = array

    # Normalize the output as a float or list of floats
    return_result = qcvars[f"CURRENT {input_model.driver.upper()}"]
    if isinstance(return_result, Decimal):
        return_result = float(return_result)
    elif isinstance(return_result, np.ndarray):
        return_result = return_result.tolist()

    # Get the formatted properties
    qcprops = extract_formatted_properties(qcvars)

    extras = {"outfiles": outfiles, **input_model.extras}
    provenance = Provenance(creator="NWChem", version=self.get_version(), routine="nwchem")

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    extras["qcvars"] = {
        name.upper(): str(value) if isinstance(value, Decimal) else value
        for name, value in qcel.util.unnp(qcvars, flat=True).items()
    }

    output_data = {
        "schema_name": "qcschema_output",
        "schema_version": 1,
        "extras": extras,
        "properties": qcprops,
        "provenance": provenance,
        "return_result": return_result,
        "stdout": stdout,
        "success": True,
    }

    # Parsed fields take precedence over whatever the input carried.
    merged = input_model.dict()
    merged.update(output_data)
    return AtomicResult(**merged)
def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Evaluate a UFF energy or gradient with RDKit.

    Raises InputError when the method is not "uff", when RDKit lacks
    parameters for some atom type, or when the driver is neither "energy"
    nor "gradient".
    """
    self.found(raise_error=True)
    import rdkit
    from rdkit.Chem import AllChem

    # Failure flag
    ret_data = {"success": False}

    # Build the Molecule
    jmol = input_data.molecule
    mol = self._process_molecule_rdkit(jmol)

    if input_data.model.method.lower() != "uff":
        raise InputError("RDKit only supports the UFF method currently.")

    ff = AllChem.UFFGetMoleculeForceField(mol)
    if AllChem.UFFHasAllMoleculeParams(mol) is False:
        raise InputError("RDKit parameters not found for all atom types in molecule.")

    ff.Initialize()

    energy = ff.CalcEnergy() * ureg.conversion_factor("kJ / mol", "hartree")
    ret_data["properties"] = {"return_energy": energy}

    if input_data.driver == "energy":
        ret_data["return_result"] = energy
    elif input_data.driver == "gradient":
        energy_factor = ureg.conversion_factor("kJ / mol", "hartree")
        length_factor = ureg.conversion_factor("angstrom", "bohr")
        coef = energy_factor * length_factor
        ret_data["return_result"] = [component * coef for component in ff.CalcGrad()]
    else:
        raise InputError(f"RDKit can only compute energy and gradient driver methods. Found {input_data.driver}.")

    ret_data.update(
        provenance=Provenance(
            creator="rdkit", version=rdkit.__version__, routine="rdkit.Chem.AllChem.UFFGetMoleculeForceField"
        ),
        schema_name="qcschema_output",
        success=True,
    )

    # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other
    merged = {**input_data.dict(), **ret_data}
    return AtomicResult(**merged)
def parse_output(
    self, outfiles: Dict[str, str], input_model: "AtomicInput"
) -> AtomicResult:  # lgtm: [py/similar-function]
    """Build an AtomicResult from the NWChem output files.

    Args:
        outfiles: Output files keyed by name. "stdout" and "stderr" are
            required and are popped; the rest are forwarded to ``harvest``
            as keyword arguments.
        input_model: The input that produced the output.

    Returns:
        An AtomicResult whose ``return_result`` is the "CURRENT <DRIVER>"
        qcvar ("CURRENT ENERGY" for the properties driver).

    Raises:
        UnknownError: If harvesting fails; the message is the captured
            stdout and the original exception is attached as the cause.
    """
    # Get the stdout from the calculation (required)
    stdout = outfiles.pop("stdout")
    stderr = outfiles.pop("stderr")

    # Read the NWChem stdout file and, if needed, the hess or grad files
    try:
        qcvars, nwhess, nwgrad, _nwmol, _version, _error = harvest(input_model.molecule, stdout, **outfiles)
    except Exception as e:
        # Chain the cause so the parser traceback is not lost.
        raise UnknownError(stdout) from e

    if nwgrad is not None:
        qcvars[f"{input_model.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
        qcvars["CURRENT GRADIENT"] = nwgrad
    if nwhess is not None:
        qcvars["CURRENT HESSIAN"] = nwhess

    # Normalize the output as a float or list of floats
    driver = input_model.driver.upper()
    if driver == "PROPERTIES":
        retres = qcvars["CURRENT ENERGY"]
    else:
        retres = qcvars[f"CURRENT {driver}"]

    if isinstance(retres, Decimal):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.tolist()

    # Get the formatted properties
    build_out(qcvars)
    atprop = build_atomicproperties(qcvars)

    # Format them into an output
    output_data = {
        "schema_version": 1,
        "extras": {"outfiles": outfiles, **input_model.extras},
        "properties": atprop,
        "provenance": Provenance(creator="NWChem", version=self.get_version(), routine="nwchem"),
        "return_result": retres,
        "stderr": stderr,
        "stdout": stdout,
        "success": True,
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    # * formerly unnp(qcvars, flat=True).items()
    output_data["extras"]["qcvars"] = {
        k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
    }

    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(
    self, outfiles: Dict[str, str], input_model: AtomicInput
) -> AtomicResult:  # lgtm: [py/similar-function]
    """Build an AtomicResult from the CFOUR output files.

    Args:
        outfiles: Output files keyed by name. "stdout" and "stderr" are
            required and are popped; the rest are forwarded to ``harvest``
            as keyword arguments.
        input_model: The input that produced the output.

    Returns:
        An AtomicResult whose ``return_result`` is the "CURRENT <DRIVER>"
        qcvar ("CURRENT ENERGY" for the properties driver); arrays are
        flattened to a list.

    Raises:
        UnknownError: If harvesting fails; the message is the captured
            stdout and the original exception is attached as the cause.
    """
    stdout = outfiles.pop("stdout")
    stderr = outfiles.pop("stderr")

    # c4mol, if it exists, is dinky, just a clue to geometry of cfour results
    try:
        qcvars, c4hess, c4grad, _c4mol, _version, _error = harvest(input_model.molecule, stdout, **outfiles)
    except Exception as e:
        # Chain the cause so the parser traceback is not lost.
        raise UnknownError(stdout) from e

    # The method-specific labels drop the first three characters of the method name.
    method_label = input_model.model.method.upper()[3:]
    if c4grad is not None:
        qcvars["CURRENT GRADIENT"] = c4grad
        qcvars[f"{method_label} TOTAL GRADIENT"] = c4grad

    if c4hess is not None:
        qcvars[f"{method_label} TOTAL HESSIAN"] = c4hess
        qcvars["CURRENT HESSIAN"] = c4hess

    driver = input_model.driver.upper()
    if driver == "PROPERTIES":
        retres = qcvars["CURRENT ENERGY"]
    else:
        retres = qcvars[f"CURRENT {driver}"]

    if isinstance(retres, Decimal):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    build_out(qcvars)
    atprop = build_atomicproperties(qcvars)

    output_data = {
        "schema_version": 1,
        "extras": {"outfiles": outfiles, **input_model.extras},
        "properties": atprop,
        "provenance": Provenance(creator="CFOUR", version=self.get_version(), routine="xcfour"),
        "return_result": retres,
        "stderr": stderr,
        "stdout": stdout,
        "success": True,
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    # * formerly unnp(qcvars, flat=True).items()
    output_data["extras"]["qcvars"] = {
        k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
    }

    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
    """
    Assemble an AtomicResult from the Gaussian output files.

    "gaussian.log" is used for the convergence check, the version and, when
    "scf_properties" is among the keywords, the bond indices. The total energy
    is scanned from the "lig.fchk" text, and that same text is handed to the
    gradient and hessian parsers. A "gaussian.wfx" file, if present, is copied
    into the extras.
    """
    log_text = outfiles["gaussian.log"]

    # make sure we got valid exit status
    self.check_convergence(logfile=log_text)
    version = self.parse_version(logfile=outfiles["gaussian.log"])

    # build the main data dict
    output_data = input_model.dict()
    provenance = {"version": version, "creator": "gaussian", "routine": "CLI"}
    properties = {}
    qcvars = {}

    # collect the total energy from the fchk file; the last matching line wins
    fchk_text = outfiles["lig.fchk"]
    for line in fchk_text.split("\n"):
        if "Total Energy" not in line:
            continue
        energy = float(line.split()[3])
        properties.update(return_energy=energy, scf_total_energy=energy)

    driver = input_model.driver
    if driver == "energy":
        output_data["return_result"] = energy
    elif driver == "gradient":
        # now we need to parse out the forces
        output_data["return_result"] = self.parse_gradient(fchfile=outfiles["lig.fchk"])
    elif driver == "hessian":
        output_data["return_result"] = self.parse_hessian(fchkfile=outfiles["lig.fchk"])

    # parse scf_properties
    if "scf_properties" in input_model.keywords:
        natoms = len(input_model.molecule.symbols)
        qcvars["WIBERG_LOWDIN_INDICES"] = self.parse_wbo(logfile=outfiles["gaussian.log"], natoms=natoms)

    # if there is an extra output file grab it
    if "gaussian.wfx" in outfiles:
        output_data["extras"]["gaussian.wfx"] = outfiles["gaussian.wfx"]

    if qcvars:
        output_data["extras"]["qcvars"] = qcvars

    output_data.update(
        properties=properties,
        schema_name="qcschema_output",
        stdout=outfiles["gaussian.log"],
        success=True,
        provenance=provenance,
    )
    return AtomicResult(**output_data)
def _compute(self, driver):
    """Run the schema input for ``driver`` through psi4 and wrap the result.

    The entry point depends on the installed psi4: versions whose version
    string contains "1.3" use ``json_wrapper``; all others use
    ``schema_wrapper`` with JSON serialization switched on.
    """
    import psi4

    schema_input = self.generate_schema_input(driver)

    if "1.3" not in psi4.__version__:
        raw = psi4.schema_wrapper.run_json_qcschema(schema_input.dict(), clean=True, json_serialization=True)
        return AtomicResult(**raw)

    raw = psi4.json_wrapper.run_json_qcschema(schema_input.dict(), clean=True)
    return AtomicResult(**raw)
def parse_output(
    self, outfiles: Dict[str, str], input_model: "AtomicInput"
) -> "AtomicResult":  # lgtm: [py/similar-function]
    """Build an AtomicResult from the CFOUR output files.

    "stdout" is required and is popped from ``outfiles``; the remaining files
    are forwarded to ``harvest`` as keyword arguments. The properties are left
    empty; the harvested values are exposed through ``extras["qcvars"]``.
    """
    stdout = outfiles.pop("stdout")

    # c4mol, if it exists, is dinky, just a clue to geometry of cfour results
    qcvars, c4hess, c4grad, c4mol, version, errorTMP = harvest(input_model.molecule, stdout, **outfiles)

    for label, array in (("CURRENT GRADIENT", c4grad), ("CURRENT HESSIAN", c4hess)):
        if array is not None:
            qcvars[label] = array

    return_result = qcvars[f"CURRENT {input_model.driver.upper()}"]
    if isinstance(return_result, Decimal):
        return_result = float(return_result)
    elif isinstance(return_result, np.ndarray):
        return_result = return_result.ravel().tolist()

    provenance = Provenance(creator="CFOUR", version=self.get_version(), routine="xcfour")

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    flat_qcvars = qcel.util.unnp(qcvars, flat=True)
    extras = {
        "outfiles": outfiles,
        "qcvars": {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in flat_qcvars.items()
        },
    }

    output_data = {
        "schema_name": "qcschema_output",
        "schema_version": 1,
        "extras": extras,
        "properties": {},
        "provenance": provenance,
        "return_result": return_result,
        "stdout": stdout,
        "success": True,
    }

    merged = input_model.dict()
    merged.update(output_data)
    return AtomicResult(**merged)
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> AtomicResult:
    """Build an AtomicResult from the binary scratch files and "dispatch.out".

    Every entry of ``outfiles`` other than "dispatch.out" that is not None is
    decoded with ``np.frombuffer``. The "99.0" array supplies the energies
    (first, second and last elements); "131.0" and "132.0" are returned for
    the gradient and hessian drivers respectively.

    Raises ValueError for any other driver.
    """
    log_text = ""
    arrays = {}
    for name, content in outfiles.items():
        if name == "dispatch.out":
            log_text = content
        elif content is not None:
            arrays[name] = np.frombuffer(content)

    output_data = {}
    if input_model.driver == "energy":
        output_data["return_result"] = arrays["99.0"][-1]
    elif input_model.driver == "gradient":
        output_data["return_result"] = arrays["131.0"]
    elif input_model.driver == "hessian":
        output_data["return_result"] = arrays["132.0"]
    else:
        raise ValueError(f"Could not parse driver of type {input_model.driver}.")

    energies = arrays["99.0"]
    properties = {
        "nuclear_repulsion_energy": energies[0],
        "scf_total_energy": energies[1],
        "return_energy": energies[-1],
    }

    if input_model.model.method.lower() in {"mp2", "rimp2"}:
        properties["mp2_total_energy"] = properties["return_energy"]

    # Correct CCSD because its odd?
    # if input_model.model.method.lower() == "ccsd":
    #     m1 = re.findall(" CCSD correlation energy.+=.+\d+\.\d+", outfiles["dispatch.out"])
    #     m2 = re.findall(" CCSD total energy.+=.+\d+\.\d+", outfiles["dispatch.out"])

    # Values parsed from the log are merged over the ones read from the scratch files.
    log_props, provenance = self._parse_logfile_common(log_text, input_model.dict())
    properties.update(log_props)

    output_data["provenance"] = provenance
    output_data["properties"] = properties
    output_data["stdout"] = outfiles["dispatch.out"]
    output_data["success"] = True

    merged = input_model.dict()
    merged.update(output_data)
    return AtomicResult(**merged)
def _compute(self, driver):
    """Package the externally supplied energy/gradient/hessian as an AtomicResult.

    Raises OptError when a quantity required by ``driver`` has not been
    provided. The stored external values are reset to None before returning.
    """
    logging.getLogger(__name__).info("UserComputer only returning provided values")

    energy = self.external_energy
    gradient = self.external_gradient
    hessian = self.external_hessian

    # Each driver needs its own quantity plus every lower-order one.
    if driver == "hessian":
        if any(value is None for value in (hessian, gradient, energy)):
            raise OptError("Must provide hessian, gradient, and energy.")
    elif driver == "gradient":
        if any(value is None for value in (gradient, energy)):
            raise OptError("Must provide gradient and energy.")
    elif driver == "energy":
        if energy is None:
            raise OptError("Must provide energy.")

    result = deepcopy(UserComputer.output_skeleton)
    result["driver"] = driver

    molecule = Molecule(**self.molecule)
    result["molecule"] = molecule
    nuclear_repulsion = molecule.nuclear_repulsion_energy()

    properties = result["properties"]
    qcvars = result["extras"]["qcvars"]

    properties["nuclear_repulsion_energy"] = nuclear_repulsion
    qcvars["NUCLEAR REPULSION ENERGY"] = nuclear_repulsion
    properties["return_energy"] = energy
    qcvars["CURRENT ENERGY"] = energy

    if driver == "energy":
        result["return_result"] = energy
    elif driver == "gradient":
        qcvars["CURRENT GRADIENT"] = gradient
        result["return_result"] = gradient
    elif driver == "hessian":
        qcvars["CURRENT GRADIENT"] = gradient
        qcvars["CURRENT HESSIAN"] = hessian
        result["return_result"] = hessian

    # maybe do this to protect against repeatedly going back for same?
    self.external_energy = self.external_gradient = self.external_hessian = None

    return AtomicResult(**result)
def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
    """Build an AtomicResult from the GAMESS output files.

    "stdout" and "stderr" are required and are popped from ``outfiles``; the
    remaining files are forwarded to ``harvest`` as keyword arguments. The
    molecule of the result is the one returned by the harvester.
    """
    # Get the stdout from the calculation (required)
    stdout, stderr = outfiles.pop("stdout"), outfiles.pop("stderr")

    # gamessmol, if it exists, is dinky, just a clue to geometry of gamess results
    qcvars, gamessgrad, gamessmol = harvest(input_model.molecule, stdout, **outfiles)

    if gamessgrad is not None:
        qcvars["CURRENT GRADIENT"] = gamessgrad

    # The properties driver reports the energy as its return value.
    driver_label = input_model.driver.upper()
    result_label = "ENERGY" if driver_label == "PROPERTIES" else driver_label
    return_result = qcvars[f"CURRENT {result_label}"]

    build_out(qcvars)
    atprop = build_atomicproperties(qcvars)

    extras = {"outfiles": outfiles, **input_model.extras}
    provenance = Provenance(creator="GAMESS", version=self.get_version(), routine="rungms")

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    extras["qcvars"] = {
        name.upper(): str(value) if isinstance(value, Decimal) else value
        for name, value in unnp(qcvars, flat=True).items()
    }

    output_data = {
        "schema_version": 1,
        "molecule": gamessmol,
        "extras": extras,
        "properties": atprop,
        "provenance": provenance,
        "return_result": return_result,
        "stderr": stderr,
        "stdout": stdout,
        "success": True,
    }

    merged = input_model.dict()
    merged.update(output_data)
    return AtomicResult(**merged)
def test_add_data():
    """Add geometries and single points to a MoleculeData record and check the derived quantities.

    The record is built up step by step from the JSON files under ``records/``,
    so the statements below depend on the state left by the ones before them.
    """
    md = MoleculeData.from_identifier("O")

    # Load the xtb geometry
    xtb_geom = OptimizationResult.parse_file(
        _my_path.joinpath('records/xtb-neutral.json'))
    md.add_geometry(xtb_geom)
    assert "xtb" in md.data
    assert "neutral" in md.data["xtb"]
    assert isclose(md.data["xtb"][
        OxidationState.NEUTRAL].atomization_energy["xtb-no_zpe"],
                   -0.515,
                   abs_tol=1e-2)
    assert ("xtb", "neutral") == md.match_geometry(xtb_geom.final_molecule)

    # Load in a relaxed oxidized geometry
    xtb_geom_ox = OptimizationResult.parse_file(
        _my_path.joinpath('records/xtb-oxidized.json'))
    md.add_geometry(xtb_geom_ox)
    assert "xtb" in md.data
    assert "oxidized" in md.data["xtb"]
    # The optimization started from the neutral geometry and ended at the oxidized one
    assert ("xtb", "neutral") == md.match_geometry(xtb_geom_ox.initial_molecule)
    assert ("xtb", "oxidized") == md.match_geometry(xtb_geom_ox.final_molecule)
    assert md.data['xtb'][OxidationState.OXIDIZED].total_energy[OxidationState.OXIDIZED]['xtb'] != \
           md.data['xtb'][OxidationState.NEUTRAL].total_energy[OxidationState.OXIDIZED]['xtb']

    # Load in an oxidized energy for the neutral structure
    xtb_energy = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_xtb-oxidized-energy.json'))
    md.add_single_point(xtb_energy)

    # Add in solvation energies
    xtb_energy = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_acn.json'))
    md.add_single_point(xtb_energy)
    assert "acetonitrile" in md.data['xtb']['neutral'].solvation_energy[
        'neutral']
    xtb_energy = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-oxidized_acn.json'))
    md.add_single_point(xtb_energy)
    assert "acetonitrile" in md.data['xtb']['oxidized'].solvation_energy[
        'oxidized']

    # Show that we can compute a redox potential
    recipe = RedoxEnergyRecipe(name="xtb-vertical",
                               geometry_level="xtb",
                               energy_level="xtb",
                               adiabatic=False)
    result = recipe.compute_redox_potential(md, OxidationState.OXIDIZED)
    # The computed value is also stored on the record under the recipe name
    assert md.oxidation_potential['xtb-vertical'] == result

    recipe = RedoxEnergyRecipe(name="xtb",
                               geometry_level="xtb",
                               energy_level="xtb",
                               adiabatic=True)
    result = recipe.compute_redox_potential(md, OxidationState.OXIDIZED)
    assert md.oxidation_potential['xtb'] == result
    # The adiabatic potential is expected to lie below the vertical one
    assert md.oxidation_potential['xtb'] < md.oxidation_potential[
        'xtb-vertical']

    recipe = RedoxEnergyRecipe(name="xtb-acn",
                               geometry_level="xtb",
                               energy_level="xtb",
                               adiabatic=True,
                               solvent='acetonitrile',
                               solvation_level='xtb')
    result = recipe.compute_redox_potential(md, OxidationState.OXIDIZED)
    assert md.oxidation_potential['xtb-acn'] == result
    assert md.oxidation_potential['xtb-acn'] != md.oxidation_potential['xtb']

    # Add a single point small_basis computation
    smb_hessian = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_smb-neutral-hessian.json'))
    md.add_single_point(smb_hessian)
    assert isclose(md.data["xtb"][OxidationState.NEUTRAL].zpe[
        OxidationState.NEUTRAL]['small_basis'],
                   0.02155,
                   abs_tol=1e-3)

    # Add an NWChem with solvent
    smb_solvent = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_smb-neutral_water.json'))
    md.add_single_point(smb_solvent)
    assert 'small_basis' in md.data['xtb']['neutral'].total_energy_in_solvent[
        'neutral']['water']
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Build an AtomicResult from the MOPAC "dispatch.aux" file.

    The aux text is a list of ``KEY[:UNITS][[n]]=value`` records, optionally
    followed by continuation lines of whitespace-separated numbers. Only the
    keys in ``keep_keys`` are retained, and numeric values are multiplied by
    the conversion factor selected by their units.

    Raises UnknownError when "dispatch.aux" is None (the message is the
    contents of "dispatch.out" when that is available) or when the gradients
    or the MOPAC version could not be found.
    """
    keep_keys = {
        "heat_of_formation",
        "energy_electronic",
        "energy_nuclear",
        "gradient_norm",
        "dip_vec",
        "spin_component",
        "total_spin",
        "molecular_weight",
        "total_energy",
        "gradients",
        "mopac_version",
        "atom_charges",
        "point_group",
    }

    # Convert back to atomic units
    conversions = {
        "KCAL/MOL": 1 / self.extras["hartree_to_kcalmol"],
        "KCAL/MOL/ANGSTROM": self.extras["bohr_to_angstroms"] / self.extras["hartree_to_kcalmol"],
        "EV": 1 / self.extras["hartree_to_ev"],
        "DEBYE": 1 / self.extras["au_to_debye"],
        "AMU": 1,
        None: 1,
    }

    aux_text = outfiles["dispatch.aux"]
    if aux_text is None:
        error = "An unknown error occured and no results were captured."
        if outfiles["dispatch.out"] is not None:
            error = outfiles["dispatch.out"]
        raise UnknownError(error)

    # Parse the weird structure: key -> (conversion factor, value)
    parsed = {}
    active_key = None
    for line in aux_text.splitlines():
        if any(marker in line for marker in ("START", "END", "#")):
            continue

        if "=" not in line:
            # Continuation line: more numbers for the most recent retained key
            if active_key is not None:
                factor, values = parsed[active_key]
                values.extend([float(x) * factor for x in line.split()])
            continue

        # Primary split
        raw_key, raw_value = line.split("=", 1)

        # Format key, may have units
        # IONIZATION_POTENTIAL:EV
        # GRADIENTS:KCAL/MOL/ANGSTROM[09]
        raw_key, separator, units = raw_key.partition(":")
        if not separator:
            units = None
        elif "[" in units:
            # Pop off [xx] items
            units = units.split("[", 1)[0]
        if "[" in raw_key:
            raw_key = raw_key.split("[", 1)[0]

        key = raw_key.strip().lower()

        # Skip keys that are not useful
        if key not in keep_keys:
            active_key = None
            continue
        active_key = key

        factor = conversions[units]

        # 1D+3 -> 1E3 conversion
        text = raw_value.strip().replace("D+", "E+").replace("D-", "E-")
        if ("E+" in text) or ("E-" in text):
            if text.count("E") > 1:
                value = [float(x) * factor for x in text.split()]
            else:
                value = float(text) * factor
        elif text == "":
            # Nothing on the record line itself; continuation lines fill this list
            value = []
        else:
            value = text

        parsed[key] = (factor, value)

    data = {key: value for key, (_, value) in parsed.items()}

    if not ("gradients" in data and "mopac_version" in data):
        raise UnknownError("Could not correctly parse the MOPAC output file.")

    gradient = data.pop("gradients")

    output = input_model.dict()
    output["provenance"] = {"creator": "mopac", "version": data.pop("mopac_version")}
    output["properties"] = {"return_energy": data["heat_of_formation"]}
    output["extras"].update(data)

    output["return_result"] = data["heat_of_formation"] if input_model.driver == "energy" else gradient

    output["stdout"] = outfiles["dispatch.out"]
    output["success"] = True

    return AtomicResult(**output)
def parse_output(self, output: Dict[str, Any], input_model: "AtomicInput") -> "AtomicResult":
    """Convert the raw ``output`` dictionary into an AtomicResult.

    The basis-set arrays inside ``output["wavefunction"]["ao_basis"]`` are
    converted to lists in place, the available wavefunction quantities are
    reordered with ``self._qcore_to_cca_ao_order`` and renamed to their
    schema keys, and a small set of properties is derived from them.

    Raises KeyError for a wavefunction key that matches none of the known kinds.
    """
    wavefunction_map = {
        "orbitals_alpha": "scf_orbitals_a",
        "orbitals_beta": "scf_orbitals_b",
        "density_alpha": "scf_density_a",
        "density_beta": "scf_density_b",
        "fock_alpha": "scf_fock_a",
        "fock_beta": "scf_fock_b",
        "eigenvalues_alpha": "scf_eigenvalues_a",
        "eigenvalues_beta": "scf_eigenvalues_b",
        "occupations_alpha": "scf_occupations_a",
        "occupations_beta": "scf_occupations_b",
    }
    shell_array_keys = ("coefficients", "exponents", "angular_momentum")
    ecp_array_keys = ("angular_momentum", "r_exponents", "gaussian_exponents", "coefficients")
    ao_order = self._qcore_to_cca_ao_order

    def _arrays_to_lists(shell, keys):
        # Replace each listed entry by its plain-list form, in place.
        for array_key in keys:
            shell[array_key] = shell[array_key].tolist()

    output_data = input_model.dict()
    output_data["return_result"] = output[input_model.driver.value]

    # Always build a wavefunction, it will be stripped
    obas = output["wavefunction"]["ao_basis"]
    for center in obas["center_data"].values():
        # Convert basis set, cannot handle arrays
        for shell in center["electron_shells"]:
            shell.pop("normalized_primitives", None)
            _arrays_to_lists(shell, shell_array_keys)

        if center["ecp_potentials"] is None:
            continue
        for ecp in center["ecp_potentials"]:
            ecp.pop("ecp_potentials", None)
            _arrays_to_lists(ecp, ecp_array_keys)

    basis_set = BasisSet(
        name=str(input_model.model.basis), center_data=obas["center_data"], atom_map=obas["atom_map"]
    )

    wavefunction = {"basis": basis_set}
    for source_key, schema_key in wavefunction_map.items():
        raw = output["wavefunction"].get(source_key, None)
        if raw is None:
            continue

        if ("density" in source_key) or ("fock" in source_key):
            converted = reorder_row_and_column_ao_indices(raw, basis_set, ao_order)
        # Handles orbitals and 1D
        elif "orbitals" in source_key:
            converted = reorder_column_ao_indices(raw, basis_set, ao_order)
        elif "eigenvalues" in source_key:
            converted = reorder_column_ao_indices(raw.reshape(1, -1), basis_set, ao_order).ravel()
        elif "occupations" in source_key:
            # Pad to the basis size before reordering
            padded = np.zeros(basis_set.nbf)
            padded[: raw.shape[0]] = raw
            converted = reorder_column_ao_indices(padded.reshape(1, -1), basis_set, ao_order).ravel()
        else:
            raise KeyError("Wavefunction conversion key not understood")

        wavefunction[schema_key] = converted

    # Beta eigenvalues being present marks the wavefunction as unrestricted
    wavefunction["restricted"] = "scf_eigenvalues_b" not in wavefunction
    output_data["wavefunction"] = wavefunction

    # Handle remaining top level keys
    properties = {
        "calcinfo_nbasis": basis_set.nbf,
        "calcinfo_nmo": basis_set.nbf,
        "calcinfo_nalpha": np.sum(wavefunction["scf_occupations_a"] > 0),
        "calcinfo_natom": input_model.molecule.symbols.shape[0],
        "return_energy": output["energy"],
    }
    if wavefunction["restricted"]:
        nbeta = properties["calcinfo_nalpha"]
    else:
        nbeta = np.sum(wavefunction["scf_occupations_b"] > 0)
    properties["calcinfo_nbeta"] = nbeta

    output_data.update(properties=properties, schema_name="qcschema_output", success=True)

    return AtomicResult(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Build an AtomicResult from the TeraChem "tc.out" text.

    Args:
        outfiles: Output files keyed by name. "tc.out" is parsed; every key
            listed in ``input_model.extras`` is also looked up here.
        input_model: The input that produced the output. Its ``extras``
            values are overwritten with the matching file contents.

    Returns:
        An AtomicResult whose ``return_result`` is the flattened gradient
        when one was printed, otherwise the SCF total energy.

    Raises:
        UnknownError: If no SCF iteration line is found between the SCF
            header and the final-energy line.
        KeyError: If neither a gradient nor an SCF total energy was found, or
            if an extras key has no matching entry in ``outfiles``.
    """
    output_data = {}
    properties = {}

    # Parse the output file, collect properties and gradient
    output_lines = outfiles["tc.out"].split("\n")
    gradients = []
    natom = 0
    line_final_energy = -1
    line_scf_header = -1
    for idx, line in enumerate(output_lines):
        if "FINAL ENERGY" in line:
            properties["scf_total_energy"] = float(line.strip("\n").split()[2])
            line_final_energy = idx
        elif "Start SCF Iterations" in line:
            line_scf_header = idx
        elif "Total atoms" in line:
            natom = int(line.split()[-1])
        elif "DIPOLE MOMENT" in line:
            newline = line.replace(",", "").replace("}", "").replace("{", "")
            properties["scf_dipole_moment"] = [float(x) for x in newline.split()[2:5]]
        elif "Nuclear repulsion energy" in line:
            properties["nuclear_repulsion_energy"] = float(line.split()[-2])
        elif "Gradient units are Hartree/Bohr" in line:
            # Gradient is stored as (dE/dx1,dE/dy1,dE/dz1,dE/dx2,dE/dy2,...)
            for i in range(idx + 3, idx + 3 + natom):
                gradients.extend(float(x) for x in output_lines[i].strip("\n").split())

    # Walk backwards from the final energy to find the last SCF iteration line.
    scf_line_re = re.compile(
        r"^\s*\d+\s+" + DECIMAL + r"\s+" + DECIMAL + r"\s+" + DECIMAL + r"\s+" + DECIMAL,
        re.VERBOSE,
    )
    last_scf_line = ""
    for idx in reversed(range(line_scf_header, line_final_energy)):
        if scf_line_re.search(output_lines[idx]):
            last_scf_line = output_lines[idx]
            break

    if not last_scf_line:
        raise UnknownError("SCF iteration lines not found in TeraChem output")

    properties["scf_iterations"] = int(last_scf_line.split()[0])
    # Look for the label inside the lines. The previous test,
    # `"XC Energy" in output_lines`, compared it against whole lines of the
    # list, so it only matched a line consisting of exactly that text.
    if any("XC Energy" in line for line in output_lines):
        properties["scf_xc_energy"] = float(last_scf_line.split()[4])

    if len(gradients) > 0:
        output_data["return_result"] = gradients

    # Commented out the properties currently not supported by QCSchema
    # properties["spin_S2"] = 1 # calculated S(S+1)
    # elif "SPIN S-SQUARED" in line:
    #     properties["spin_S2"] = float(line.strip('\n').split()[2])

    # Parse files in scratch folder
    # properties["atomic_charge"] = []
    # atomic_charge_lines = open(outfiles["charge.xls"]).readlines()
    # for line in atomic_charge_lines:
    #     properties["atomic_charge"].append(line.strip('\n').split()[-1])

    if "return_result" not in output_data:
        if "scf_total_energy" in properties:
            output_data["return_result"] = properties["scf_total_energy"]
        else:
            raise KeyError("Could not find SCF total energy")

    output_data["properties"] = properties

    output_data["schema_name"] = "qcschema_output"
    output_data["stdout"] = outfiles["tc.out"]
    # TODO Should only return True if TeraChem calculation terminated properly
    output_data["success"] = True

    # return extra files requested by user as extras
    for extra in input_model.extras.keys():
        input_model.extras[extra] = outfiles[extra]

    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Build an AtomicResult from the MP2D output files.

    Args:
        outfiles: Output files keyed by name. "stdout" is required and is
            popped; "mp2d_gradient" must be present as a key and may be None.
        input_model: The input that produced the output. Its
            ``extras["info"]`` supplies the label used for the
            functional-specific qcvars.

    Returns:
        An AtomicResult whose ``return_result`` is the dispersion energy as a
        float, or the flattened full-size gradient for the gradient driver.

    Raises:
        UnknownError: If the scan of stdout reaches the end without meeting
            the coordinates banner (except for a one-real-atom gradient), or
            if the gradient file was not available for a gradient driver.
    """
    stdout = outfiles.pop("stdout")

    # parse energy output (could go further and break into UCHF, CKS)
    real = np.array(input_model.molecule.real)
    full_nat = real.shape[0]
    real_nat = np.sum(real)

    for ln in stdout.splitlines():
        if re.match(" MP2D dispersion correction Eh", ln):
            ene = Decimal(ln.split()[4])
        elif re.match("Atomic Coordinates in Angstroms", ln):
            break
    else:
        # The loop ran to the end without meeting the coordinates banner.
        if not ((real_nat == 1) and (input_model.driver == "gradient")):
            raise UnknownError("Unknown issue occured.")

    # parse gradient output
    if outfiles["mp2d_gradient"] is not None:
        srealgrad = outfiles["mp2d_gradient"]
        realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))

    if input_model.driver == "gradient":
        # Scatter the real-atom gradient into an array covering every atom;
        # rows for atoms that are not real stay zero.
        ireal = np.argwhere(real).reshape((-1))
        fullgrad = np.zeros((full_nat, 3))
        try:
            fullgrad[ireal, :] = realgrad
        except NameError as exc:
            # realgrad is unbound when the gradient file was None
            raise UnknownError("Unsuccessful gradient collection.") from exc

    qcvkey = input_model.extras["info"]["fctldash"].upper()

    calcinfo = []

    calcinfo.append(qcel.Datum("CURRENT ENERGY", "Eh", ene))
    calcinfo.append(qcel.Datum("DISPERSION CORRECTION ENERGY", "Eh", ene))
    calcinfo.append(qcel.Datum("2-BODY DISPERSION CORRECTION ENERGY", "Eh", ene))
    if qcvkey:
        calcinfo.append(qcel.Datum(f"{qcvkey} DISPERSION CORRECTION ENERGY", "Eh", ene))

    if input_model.driver == "gradient":
        calcinfo.append(qcel.Datum("CURRENT GRADIENT", "Eh/a0", fullgrad))
        calcinfo.append(qcel.Datum("DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
        calcinfo.append(qcel.Datum("2-BODY DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
        if qcvkey:
            calcinfo.append(qcel.Datum(f"{qcvkey} DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))

    # LOGtext += qcel.datum.print_variables({info.label: info for info in calcinfo})
    calcinfo = {info.label: info.data for info in calcinfo}
    # calcinfo = qcel.util.unnp(calcinfo, flat=True)

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    calcinfo = {k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in calcinfo.items()}

    # jobrec['properties'] = {"return_energy": ene}
    # jobrec["molecule"]["real"] = list(jobrec["molecule"]["real"])

    retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
    # Decimals were stringified just above, so the energy arrives here as a
    # str; convert it (and any Decimal) to float instead of returning text.
    if isinstance(retres, (Decimal, str)):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    output_data = {
        "extras": input_model.extras,
        "properties": {},
        "provenance": Provenance(
            creator="MP2D", version=self.get_version(), routine=__name__ + "." + sys._getframe().f_code.co_name
        ),
        "return_result": retres,
        "stdout": stdout,
    }
    output_data["extras"]["local_keywords"] = input_model.extras["info"]
    output_data["extras"]["qcvars"] = calcinfo

    output_data["success"] = True
    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Turn the collected DFTD3 output into an ``AtomicResult``.

    Parameters
    ----------
    outfiles
        Output contents keyed by name. ``"stdout"`` is popped from the
        mapping; ``"dftd3_gradient"`` and ``"dftd3_abc_gradient"`` must be
        present and may be ``None``.
    input_model
        Input that was run. ``molecule.real``, ``driver``, ``keywords`` and
        ``extras["info"]`` (``dashlevel``, ``fctldash``) are read from it; it
        is not modified.

    Returns
    -------
    AtomicResult
        The input fields overlaid with ``return_result``, ``stdout``,
        ``provenance``, empty ``properties`` and ``extras`` extended with
        ``local_keywords`` and ``qcvars``.

    Raises
    ------
    ResourceError
        If stdout contains the pre-v3.2.1 style ATM line.
    UnknownError
        If stdout never reaches the normal-termination line (unless a
        gradient on a single real atom was requested), or if a gradient is
        requested but the needed gradient array was never read.
    """
    Grimme_h2kcal = 627.509541
    h2kcal = Decimal(Grimme_h2kcal)
    stdout = outfiles.pop("stdout")
    out_lines = stdout.splitlines()

    # parse energy output (could go further and break into E6, E8, E10 and Cn coeff)
    real = np.array(input_model.molecule.real)
    full_nat = real.shape[0]
    real_nat = np.sum(real)

    for iln, ln in enumerate(out_lines):
        if re.match(" Edisp /kcal,au", ln):
            ene = Decimal(ln.split()[3])
        elif re.match(r" E6\(ABC\) \" :", ln):  # c. v3.2.0
            raise ResourceError("Cannot process ATM results from DFTD3 prior to v3.2.1.")
        elif re.match(r""" E6\(ABC\) /kcal,au:""", ln):
            atm = Decimal(ln.split()[-1])
        elif re.match(" analysis of pair-wise terms", ln):
            D3pairs = np.zeros((full_nat, full_nat))
            # The table starts two lines below this header and runs to the first blank line.
            for pair_ln in out_lines[iln + 2 :]:
                data = pair_ln.replace("-", " -").split()
                if not data:
                    break
                atom1 = int(data[0]) - 1
                atom2 = int(data[1]) - 1
                Edisp = Decimal(data[-1])
                D3pairs[atom1, atom2] = Edisp / h2kcal
                D3pairs[atom2, atom1] = D3pairs[atom1, atom2]
        elif re.match(" normal termination of dftd3", ln):
            break
    else:
        if not ((real_nat == 1) and (input_model.driver == "gradient")):
            raise UnknownError(
                f"Unsuccessful run. Check input, particularly geometry in [a0]. Model: {input_model.model}"
            )

    # parse gradient output
    # * DFTD3 crashes on one-atom gradients. Avoid the error (above) and just force the correct result (below).
    if outfiles["dftd3_gradient"] is not None:
        srealgrad = outfiles["dftd3_gradient"].replace("D", "E")
        realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))
    elif real_nat == 1:
        realgrad = np.zeros((1, 3))

    if outfiles["dftd3_abc_gradient"] is not None:
        srealgrad = outfiles["dftd3_abc_gradient"].replace("D", "E")
        realgradabc = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))
    elif real_nat == 1:
        realgradabc = np.zeros((1, 3))

    wants_gradient = input_model.driver == "gradient"
    is_atm = input_model.extras["info"]["dashlevel"] == "atmgr"

    if wants_gradient:
        ireal = np.argwhere(real).reshape((-1))
        fullgrad = np.zeros((full_nat, 3))
        try:
            # Selecting the array inside the try lets an unbound gradient name
            # surface as UnknownError instead of a bare NameError.
            rg = realgradabc if is_atm else realgrad
            fullgrad[ireal, :] = rg
        except NameError as exc:
            raise UnknownError("Unsuccessful gradient collection.") from exc

    qcvkey = input_model.extras["info"]["fctldash"].upper()

    # Every label stem gets an ENERGY entry and, for gradient runs, a GRADIENT entry.
    if is_atm:
        energy = atm
        stems = [
            "CURRENT",
            "DISPERSION CORRECTION",
            "3-BODY DISPERSION CORRECTION",
            "AXILROD-TELLER-MUTO 3-BODY DISPERSION CORRECTION",
        ]
    else:
        energy = ene
        stems = ["CURRENT", "DISPERSION CORRECTION", "2-BODY DISPERSION CORRECTION"]
        if qcvkey:
            stems.append(f"{qcvkey} DISPERSION CORRECTION")

    calcinfo = [qcel.Datum(f"{stem} ENERGY", "Eh", energy) for stem in stems]
    if wants_gradient:
        calcinfo.extend(qcel.Datum(f"{stem} GRADIENT", "Eh/a0", fullgrad) for stem in stems)

    calcinfo = {info.label: info.data for info in calcinfo}

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    calcinfo = {
        k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcel.util.unnp(calcinfo, flat=True).items()
    }

    retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
    # Decimals were stringified just above, so the energy arrives here as str.
    if isinstance(retres, (str, Decimal)):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    output_data = {
        # Shallow copy so the keys added below do not leak into input_model.extras.
        "extras": {**input_model.extras},
        "properties": {},
        "provenance": Provenance(
            creator="DFTD3", version=self.get_version(), routine=__name__ + "." + sys._getframe().f_code.co_name
        ),
        "return_result": retres,
        "stdout": stdout,
    }
    output_data["extras"]["local_keywords"] = input_model.extras["info"]
    output_data["extras"]["qcvars"] = calcinfo
    if input_model.keywords.get("save_pairwise_dispersion") is True:
        output_data["extras"]["qcvars"]["PAIRWISE DISPERSION CORRECTION ANALYSIS"] = D3pairs

    output_data["success"] = True
    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """Turn the collected gCP output into an ``AtomicResult``.

    Parameters
    ----------
    outfiles
        Output contents keyed by name. ``"stdout"`` is popped from the
        mapping; ``"gcp_gradient"`` must be present and may be ``None``.
    input_model
        Input that was run. ``molecule.real``, ``driver``, ``model`` and
        ``extras`` are read from it; it is not modified.

    Returns
    -------
    AtomicResult
        The input fields overlaid with ``return_result``, ``stdout``,
        ``provenance``, empty ``properties`` and ``extras`` extended with
        ``qcvars``.

    Raises
    ------
    UnknownError
        If this harness is named ``"GCP"`` and stdout never reaches the
        normal-termination line (unless a gradient on a single real atom was
        requested), or if a gradient is requested but none was read.
    """
    stdout = outfiles.pop("stdout")

    # parse energy output (could go further and break into E6, E8, E10 and Cn coeff)
    real = np.array(input_model.molecule.real)
    full_nat = real.shape[0]
    real_nat = np.sum(real)

    for ln in stdout.splitlines():
        if re.match(" Egcp:", ln):
            ene = Decimal(ln.split()[1])
        elif re.match(" normal termination of gCP", ln):
            break
    else:
        # Loop ended without seeing the termination line.
        if self._defaults["name"] == "GCP" and not ((real_nat == 1) and (input_model.driver == "gradient")):
            raise UnknownError(
                f"Unsuccessful run. Check input, particularly geometry in [a0]. Model: {input_model.model}"
            )

    # parse gradient output
    if outfiles["gcp_gradient"] is not None:
        srealgrad = outfiles["gcp_gradient"].replace("D", "E")
        realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))
    elif real_nat == 1:
        realgrad = np.zeros((1, 3))

    wants_gradient = input_model.driver == "gradient"
    if wants_gradient:
        # Scatter the real-atom rows into a full-size array; other rows stay zero.
        ireal = np.argwhere(real).reshape((-1))
        fullgrad = np.zeros((full_nat, 3))
        try:
            fullgrad[ireal, :] = realgrad
        except NameError as exc:
            raise UnknownError("Unsuccessful gradient collection.") from exc

    qcvkey = input_model.model.method.upper()

    # Every label stem gets an ENERGY entry and, for gradient runs, a GRADIENT entry.
    stems = ["CURRENT", "GCP CORRECTION"]
    if qcvkey:
        stems.append(f"{qcvkey} GCP CORRECTION")

    calcinfo = [qcel.Datum(f"{stem} ENERGY", "Eh", ene) for stem in stems]
    if wants_gradient:
        calcinfo.extend(qcel.Datum(f"{stem} GRADIENT", "Eh/a0", fullgrad) for stem in stems)

    calcinfo = {info.label: info.data for info in calcinfo}

    # Decimal --> str preserves precision
    calcinfo = {k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in calcinfo.items()}

    retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
    # Decimals were stringified just above, so the energy arrives here as str.
    if isinstance(retres, (str, Decimal)):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    output_data = {
        # Shallow copy so the key added below does not leak into input_model.extras.
        "extras": {**input_model.extras},
        "properties": {},
        "provenance": Provenance(
            creator="GCP", version=self.get_version(), routine=__name__ + "." + sys._getframe().f_code.co_name
        ),
        "return_result": retres,
        "stdout": stdout,
    }
    output_data["extras"]["qcvars"] = calcinfo

    output_data["success"] = True
    return AtomicResult(**{**input_model.dict(), **output_data})
def parse_output(
    self, outfiles: Dict[str, str], input_model: AtomicInput
) -> AtomicResult:  # lgtm: [py/similar-function]
    """Turn the collected CFOUR output into an ``AtomicResult``.

    Parameters
    ----------
    outfiles
        Output contents keyed by name. ``"stdout"`` and ``"stderr"`` are
        popped; the remaining entries are forwarded to ``harvest`` and the
        non-``None`` ones are stored as ``native_files``. ``"input"`` is read
        when building an error report.
    input_model
        Input that was run. ``model.method`` (an optional ``c4-`` prefix is
        stripped), ``molecule``, ``driver`` and ``extras`` are read from it.

    Returns
    -------
    AtomicResult
        The input fields overlaid with the harvested molecule, properties,
        provenance, ``return_result``, stdout/stderr and ``extras["qcvars"]``.

    Raises
    ------
    UnknownError
        If ``harvest`` raises, if it reports a non-empty error string, or if
        the quantity requested by the driver is missing from the harvested
        variables. The original exception is attached as the cause.
    """
    stdout = outfiles.pop("stdout")
    stderr = outfiles.pop("stderr")

    method = input_model.model.method.lower()
    method = method[3:] if method.startswith("c4-") else method

    # c4mol, if it exists, is dinky, just a clue to geometry of cfour results
    try:
        # July 2021: c4mol & vector returns now atin/outfile orientation depending on
        # fix_com,orientation=T/F. previously always atin orientation
        qcvars, c4hess, c4grad, c4mol, version, module, errorTMP = harvest(
            input_model.molecule, method, stdout, **outfiles
        )
    except Exception as exc:
        raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

    if errorTMP != "":
        raise UnknownError(error_stamp(outfiles["input"], stdout, stderr))

    try:
        if c4grad is not None:
            qcvars["CURRENT GRADIENT"] = c4grad
            qcvars[f"{method.upper()} TOTAL GRADIENT"] = c4grad

        if c4hess is not None:
            qcvars[f"{method.upper()} TOTAL HESSIAN"] = c4hess
            qcvars["CURRENT HESSIAN"] = c4hess

        # A properties run reports the energy as its return value.
        if input_model.driver.upper() == "PROPERTIES":
            retres = qcvars["CURRENT ENERGY"]
        else:
            retres = qcvars[f"CURRENT {input_model.driver.upper()}"]
    except KeyError as exc:
        raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

    # TODO: "xalloc(): memory allocation failed!"

    if isinstance(retres, Decimal):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    build_out(qcvars)
    atprop = build_atomicproperties(qcvars)

    provenance = Provenance(creator="CFOUR", version=self.get_version(), routine="xcfour").dict()
    if module is not None:
        provenance["module"] = module

    output_data = {
        "schema_version": 1,
        "molecule": c4mol,  # overwrites with outfile Cartesians in case fix_*=F
        "extras": {**input_model.extras},
        "native_files": {k: v for k, v in outfiles.items() if v is not None},
        "properties": atprop,
        "provenance": provenance,
        "return_result": retres,
        "stderr": stderr,
        "stdout": stdout,
        "success": True,
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    # * formerly unnp(qcvars, flat=True).items()
    output_data["extras"]["qcvars"] = {
        k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
    }

    return AtomicResult(**{**input_model.dict(), **output_data})
def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Evaluate a TorchANI model on the input molecule and package the result.

    Supports the ``energy``, ``gradient`` and ``hessian`` drivers; any other
    driver raises ``InputError``. ``ResourceError`` is raised when the
    installed TorchANI is older than 0.9, and ``InputError`` when the
    molecule contains an element the chosen model does not list as known.
    """
    # Availability and minimum version come first, before importing torch
    self.found(raise_error=True)
    if parse_version(self.get_version()) < parse_version("0.9"):
        raise ResourceError("QCEngine's TorchANI wrapper requires version 0.9 or greater.")

    import torch
    import torchani
    import numpy as np

    device = torch.device("cpu")

    # Starts out flagged as failed; flipped at the very end
    ret_data = {"success": False}

    method = input_data.model.method
    model = self.get_model(method)

    # Reject elements outside the model's known set
    symbols = input_data.molecule.symbols
    supported = {"H", "C", "N", "O"}
    if method.lower() == "ani2x":
        supported.update({"S", "F", "Cl"})

    unknown_sym = set(symbols) - supported
    if unknown_sym:
        raise InputError(f"TorchANI model '{method}' does not support symbols: {unknown_sym}.")

    num_atoms = len(symbols)
    species = model.species_to_tensor(symbols).to(device).unsqueeze(0)

    # Geometry is stored in bohr; the model is fed angstrom
    bohr_to_angstrom = ureg.conversion_factor("bohr", "angstrom")
    geom_array = input_data.molecule.geometry.reshape(1, -1, 3) * bohr_to_angstrom
    coordinates = torch.tensor(geom_array.tolist(), requires_grad=True, device=device)

    _, energy_array = model((species, coordinates))
    energy = energy_array.mean()
    ensemble_std = energy_array.std()
    ensemble_scaled_std = ensemble_std / np.sqrt(num_atoms)

    ret_data["properties"] = {"return_energy": energy.item()}

    driver = input_data.driver
    if driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif driver == "gradient":
        derivative = torch.autograd.grad(energy.sum(), coordinates)[0].squeeze()
        scaled = derivative * ureg.conversion_factor("angstrom", "bohr")
        ret_data["return_result"] = np.asarray(scaled).ravel().tolist()
    elif driver == "hessian":
        hessian = torchani.utils.hessian(coordinates, energies=energy)
        ret_data["return_result"] = np.asarray(hessian)
    else:
        raise InputError(
            f"TorchANI can only compute energy, gradient, and hessian driver methods. Found {input_data.driver}."
        )

    # Keys added to `extras`:
    #   ensemble_energies                    energy array returned by the model (all ensemble members)
    #   ensemble_energy_avg                  mean of that array, same value as `return_energy`
    #   ensemble_energy_std                  standard deviation of that array
    #   ensemble_per_root_atom_disagreement  the standard deviation divided by sqrt(number of atoms);
    #                                        the quantity used in query-by-committee (QBC) active
    #                                        learning to judge the reliability of the ensemble
    extras = input_data.extras.copy()
    extras.update(
        {
            "ensemble_energies": energy_array.detach().numpy(),
            "ensemble_energy_avg": energy.item(),
            "ensemble_energy_std": ensemble_std.item(),
            "ensemble_per_root_atom_disagreement": ensemble_scaled_std.item(),
        }
    )
    ret_data["extras"] = extras

    ret_data["provenance"] = Provenance(
        creator="torchani", version="unknown", routine="torchani.builtin.aev_computer"
    )

    ret_data["schema_name"] = "qcschema_output"
    ret_data["success"] = True

    # Merge into one dict before building the model so no keyword is passed twice
    merged = {**input_data.dict(), **ret_data}
    return AtomicResult(**merged)
def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Evaluate an OpenMM energy or gradient for the input structure in vacuum.

    ``model.basis`` must be ``smirnoff`` or ``antechamber`` (case-insensitive);
    a missing or different basis raises ``InputError``, as does any driver
    other than ``energy`` or ``gradient``.
    """
    self.found(raise_error=True)

    from simtk import openmm
    from simtk import unit

    with capture_stdout():
        import openforcefield.topology as offtop

    # Starts out flagged as failed; flipped once a result is in hand
    ret_data = {"success": False}

    if not input_data.model.basis:
        raise InputError("Method must contain a basis set.")

    # Only the smirnoff and antechamber routes are handled
    basis = input_data.model.basis.lower()
    if basis not in ["smirnoff", "antechamber"]:
        raise InputError("Accepted bases are: {'smirnoff', 'antechamber', }")

    with capture_stdout():
        # Look for a mapped SMILES, first at the top of the molecule extras and
        # then under its "cmiles" sub-dictionary
        mapped_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
        mol_extras = input_data.molecule.extras
        cmiles = None
        if mol_extras:
            cmiles = mol_extras.get(mapped_key, None)
            if cmiles is None:
                cmiles = mol_extras.get("cmiles", {}).get(mapped_key, None)

        if cmiles is None:
            # No mapped SMILES available: go through RDKit instead
            rdkit_mol = RDKitHarness._process_molecule_rdkit(input_data.molecule)
            off_mol = offtop.Molecule(rdkit_mol)
        else:
            off_mol = offtop.Molecule.from_mapped_smiles(mapped_smiles=cmiles)
            # Attach the input geometry as the conformer
            conformer = unit.Quantity(value=np.array(input_data.molecule.geometry), unit=unit.bohr)
            off_mol.add_conformer(conformer)

        openmm_system = self._generate_openmm_system(
            molecule=off_mol, method=input_data.model.method, keywords=input_data.keywords
        )

    # A Context requires an integrator even though no dynamics are run
    integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)

    # Always run on the CPU platform
    platform = openmm.Platform.getPlatformByName("CPU")

    # Thread count: TaskConfig first, then the environment; when neither is
    # set, no "Threads" property is passed to OpenMM
    nthreads = config.ncores
    if nthreads is None:
        nthreads = os.environ.get("OPENMM_CPU_THREADS")
    properties = {"Threads": str(nthreads)} if nthreads else {}

    context = openmm.Context(openmm_system, integrator, platform, properties)
    context.setPositions(off_mol.conformers[0])

    # Potential energy, put into units of hartree
    energy_state = context.getState(getEnergy=True)
    q = energy_state.getPotentialEnergy() / unit.hartree / unit.AVOGADRO_CONSTANT_NA
    ret_data["properties"] = {"return_energy": q}

    driver = input_data.driver
    if driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif driver == "gradient":
        force_state = context.getState(getForces=True)
        forces = force_state.getForces(asNumpy=True)

        # Convert to hartree/bohr and flatten to 1D
        q = (forces / (unit.hartree / unit.bohr)).reshape(-1) / unit.AVOGADRO_CONSTANT_NA

        # Force to gradient
        ret_data["return_result"] = -1 * q
    else:
        raise InputError(
            f"OpenMM can only compute energy and gradient driver methods. Found {input_data.driver}."
        )

    ret_data["success"] = True
    ret_data["extras"] = input_data.extras

    ret_data["provenance"] = Provenance(creator="openmm", version=openmm.__version__, nthreads=nthreads)

    merged = {**input_data.dict(), **ret_data}
    return AtomicResult(**merged)
def parse_output(
    self, outfiles: Dict[str, str], input_model: "AtomicInput"
) -> AtomicResult:  # lgtm: [py/similar-function]
    """Turn the collected NWChem output into an ``AtomicResult``.

    Parameters
    ----------
    outfiles
        Output contents keyed by name. ``"stdout"`` and ``"stderr"`` are
        popped; the remaining entries are forwarded to ``harvest`` and the
        non-``None`` ones are stored as ``native_files``. ``"input"`` is read
        when building an error report.
    input_model
        Input that was run. ``model.method`` (an optional ``nwc-`` prefix is
        stripped), ``molecule``, ``driver`` and ``extras`` are read from it.

    Returns
    -------
    AtomicResult
        The input fields overlaid with the harvested molecule, properties,
        provenance, ``return_result``, stdout/stderr and ``extras["qcvars"]``.

    Raises
    ------
    UnknownError
        If ``harvest`` raises or if the quantity requested by the driver is
        missing from the harvested variables. The original exception is
        attached as the cause.
    """
    # Get the stdout from the calculation (required)
    stdout = outfiles.pop("stdout")
    stderr = outfiles.pop("stderr")

    method = input_model.model.method.lower()
    method = method[4:] if method.startswith("nwc-") else method

    # Read the NWChem stdout file and, if needed, the hess or grad files
    # July 2021: nwmol & vector returns now atin/outfile orientation depending on
    # fix_com,orientation=T/F. previously always atin orientation
    try:
        qcvars, nwhess, nwgrad, nwmol, version, module, errorTMP = harvest(
            input_model.molecule, method, stdout, **outfiles
        )
    except Exception as exc:
        raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

    try:
        if nwgrad is not None:
            qcvars[f"{method.upper()} TOTAL GRADIENT"] = nwgrad
            qcvars["CURRENT GRADIENT"] = nwgrad

        if nwhess is not None:
            qcvars[f"{method.upper()} TOTAL HESSIAN"] = nwhess
            qcvars["CURRENT HESSIAN"] = nwhess

        # A properties run reports the energy as its return value.
        if input_model.driver.upper() == "PROPERTIES":
            retres = qcvars["CURRENT ENERGY"]
        else:
            retres = qcvars[f"CURRENT {input_model.driver.upper()}"]
    except KeyError as exc:
        raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

    # Normalize the output as a float or list of floats
    if isinstance(retres, Decimal):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.tolist()

    # Get the formatted properties
    build_out(qcvars)
    atprop = build_atomicproperties(qcvars)

    provenance = Provenance(creator="NWChem", version=self.get_version(), routine="nwchem").dict()
    if module is not None:
        provenance["module"] = module

    # Format them into an output
    output_data = {
        "schema_version": 1,
        "molecule": nwmol,  # overwrites with outfile Cartesians in case fix_*=F
        "extras": {**input_model.extras},
        "native_files": {k: v for k, v in outfiles.items() if v is not None},
        "properties": atprop,
        "provenance": provenance,
        "return_result": retres,
        "stderr": stderr,
        "stdout": stdout,
        "success": True,
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    # * formerly unnp(qcvars, flat=True).items()
    output_data["extras"]["qcvars"] = {
        k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
    }

    return AtomicResult(**{**input_model.dict(), **output_data})
def job_output_to_atomic_result(*, atomic_input: AtomicInput, job_output: pb.JobOutput) -> AtomicResult:
    """Build an AtomicResult from an AtomicInput and the JobOutput it produced.

    Raises ``ValueError`` for any driver other than energy or gradient.
    """
    # Protobuf values are not JSON serializable, so work from plain python types.
    jo_dict = MessageToDict(job_output, preserving_proto_field_name=True)

    driver = atomic_input.driver.upper()
    if driver == "ENERGY":
        # First entry of the energy list (ground state); may need to change for
        # excited states.
        return_result: Union[float, List[float]] = jo_dict["energy"][0]
    elif driver == "GRADIENT":
        return_result = jo_dict["gradient"]
    else:
        raise ValueError(
            f"Unsupported driver: {driver}, supported drivers " f"include: {SUPPORTED_DRIVERS}"
        )

    # A molden string is only attempted when the "molden" keyword is truthy.
    molden_string = None
    if atomic_input.keywords.get("molden"):
        try:
            molden_string = tcpb_imd_fields2molden_string(job_output)
        except Exception:
            # Failure modes of the conversion are not known, so everything is
            # caught for now and a hint is stored in place of the molden text.
            # NOTE: mo_output will set imd_orbital_type to "WHOLE_C"
            molden_string = "Unable to create molden output. Did you include the 'mo_output' keyword??"

    # The result is a superset of the input, minus the input's own provenance.
    base_fields = atomic_input.dict()
    base_fields.pop("provenance", None)

    new_provenance = Provenance(
        creator="terachem_pbs",
        version="1.9-2021.01-dev",
        routine="tcpb.TCProtobufClient.compute",
    )

    atomic_result = AtomicResult(
        **base_fields,
        provenance=new_provenance,
        return_result=return_result,
        properties=to_atomic_result_properties(job_output),
        # NOTE: Wavefunction will only be added if atomic_input.protocols.wavefunction != 'none'
        wavefunction=to_wavefunction_properties(job_output, atomic_input),
        success=True,
    )

    # (key stored in extras, key read from the job output); absent fields become None.
    qcvar_sources = (
        ("charges", "charges"),
        ("spins", "spins"),
        ("meyer_bond_order", "bond_order"),
        ("orb_size", "orb_size"),
        ("excited_state_energies", "energy"),
        ("cis_transition_dipoles", "cis_transition_dipoles"),
        ("compressed_bond_order", "compressed_bond_order"),
        ("compressed_hessian", "compressed_hessian"),
        ("compressed_ao_data", "compressed_ao_data"),
        ("compressed_primitive_data", "compressed_primitive_data"),
        ("compressed_mo_vector", "compressed_mo_vector"),
        ("imd_mmatom_gradient", "imd_mmatom_gradient"),
    )
    job_extra_fields = ("job_dir", "job_scr_dir", "server_job_id", "orb1afile", "orb1bfile")

    # Extend the extras carried over from the input with the additional values.
    atomic_result.extras.update(
        {
            "qcvars": {stored: jo_dict.get(source) for stored, source in qcvar_sources},
            "job_extras": {field: jo_dict.get(field) for field in job_extra_fields},
            "molden": molden_string,
        }
    )

    return atomic_result
def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Runs MRChem in executable mode

    The input built by ``self.build_input`` is written to ``data.json``, the
    command is executed in a temporary scratch directory, and the JSON that
    MRChem writes back is mapped onto an ``AtomicResult``.

    Raises
    ------
    InputError
        If the run succeeded but the driver is neither ``energy`` nor
        ``properties``.
    RandomError
        If the run failed and the error text mentions a segmentation fault.
    UnknownError
        If the run failed for any other reason.
    """
    self.found(raise_error=True)

    # Location resolution order config.scratch_dir, /tmp
    parent = config.scratch_directory

    error_message = None
    compute_success = False

    job_input = self.build_input(input_model, config)
    input_data = copy.deepcopy(job_input["mrchem_json"])

    output_data = {
        "keywords": input_data,
        "schema_name": "qcschema_output",
        "schema_version": 1,
        "model": input_model.model,
        "molecule": input_model.molecule,
        "driver": input_model.driver,
    }

    with temporary_directory(parent=parent, suffix="_mrchem_scratch") as tmpdir:
        # create folders
        for d in job_input["folders"]:
            if not Path(d).exists():
                Path(d).mkdir()

        # Execute the program
        success, output = execute(
            command=job_input["command"] + ["data.json"],
            infiles={"data.json": json.dumps(job_input["mrchem_json"])},
            outfiles=["data.json"],
            scratch_directory=tmpdir,
        )

        if success:
            output_data["stdout"] = output["stdout"]
            # get data from the MRChem JSON output and transfer it to the QCSchema output
            mrchem_json = json.loads(output["outfiles"]["data.json"])
            mrchem_output = mrchem_json["output"]
            output_data["success"] = mrchem_output["success"]
            output_data["provenance"] = mrchem_output["provenance"]
            # update the "routine" under "provenance"
            output_data["provenance"]["routine"] = " ".join(job_input["command"])

            # fill up properties
            output_data["properties"] = extract_properties(mrchem_output)

            # prepare a list of computed response properties
            known_rsp_props = [
                ("dipole_moment", "vector"),
                ("quadrupole_moment", "tensor"),
                ("polarizability", "tensor"),
                ("magnetizability", "tensor"),
                ("nmr_shielding", "tensor"),
            ]
            computed_rsp_props = [
                ("properties", x, y, z)
                for x, z in known_rsp_props
                if x in mrchem_output["properties"]
                for y in mrchem_output["properties"][x].keys()
            ]

            # fill up extras:
            # * under "raw_output" the whole JSON output from MRChem
            # * under "properties" all the properties computed by MRChem
            # NOTE(review): entries are keyed by property name only, so when one
            # property has several sub-keys the last one replaces the earlier ones.
            output_data["extras"] = {
                "raw_output": mrchem_json,
                "properties": {
                    f"{ks[1]}": {f"{ks[2]}": _nested_get(mrchem_output, ks)} for ks in computed_rsp_props
                },
            }

            # fill up return_result
            if input_model.driver == "energy":
                output_data["return_result"] = mrchem_output["properties"]["scf_energy"]["E_tot"]
            elif input_model.driver == "properties":
                output_data["return_result"] = {
                    f"{ks[1]}": {f"{ks[2]}": _nested_get(mrchem_output, ks)} for ks in computed_rsp_props
                }
            else:
                raise InputError(f"Driver {input_model.driver} not implemented for MRChem.")

            compute_success = mrchem_output["success"]

        else:
            # Keep the stderr text so the error dispatch below has something to inspect.
            error_message = output["stderr"]
            output_data["stderr"] = output["stderr"]
            output_data["error"] = {
                "error_message": output["stderr"],
                "error_type": "execution_error",
            }

    # Dispatch errors: a segmentation fault is reported as RandomError,
    # everything else as UnknownError
    if compute_success is False:
        # No stderr was captured when the executable ran but MRChem's own JSON
        # reported failure (or stderr was empty); fall back to a fixed message
        # so the substring checks below always operate on a string.
        message = error_message or "MRChem reported an unsuccessful calculation."
        if ("SIGSEV" in message) or ("SIGSEGV" in message) or ("segmentation fault" in message):
            raise RandomError(message)
        else:
            raise UnknownError(message)

    return AtomicResult(**output_data)
def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Runs OpenMM on given structure, inputs, in vacuum.

    The force field is taken from ``model.offxml`` (a path) or, failing that,
    from ``model.url`` (downloaded); ``InputError`` is raised when neither is
    set or when the driver is not ``energy`` or ``gradient``.

    The energy is returned in hartree and the gradient in hartree/bohr as a
    flat array; the gradient is the negative of the forces OpenMM reports.
    """
    self.found(raise_error=True)

    from simtk import openmm
    from simtk import unit
    import openforcefield.topology as offtop

    # Failure flag
    ret_data = {"success": False}

    # generate basis, not given
    if not input_data.model.basis:
        basis = self._generate_basis(input_data)
        ret_data["basis"] = basis

    # get number of threads to use from `TaskConfig.ncores`; otherwise, try environment variable
    nthreads = config.ncores
    if nthreads is None:
        nthreads = os.environ.get("OPENMM_CPU_THREADS")

    # Set workdir to scratch
    # Location resolution order config.scratch_dir, /tmp
    parent = config.scratch_directory
    with temporary_directory(parent=parent, suffix="_openmm_scratch") as tmpdir:

        # Grab molecule, forcefield
        jmol = input_data.molecule

        # TODO: If urls are supported by
        # `openforcefield.typing.engines.smirnoff.ForceField` already, we
        # can eliminate the `offxml` and `url` distinction
        # URL processing can happen there instead
        if getattr(input_data.model, "offxml", None):
            # we were given a file path or relative path
            offxml = input_data.model.offxml

            # Load an Open Force Field `ForceField`
            off_forcefield = self._get_off_forcefield(offxml, offxml)
        elif getattr(input_data.model, "url", None):
            # we were given a url
            with urllib.request.urlopen(input_data.model.url) as req:
                xml = req.read()

            # Load an Open Force Field `ForceField`
            off_forcefield = self._get_off_forcefield(xml.decode(), xml)
        else:
            raise InputError("OpenMM requires either `model.offxml` or `model.url` to be set")

        # Process molecule with RDKit
        rdkit_mol = RDKitHarness._process_molecule_rdkit(jmol)

        # Create an Open Force Field `Molecule` from the RDKit Molecule
        off_mol = offtop.Molecule(rdkit_mol)

        # Create OpenMM system in vacuum from forcefield, molecule
        off_top = off_mol.to_topology()
        openmm_system = self._get_openmm_system(off_forcefield, off_top)

        # Need an integrator for simulation even if we don't end up using it really
        integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)

        # Set platform to CPU explicitly
        platform = openmm.Platform.getPlatformByName("CPU")

        # Set number of threads to use
        # if `nthreads` is `None`, OpenMM default of all logical cores on
        # processor will be used
        if nthreads:
            properties = {"Threads": str(nthreads)}
        else:
            properties = {}

        # Initialize context
        context = openmm.Context(openmm_system, integrator, platform, properties)

        # Set positions from our Open Force Field `Molecule`
        context.setPositions(off_mol.conformers[0])

        # Compute the energy of the configuration
        state = context.getState(getEnergy=True)

        # Get the potential as a simtk.unit.Quantity, put into units of hartree.
        # OpenMM reports molar energies, so Avogadro's constant is divided out
        # as well; the result is then a plain number.
        q = state.getPotentialEnergy() / unit.hartree / unit.AVOGADRO_CONSTANT_NA
        ret_data["properties"] = {"return_energy": q}

        # Execute driver
        if input_data.driver == "energy":
            ret_data["return_result"] = ret_data["properties"]["return_energy"]

        elif input_data.driver == "gradient":
            # Get number of atoms
            n_atoms = len(jmol.symbols)

            # Compute the forces
            state = context.getState(getForces=True)

            # Get the forces as a simtk.unit.Quantity with shape (n_atoms, 3)
            forces = state.getForces(asNumpy=True)

            # Convert to hartree/bohr and reformat as 1D array
            q = (forces / (unit.hartree / unit.bohr)).reshape([n_atoms * 3]) / unit.AVOGADRO_CONSTANT_NA

            # Force to gradient
            ret_data["return_result"] = -1 * q
        else:
            raise InputError(
                f"OpenMM can only compute energy and gradient driver methods. Found {input_data.driver}."
            )

    ret_data["success"] = True

    # Move several pieces up a level
    ret_data["provenance"] = Provenance(creator="openmm", version=openmm.__version__, nthreads=nthreads)

    return AtomicResult(**{**input_data.dict(), **ret_data})
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """
    Convert the entos ``results.json`` output into an ``AtomicResult``.

    Parameters
    ----------
    outfiles : Dict[str, str]
        Output files by name; ``"results.json"`` must be present and contain a
        ``"json_results"`` object.
    input_model : AtomicInput
        The input that produced the output. Its driver, method, basis name and
        wavefunction protocol steer what is parsed.

    Returns
    -------
    AtomicResult
        The input fields plus ``properties``, ``return_result``, ``extras`` and,
        when the wavefunction protocol is not ``"none"``, ``wavefunction``.

    Raises
    ------
    NotImplementedError
        If the driver is not ``energy``, ``gradient`` or ``hessian``.
    KeyError
        If the requested result, or the basis set needed for the wavefunction
        protocol, is missing from the entos output.
    """
    scf_map = {"energy": "scf_total_energy", "n_iter": "scf_iterations"}
    dft_map = scf_map.copy()
    hf_map = scf_map.copy()
    xtb_map = scf_map.copy()
    energy_command_map = {"dft": dft_map, "hf": hf_map, "xtb": xtb_map}

    extras_map = {"converged": "scf_converged"}
    wavefunction_map = {
        "restricted": {
            "orbitals": "scf_orbitals_a",
            "density": "scf_density_a",
            "fock": "scf_fock_a",
            "eigenvalues": "scf_eigenvalues_a",
            "occupations": "scf_occupations_a",
        },
        "unrestricted": {
            "orbitals_alpha": "scf_orbitals_a",
            "orbitals_beta": "scf_orbitals_b",
            "density_alpha": "scf_density_a",
            "density_beta": "scf_density_b",
            "fock_alpha": "scf_fock_a",
            "fock_beta": "scf_fock_b",
            "eigenvalues_alpha": "scf_eigenvalues_a",
            "eigenvalues_beta": "scf_eigenvalues_b",
            "occupations_alpha": "scf_occupations_a",
            "occupations_beta": "scf_occupations_b",
        },
    }

    # Determine the energy_command
    energy_command = self.determine_energy_command(input_model.model.method)

    gradient_map = {"gradient": "gradient"}
    gradient_map.update({"energy": "scf_total_energy"})
    # TODO Uncomment once entos adds scf_map to gradient json results
    # gradient_map.update(energy_command_map[energy_command])

    hessian_map = {"hessian": "hessian"}
    hessian_map.update(energy_command_map[energy_command])

    # Determine whether to use the energy map or the gradient map
    if input_model.driver == "energy":
        entos_map = energy_command_map[energy_command]
    elif input_model.driver == "gradient":
        entos_map = gradient_map
    elif input_model.driver == "hessian":
        entos_map = hessian_map
    else:
        raise NotImplementedError(f"Driver {input_model.driver} not implemented for entos.")

    # Parse the results.json output from entos
    load_results = json.loads(outfiles["results.json"])
    entos_results = load_results["json_results"]
    properties = {
        schema_key: entos_results[entos_key]
        for entos_key, schema_key in entos_map.items()
        if entos_key in entos_results
    }

    # Parse calcinfo_* properties from the results.json
    if "ao_basis" in entos_results:
        properties["calcinfo_nbasis"] = entos_results["ao_basis"]["__Basis"]["n_functions"]
    if "structure" in entos_results:
        properties["calcinfo_natom"] = len(entos_results["structure"]["__Atoms"]["atoms"])

    # Parse wavefunction quantities from entos_results
    wavefunction = {}
    if input_model.protocols.wavefunction != "none":

        # First parse basis set information
        if "ao_basis" not in entos_results:
            raise KeyError(
                f"Basis set information not found so wavefunction protocol {input_model.protocols.wavefunction} is not available."
            )

        atom_map = [item[0] for item in entos_results["structure"]["__Atoms"]["atoms"]]

        # Each item in electron_shells is a dictionary containing info for one basis function
        electron_shells_by_center = {}
        for basis_item in entos_results["ao_basis"]["__Basis"]["electron_shells"]:
            electron_shell_info = {
                "angular_momentum": [basis_item["angular_momentum"]],
                "harmonic_type": basis_item["function_type"].split("_")[-1],
                "exponents": basis_item["exponents"],
                "coefficients": basis_item["coefficients"],
            }
            electron_shells_by_center.setdefault(basis_item["center_index"], []).append(electron_shell_info)

        # Construct center_data from electron_shells_by_center
        # Note: Duplicate atoms will over write each other
        # The keys are looked up as 0..n-1, so this is a lookup by center index
        # rather than an iteration over the dict.
        center_data = {}
        for i in range(len(electron_shells_by_center)):
            center_data[atom_map[i]] = {"electron_shells": electron_shells_by_center[i]}

        # Construct BasisSet
        basis_info = {
            "name": input_model.model.basis,
            # "description": "",  # None provided by entos
            "center_data": center_data,
            "atom_map": atom_map,
            "nbf": entos_results["ao_basis"]["__Basis"]["n_functions"],
        }
        basis_set = BasisSet(**basis_info)
        wavefunction["basis"] = basis_set

        # Now parse wavefunction information
        n_channels = entos_results["n_channels"]
        if n_channels == 1:
            spin_case = "restricted"
        elif n_channels == 2:
            # TODO Add a test in QCEngineRecords
            spin_case = "unrestricted"
        else:
            # Any other channel count is left unparsed.
            spin_case = None

        if spin_case is not None:
            wavefunction["restricted"] = spin_case == "restricted"
            # Use the sub-map matching the spin case: the alpha/beta keys only
            # exist in the "unrestricted" map.
            for key, schema_key in wavefunction_map[spin_case].items():
                if key not in entos_results:
                    continue
                if "orbitals" in key:
                    orbitals_transposed = reorder_column_ao_indices(
                        np.array(entos_results[key]), basis_set, self._entos_to_cca_ao_order
                    )
                    wavefunction[schema_key] = orbitals_transposed.transpose()
                elif "density" in key or "fock" in key:
                    wavefunction[schema_key] = reorder_row_and_column_ao_indices(
                        entos_results[key], basis_set, self._entos_to_cca_ao_order
                    )
                else:
                    wavefunction[schema_key] = entos_results[key]

    # Parse results for the extras_map from results.json
    extras = {
        schema_key: entos_results[entos_key]
        for entos_key, schema_key in extras_map.items()
        if entos_key in entos_results
    }

    # Initialize output_data by copying over input_model.dict()
    output_data = input_model.dict()

    # Determine the correct return_result
    if input_model.driver == "energy":
        if "scf_total_energy" in properties:
            output_data["return_result"] = properties["scf_total_energy"]
        else:
            raise KeyError(f"Could not find {input_model.model} total energy")
    elif input_model.driver == "gradient" or input_model.driver == "hessian":
        if input_model.driver in properties:
            # The gradient/hessian is the return value, not a schema property.
            output_data["return_result"] = properties.pop(input_model.driver)
        else:
            raise KeyError(f"{input_model.driver} not found.")
    else:
        raise NotImplementedError(f"Driver {input_model.driver} not implemented for entos.")

    output_data["properties"] = properties
    if input_model.protocols.wavefunction != "none":
        output_data["wavefunction"] = wavefunction
    output_data["extras"].update(extras)
    output_data["schema_name"] = "qcschema_output"
    output_data["success"] = True

    return AtomicResult(**output_data)
def compute(self, input_model: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
    """
    Run Psi4 on ``input_model`` and return the result as an ``AtomicResult``.

    Three execution routes are used, chosen by the installed Psi4 version and
    by ``input_model.extras["psiapi"]``:

    * Psi4 older than ``1.4a2.dev160``: ``psi4 --json data.json`` subprocess.
    * newer Psi4 with ``extras["psiapi"]`` truthy: in-process call to
      ``psi4.schema_wrapper.run_qcschema``.
    * newer Psi4 otherwise: ``psi4 --qcschema data.msgpack`` subprocess.

    Note that ``input_model`` is modified in place: ``model.basis`` is replaced
    by ``""`` when it is falsy, and ``keywords["reference"]`` may be set.

    Raises
    ------
    ResourceError
        If the Psi4 version is older than 1.2, or on a PSIO error that mentions
        the scratch directory.
    InputError
        If ``model.basis`` is a ``BasisSet`` object, or Psi4 reports an input
        or validation problem.
    RandomError
        On other PSIO errors and on segmentation faults.
    UnknownError
        On any other failure.
    """
    self.found(raise_error=True)
    pversion = parse_version(self.get_version())

    if pversion < parse_version("1.2"):
        raise ResourceError("Psi4 version '{}' not understood.".format(
            self.get_version()))

    # Location resolution order config.scratch_dir, $PSI_SCRATCH, /tmp
    parent = config.scratch_directory
    if parent is None:
        parent = os.environ.get("PSI_SCRATCH", None)

    # Filled in by whichever execution route runs below.
    error_type = None
    error_message = None
    compute_success = False

    if isinstance(input_model.model.basis, BasisSet):
        raise InputError(
            "QCSchema BasisSet for model.basis not implemented. Use string basis name."
        )

    # Basis must not be None for HF3c
    # The original value is remembered so it can be written back into the output.
    old_basis = input_model.model.basis
    input_model.model.__dict__["basis"] = old_basis or ""

    with temporary_directory(parent=parent, suffix="_psi_scratch") as tmpdir:

        # Keyword names are compared case-insensitively.
        caseless_keywords = {
            k.lower(): v
            for k, v in input_model.keywords.items()
        }
        # Non-singlet molecules default to a UHF reference unless the caller chose one.
        if (input_model.molecule.molecular_multiplicity != 1) and ("reference" not in caseless_keywords):
            input_model.keywords["reference"] = "uhf"

        # Old-style JSON-based command line
        if pversion < parse_version("1.4a2.dev160"):

            # Setup the job
            input_data = input_model.dict(encoding="json")
            input_data["nthreads"] = config.ncores
            # 95% of the configured memory is handed to Psi4.
            input_data["memory"] = int(config.memory * 1024 * 1024 * 1024 * 0.95)  # Memory in bytes
            input_data["success"] = False
            input_data["return_output"] = True

            # This Psi4 route is given the schema name spelled "qc_schema_input".
            if input_data["schema_name"] == "qcschema_input":
                input_data["schema_name"] = "qc_schema_input"

            # Execute the program
            success, output = execute(
                [
                    which("psi4"), "--scratch", tmpdir, "--json", "data.json"
                ],
                {"data.json": json.dumps(input_data)},
                ["data.json"],
                scratch_directory=tmpdir,
            )

            # Fall back to the input data when the process itself failed.
            output_data = input_data.copy()
            if success:
                output_data = json.loads(output["outfiles"]["data.json"])
                if "extras" not in output_data:
                    output_data["extras"] = {}

                # Check QCVars
                local_qcvars = output_data.pop("psi4:qcvars", None)
                if local_qcvars:
                    # Edge case where we might already have qcvars, should not happen
                    if "qcvars" in output_data["extras"]:
                        output_data["extras"]["local_qcvars"] = local_qcvars
                    else:
                        output_data["extras"]["qcvars"] = local_qcvars

                # The process ran, but Psi4 may still report a failed calculation.
                if output_data.get("success", False) is False:
                    error_message, error_type = self._handle_errors(output_data)
                else:
                    compute_success = True
            else:
                error_message = output.get("stderr", "No STDERR output")
                error_type = "execution_error"

            # Reset the schema if required
            output_data["schema_name"] = "qcschema_output"
            output_data.pop("memory", None)
            output_data.pop("nthreads", None)
            output_data["stdout"] = output_data.pop("raw_output", None)

        else:

            if input_model.extras.get("psiapi", False):
                import psi4

                # Remember the current default scratch path; it is written back after the run.
                orig_scr = psi4.core.IOManager.shared_object().get_default_path()
                psi4.core.set_num_threads(config.ncores, quiet=True)
                psi4.set_memory(f"{config.memory}GB", quiet=True)
                # psi4.core.IOManager.shared_object().set_default_path(str(tmpdir))
                if pversion < parse_version("1.4"):  # adjust to where DDD merged
                    # slightly dangerous in that if `qcng.compute({..., psiapi=True}, "psi4")` called *from psi4
                    #   session*, session could unexpectedly get its own files cleaned away.
                    output_data = psi4.schema_wrapper.run_qcschema(input_model).dict()
                else:
                    output_data = psi4.schema_wrapper.run_qcschema(input_model, postclean=False).dict()
                # success here means execution returned. output_data may yet be qcel.models.AtomicResult or qcel.models.FailedOperation
                success = True
                if output_data.get("success", False):
                    output_data["extras"]["psiapi_evaluated"] = True
                psi4.core.IOManager.shared_object().set_default_path(orig_scr)
            else:
                run_cmd = [
                    which("psi4"),
                    "--scratch",
                    str(tmpdir),
                    "--nthread",
                    str(config.ncores),
                    "--memory",
                    f"{config.memory}GB",
                    "--qcschema",
                    "data.msgpack",
                ]
                input_files = {
                    "data.msgpack": input_model.serialize("msgpack-ext")
                }
                success, output = execute(run_cmd,
                                          input_files, ["data.msgpack"],
                                          as_binary=["data.msgpack"],
                                          scratch_directory=tmpdir)
                if success:
                    output_data = deserialize(
                        output["outfiles"]["data.msgpack"], "msgpack-ext")
                else:
                    # Process failed: keep the input so the error path has a dict to work with.
                    output_data = input_model.dict()

            # Shared outcome handling for both new-style routes.
            if success:
                if output_data.get("success", False) is False:
                    error_message, error_type = self._handle_errors(output_data)
                else:
                    compute_success = True
            else:
                error_message = output.get("stderr", "No STDERR output")
                error_type = "execution_error"

    # Dispatch errors, PSIO Errors are not recoverable for future runs
    if compute_success is False:

        if "PSIO Error" in error_message:
            if "scratch directory" in error_message:
                # Psi4 cannot access the folder or file
                raise ResourceError(error_message)
            else:
                # Likely a random error, worth retrying
                raise RandomError(error_message)
        # Both spellings are checked ("SIGSEV" as well as "SIGSEGV").
        elif ("SIGSEV" in error_message) or ("SIGSEGV" in error_message) or ("segmentation fault" in error_message):
            raise RandomError(error_message)
        elif ("TypeError: set_global_option" in error_message) or (error_type == "ValidationError"):
            raise InputError(error_message)
        elif "RHF reference is only for singlets" in error_message:
            raise InputError(error_message)
        else:
            raise UnknownError(error_message)

    # Reset basis
    output_data["model"]["basis"] = old_basis

    # Move several pieces up a level
    output_data["provenance"]["memory"] = round(config.memory, 3)
    output_data["provenance"]["nthreads"] = config.ncores

    # Delete keys
    output_data.pop("return_output", None)

    return AtomicResult(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
    """
    Build an ``AtomicResult`` from Molpro's XML output.

    ``outfiles["dispatch.xml"]`` is parsed for energies, dipole moments, the
    gradient, selected Molpro variables and the basis-set size;
    ``outfiles["dispatch.out"]`` is attached as stdout. Quantities for local
    correlation methods are stored in ``extras`` rather than ``properties``.

    Raises ``KeyError`` when no total energy can be found for the input method.
    """
    ns = {"molpro_uri": "http://www.molpro.net/schema/molpro-output"}
    root = ET.ElementTree(ET.fromstring(outfiles["dispatch.xml"])).getroot()
    # print(root.tag)

    # TODO Read information from molecule tag
    #   - cml:molecule, cml:atomArray (?)
    #   - basisSet
    #       - Be aware of symmetry. Might only be able to support if symmetry,nosym
    #   - orbitals
    #       - Be aware of symmetry. Might only be able to support if symmetry,nosym

    # NOTE: Spherical basis set ordering in Molpro (with no symmetry)
    #   S -->  0
    #   P --> +1, -1,  0
    #   D -->  0, -2, +1, +2, -1
    #   F --> +1, -1,  0, +3, -2, -3, +2
    #   G -->  0, -2, +1, +4, -1, +2, -4, +3, -3
    #   H --> +1, -1, +2, +3, -4, -3, +4, -5,  0, +5, -2
    #   I --> +6, -2, +5, +4, -5, +2, -6, +3, -4,  0, -3, -1, +1

    properties = {}
    extras = {}

    # Molpro commands map: property name -> method -> schema key
    scf_methods = ("HF", "RHF", "UHF", "KS", "RKS", "UKS")
    dipole_map = {tag: "scf_dipole_moment" for tag in scf_methods}
    dipole_map["MP2"] = "mp2_dipole_moment"
    dipole_map["CCSD"] = "ccsd_dipole_moment"
    dipole_map["CCSD(T)"] = "ccsd_prt_pr_dipole_moment"
    molpro_map = {
        "Energy": {tag: "scf_total_energy" for tag in scf_methods},
        "total energy": {
            "MP2": "mp2_total_energy",
            "CCSD": "ccsd_total_energy",
            "CCSD(T)": "ccsd_prt_pr_total_energy",
        },
        "correlation energy": {
            "MP2": "mp2_correlation_energy",
            "CCSD": "ccsd_correlation_energy",
            "CCSD(T)": "ccsd_prt_pr_correlation_energy",  # Need both CCSD(T) and Total
            "Total": "ccsd_prt_pr_correlation_energy",  # Total corresponds to CCSD(T) correlation energy
        },
        "singlet pair energy": {"MP2": "mp2_singlet_pair_energy", "CCSD": "ccsd_singlet_pair_energy"},
        "triplet pair energy": {"MP2": "mp2_triplet_pair_energy", "CCSD": "ccsd_triplet_pair_energy"},
        "Dipole moment": dipole_map,
    }

    # Basic support for local correlation methods in Molpro; these go to extras
    molpro_extras_map = {
        "total energy": {
            "LMP2": "local_mp2_total_energy",
            "LCCSD": "local_ccsd_total_energy",
            "LCCSD(T0)": "local_ccsd_prt0_pr_total_energy",
            "LCCSD(T)": "local_ccsd_prt_pr_total_energy",
        },
        "correlation energy": {"LMP2": "local_mp2_correlation_energy", "LCCSD": "local_ccsd_correlation_energy"},
        "singlet pair energy": {"LMP2": "local_mp2_singlet_pair_energy", "LCCSD": "local_ccsd_singlet_pair_energy"},
        "triplet pair energy": {"LMP2": "local_mp2_triplet_pair_energy", "LCCSD": "local_ccsd_triplet_pair_energy"},
        "singles energy": {"LCCSD": "local_ccsd_singles_energy"},
        # "strong pair energy": {
        #     "LCCSD": "local_ccsd_strong_pair_energy"
        # },
        # "weak pair energy": {
        #     "LMP2": "local_mp2_weak_pair_energy"
        # }
    }

    # Molpro variables map used for quantities not found in the command map
    molpro_variable_map = {
        "_ENUC": "nuclear_repulsion_energy",
        "_DFTFUN": "scf_xc_energy",
        "_NELEC": ["calcinfo_nalpha", "calcinfo_nbeta"]
        # "_EMP2_SCS": "scs_mp2_total_energy"
    }

    # Walk every jobstep; each one holds the output of one Molpro command (e.g. {HF}, {force})
    for step in root.findall("molpro_uri:job/molpro_uri:jobstep", ns):
        if "FORCE" in step.attrib["command"]:
            # Gradient is kept as one flat list ordered [1x, 1y, 1z, 2x, 2y, 2z, ...]
            for grad_node in step.findall("molpro_uri:gradient", ns):
                properties["gradient"] = [float(x) for x in grad_node.text.split()]
            continue

        # Energies and dipole moments
        for prop_node in step.findall("molpro_uri:property", ns):
            prop_name = prop_node.attrib["name"]
            prop_method = prop_node.attrib["method"]
            raw_value = prop_node.attrib["value"]

            schema_key = molpro_map.get(prop_name, {}).get(prop_method)
            if schema_key is not None:
                if prop_name == "Dipole moment":
                    properties[schema_key] = [float(x) for x in raw_value.split()]
                else:
                    properties[schema_key] = float(raw_value)
                continue

            extras_key = molpro_extras_map.get(prop_name, {}).get(prop_method)
            if extras_key is not None:
                extras[extras_key] = float(raw_value)

    # Convert singlet/triplet pair energies into same-spin and opposite-spin correlation energies
    pair_energy_keys = (
        (
            "mp2_singlet_pair_energy",
            "mp2_triplet_pair_energy",
            "mp2_same_spin_correlation_energy",
            "mp2_opposite_spin_correlation_energy",
        ),
        (
            "ccsd_singlet_pair_energy",
            "ccsd_triplet_pair_energy",
            "ccsd_same_spin_correlation_energy",
            "ccsd_opposite_spin_correlation_energy",
        ),
    )
    for singlet_key, triplet_key, same_spin_key, opposite_spin_key in pair_energy_keys:
        if singlet_key in properties and triplet_key in properties:
            properties[same_spin_key] = (2.0 / 3.0) * properties[triplet_key]
            properties[opposite_spin_key] = (1.0 / 3.0) * properties[triplet_key] + properties[singlet_key]
            del properties[singlet_key]
            del properties[triplet_key]

    # Data attached to the molecule tag
    # Note: For the DFT case molecule_method is the name of the functional plus R or U in front
    molecule = root.find("molpro_uri:job/molpro_uri:molecule", ns)
    molecule_method = molecule.attrib["method"]
    # Energy from the molecule tag in case its needed
    molecule_final_energy = float(molecule.attrib["energy"])

    # Pick up the variables listed in molpro_variable_map
    for var_node in molecule.findall("molpro_uri:variables/molpro_uri:variable", ns):
        var_name = var_node.attrib["name"]
        if var_name not in molpro_variable_map:
            continue
        if var_name == "_NELEC":
            # Split the electron count into alpha/beta using the input multiplicity
            nelec = int(float(var_node[0].text))
            nunpaired = input_model.molecule.molecular_multiplicity - 1
            nbeta = (nelec - nunpaired) // 2
            nalpha = nelec - nbeta
            alpha_key, beta_key = molpro_variable_map[var_name][0], molpro_variable_map[var_name][1]
            properties[alpha_key] = nalpha
            properties[beta_key] = nbeta
        else:
            properties[molpro_variable_map[var_name]] = float(var_node[0].text)

    # Process basis set data
    basis_set = root.find("molpro_uri:job/molpro_uri:molecule/molpro_uri:basisSet", ns)
    # angular_type = basis_set.attrib['angular']  # cartesian vs spherical
    properties["calcinfo_nbasis"] = int(basis_set.attrib["length"])

    # Grab the method from input
    method = input_model.model.method.upper()

    # Determine the final energy; fail if none is found for the requested method
    total_energy_map = molpro_map["total energy"]
    scf_energy_map = molpro_map["Energy"]
    if method in total_energy_map and total_energy_map[method] in properties:
        final_energy = properties[total_energy_map[method]]
    elif method in scf_energy_map and scf_energy_map[method] in properties:
        final_energy = properties[scf_energy_map[method]]
    elif method in molecule_method:
        # Back up: use the total energy from the molecule tag when it matches the input method
        final_energy = molecule_final_energy
        if method in self._post_hf_methods:
            properties[molpro_map["total energy"][method]] = molecule_final_energy
            properties[molpro_map["correlation energy"][method]] = (
                molecule_final_energy - properties["scf_total_energy"]
            )
        elif method in self._dft_functionals:
            properties[molpro_map["Energy"]["KS"]] = molecule_final_energy
        elif method in self._hf_methods:
            properties[molpro_map["Energy"][method]] = molecule_final_energy
    else:
        raise KeyError(f"Could not find {method} total energy")

    # Start from a copy of the input fields
    output_data = input_model.dict()

    # Determining return_result
    if input_model.driver == "energy":
        output_data["return_result"] = final_energy
    elif input_model.driver == "gradient":
        output_data["return_result"] = properties.pop("gradient")

    # Final output_data assignments needed for the AtomicResult object
    output_data["properties"] = properties
    output_data["extras"].update(extras)
    output_data["schema_name"] = "qcschema_output"
    output_data["stdout"] = outfiles["dispatch.out"]
    output_data["success"] = True

    return AtomicResult(**output_data)