def test_result_wrong_serialization(water, result_input, res_failure):
    """A failed ``Result`` keeps its ``ComputeError`` and still serializes."""
    failed = Result(molecule=water, **result_input, **res_failure)
    assert isinstance(failed.error, ComputeError)

    # Both the dict and the JSON export must work on a failed result.
    assert isinstance(failed.dict(), dict)
    serialized = failed.json()
    assert isinstance(serialized, str)
    # The error text supplied by the fixture has to survive serialization.
    assert 'its all good' in serialized
def test_result_pass_serialization(water, result_input, res_success):
    """Both the input model and a successful result export to dict and JSON."""
    request = ResultInput(molecule=water, **result_input)
    assert isinstance(request.dict(), dict)
    assert isinstance(request.json(), str)

    outcome = Result(molecule=water, **result_input, **res_success)
    assert isinstance(outcome.dict(), dict)
    assert isinstance(outcome.json(), str)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Turn the captured GAMESS stdout into a ``Result``.

    Parameters
    ----------
    outfiles
        Files produced by the run; only the ``"stdout"`` entry is read.
    input_model
        The input the job was launched with. Its fields form the base of the
        returned ``Result`` and its ``driver`` selects the returned quantity.

    Returns
    -------
    Result
        ``input_model`` fields overlaid with the parsed output data.
    """
    stdout = outfiles["stdout"]

    # parsed_mol, if it exists, is dinky, just a clue to geometry of gamess results
    harvested, gradient, parsed_mol = harvest(input_model.molecule, stdout)
    if gradient is not None:
        harvested['CURRENT GRADIENT'] = gradient
    harvested = unnp(harvested, flat=True)

    output_data = {
        'schema_name': 'qcschema_output',
        'molecule': parsed_mol,
        'schema_version': 1,
        'extras': {},
    }
    output_data['properties'] = {
        'nuclear_repulsion_energy': parsed_mol.nuclear_repulsion_energy(),
    }
    # The driver name picks which "CURRENT ..." variable is the return value.
    output_data['return_result'] = harvested[f'CURRENT {input_model.driver.upper()}']
    output_data['stdout'] = stdout

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    flat_qcvars = {}
    for name, value in qcel.util.unnp(harvested, flat=True).items():
        flat_qcvars[name.upper()] = float(value) if isinstance(value, Decimal) else value
    output_data['extras']['qcvars'] = flat_qcvars

    output_data['success'] = True

    # Parsed values take precedence over the fields copied from the input.
    merged = input_model.dict()
    merged.update(output_data)
    return Result(**merged)
def opti_success(water, result_input, res_success):
    """Return the payload of a successful three-step optimization.

    The trajectory holds the same ``Result`` object three times.
    """
    step = Result(molecule=water, **result_input, **res_success)

    payload = {"success": True}
    payload["trajectory"] = [step, step, step]
    payload["final_molecule"] = water
    payload["energies"] = [1.0, 2.0, 3.0]
    return payload
def _compute(self, driver):
    """Run the schema input for ``driver`` through Psi4's JSON wrapper.

    Returns the wrapper's output dictionary wrapped in a ``Result``.
    """
    import psi4

    schema_input = self.generate_schema_input(driver)
    raw_output = psi4.json_wrapper.run_json(schema_input.dict())
    return Result(**raw_output)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the stdout of a finished GAMESS job.

    Parameters
    ----------
    outfiles
        Files produced by the run; only ``outfiles["stdout"]`` is used.
    input_model
        Input the job was started from; provides the molecule handed to the
        harvester, the driver name and the base fields of the result.

    Returns
    -------
    Result
        The input fields overlaid with the harvested output data.
    """
    gamess_stdout = outfiles["stdout"]

    # gms_mol, if it exists, is dinky, just a clue to geometry of gamess results
    variables, gms_grad, gms_mol = harvest(input_model.molecule, gamess_stdout)
    if gms_grad is not None:
        variables['CURRENT GRADIENT'] = gms_grad
    variables = unnp(variables, flat=True)

    output_data = {
        'schema_name': 'qcschema_output',
        'molecule': gms_mol,
        'schema_version': 1,
        'extras': {},
        'properties': {
            'nuclear_repulsion_energy': gms_mol.nuclear_repulsion_energy(),
        },
        'return_result': variables[f'CURRENT {input_model.driver.upper()}'],
        'stdout': gamess_stdout,
    }

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    exported = {}
    for key, val in qcel.util.unnp(variables, flat=True).items():
        if isinstance(val, Decimal):
            val = float(val)
        exported[key.upper()] = val
    output_data['extras']['qcvars'] = exported

    output_data['success'] = True

    # Output entries override same-named entries coming from the input model.
    combined = {**input_model.dict(), **output_data}
    return Result(**combined)
def psi4(input_model, config):
    """
    Runs Psi4 in API mode

    Parameters
    ----------
    input_model
        Input model; a copy is converted to a dict and handed to Psi4.
    config
        Job configuration providing ``ncores``, ``memory`` and
        ``scratch_directory``.

    Returns
    -------
    Result or FailedOperation
        ``Result`` when Psi4 reports success, otherwise a ``FailedOperation``
        carrying the error and the remaining output data.

    Raises
    ------
    ImportError
        If the ``psi4`` module cannot be imported.
    TypeError
        If the Psi4 version is not newer than 1.2.
    ValueError
        If the run failed with a "PSIO Error".
    """
    try:
        import psi4
    except ImportError as err:
        # Chain the original failure so the real import problem stays visible.
        raise ImportError("Could not find Psi4 in the Python path.") from err

    # Setup the job
    input_data = input_model.copy().dict()
    input_data["nthreads"] = config.ncores
    input_data["memory"] = int(config.memory * 1024 * 1024 * 1024 * 0.95)  # Memory in bytes
    input_data["success"] = False

    # Psi4 is handed the "qc_schema_input" spelling; remember to undo the rename afterwards.
    reset_schema = False
    if input_data["schema_name"] == "qcschema_input":
        input_data["schema_name"] = "qc_schema_input"
        reset_schema = True

    scratch = config.scratch_directory
    if scratch is not None:
        input_data["scratch_location"] = scratch

    psi_version = _parse_psi_version(psi4.__version__)

    if psi_version > parse_version("1.2"):
        mol = psi4.core.Molecule.from_schema(input_data)
        if mol.multiplicity() != 1:
            input_data["keywords"]["reference"] = "uks"

        output_data = psi4.json_wrapper.run_json(input_data)
    else:
        raise TypeError("Psi4 version '{}' not understood.".format(psi_version))

    if reset_schema:
        output_data["schema_name"] = "qcschema_input"

    # Dispatch errors, PSIO Errors are not recoverable for future runs
    if output_data["success"] is False:
        if "PSIO Error" in output_data["error"]:
            raise ValueError(output_data["error"])

    # Move several pieces up a level
    if output_data["success"]:
        memory_bytes = input_data["memory"]
        nthreads = input_data["nthreads"]
        output_data["provenance"]["memory"] = round(memory_bytes / (1024**3), 3)
        output_data["provenance"]["nthreads"] = nthreads

        # Drop the bookkeeping keys from the *output* before building the Result.
        # The previous code deleted "nthreads" from input_data, which only cleaned
        # the output when run_json returned the very same dict object.
        output_data.pop("memory", None)
        output_data.pop("nthreads", None)
        return Result(**output_data)

    return FailedOperation(success=output_data.pop("success", False),
                           error=output_data.pop("error"),
                           input_data=output_data)
def compute(self, input_data: 'ResultInput', config: 'JobConfig') -> 'Result':
    """Run the job through ``run_json`` and wrap its outcome.

    Returns a ``Result`` when the output reports success; otherwise a
    ``FailedOperation`` holding the error and the rest of the output.
    """
    self.found(raise_error=True)

    # Set up the job on a copy so the caller's model stays untouched.
    job = input_data.copy().dict()
    job["success"] = False

    output_data = run_json(job)
    if not output_data["success"]:
        succeeded = output_data.pop("success", False)
        failure = output_data.pop("error")
        return FailedOperation(success=succeeded, error=failure, input_data=output_data)

    return Result(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Convert the files of an NWChem run into a ``Result``.

    Parameters
    ----------
    outfiles
        Files produced by the run. The ``"stdout"`` entry is popped from this
        dict (the caller's mapping is modified); the remaining entries are
        forwarded to the harvester and stored under ``extras["outfiles"]``.
    input_model
        Input the job was started from; its ``driver`` selects the returned
        quantity and its fields form the base of the result.

    Returns
    -------
    Result
        The input fields overlaid with the parsed output data.
    """
    stdout = outfiles.pop("stdout")

    # The harvested molecule, version and error text are not used here.
    harvested, hessian, gradient, _nwmol, _version, _error = harvest(input_model.molecule, stdout, **outfiles)

    for label, array in (('CURRENT GRADIENT', gradient), ('CURRENT HESSIAN', hessian)):
        if array is not None:
            harvested[label] = array

    # Make the driver's value plain: Decimal -> float, ndarray -> flat list.
    return_result = harvested[f'CURRENT {input_model.driver.upper()}']
    if isinstance(return_result, Decimal):
        return_result = float(return_result)
    elif isinstance(return_result, np.ndarray):
        return_result = return_result.ravel().tolist()

    provenance = Provenance(creator="NWChem", version=self.get_version(), routine="nwchem")

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    flat_qcvars = {}
    for name, value in qcel.util.unnp(harvested, flat=True).items():
        flat_qcvars[name.upper()] = str(value) if isinstance(value, Decimal) else value

    output_data = {
        'schema_name': 'qcschema_output',
        'schema_version': 1,
        'extras': {
            'outfiles': outfiles,
            'qcvars': flat_qcvars,
        },
        'properties': {},
        'provenance': provenance,
        'return_result': return_result,
        'stdout': stdout,
        'success': True,
    }

    merged = input_model.dict()
    merged.update(output_data)
    return Result(**merged)
def compute(self, input_data: 'ResultInput', config: 'JobConfig') -> 'Result':
    """Run a dftd3 job through ``run_json``.

    Raises ``ImportError`` when no ``dftd3`` executable is found. Returns a
    ``Result`` on success, otherwise a ``FailedOperation`` with the error and
    the remaining output data.
    """
    have_dftd3 = which('dftd3', return_bool=True)
    if not have_dftd3:
        raise ImportError("Could not find dftd3 in the envvar path.")

    # Setup the job on a copy of the input model.
    job = input_data.copy().dict()
    job["success"] = False

    output_data = run_json(job)
    if not output_data["success"]:
        succeeded = output_data.pop("success", False)
        failure = output_data.pop("error")
        return FailedOperation(success=succeeded, error=failure, input_data=output_data)

    return Result(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Scan the ``dispatch.out`` text for SCF properties and the gradient.

    Parameters
    ----------
    outfiles
        Files produced by the run; ``outfiles["dispatch.out"]`` is parsed.
    input_model
        Input the job was started from. The number of molecule symbols gives
        the number of gradient rows to read, and ``driver`` decides whether
        the gradient or the SCF energy is returned.

    Returns
    -------
    Result
        The input fields overlaid with the parsed properties and return value.

    Raises
    ------
    ValueError
        If the driver is ``gradient`` but no gradient values were read.
    KeyError
        If the SCF total energy is needed as return value but was not found.
    """
    properties = {}
    gradients = []

    # Parse the output file, collect properties and gradient
    text_lines = outfiles["dispatch.out"].split('\n')
    natom = len(input_model.molecule.symbols)

    for lineno, raw in enumerate(text_lines):
        tokens = raw.split()
        if tokens[:1] == ["energy:"]:
            properties["scf_total_energy"] = float(tokens[-1])
        elif tokens[:2] == ["Molecular", "Dipole:"]:
            properties["scf_dipole_moment"] = [float(tok) for tok in tokens[2:5]]
        elif tokens[:3] == ["SCF", "converged", "in"]:
            properties["scf_iterations"] = int(tokens[3])
        elif tokens == ["Gradient", "(hartree/bohr):"]:
            # Gradient is stored as (dE/dx1,dE/dy1,dE/dz1,dE/dx2,dE/dy2,...)
            # The rows start two lines below the header; the first column is skipped.
            first_row = lineno + 2
            for row in range(first_row, first_row + natom):
                components = text_lines[row].strip('\n').split()[1:]
                gradients.extend(float(comp) for comp in components)

    # A gradient run returns the gradient; anything else returns the SCF energy.
    if input_model.driver == 'gradient':
        if not gradients:
            raise ValueError('Gradient not found.')
        return_result = gradients
    elif "scf_total_energy" in properties:
        return_result = properties["scf_total_energy"]
    else:
        raise KeyError("Could not find SCF total energy")

    output_data = {
        "return_result": return_result,
        "properties": properties,
        'schema_name': 'qcschema_output',
        'success': True,
    }

    merged = input_model.dict()
    merged.update(output_data)
    return Result(**merged)
def compute(self, input_model: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs Psi4 in API mode

    The input is written to ``data.json`` and executed with
    ``psi4 --json data.json``; the rewritten ``data.json`` is read back as the
    output.

    Parameters
    ----------
    input_model
        The computation to run.
    config
        Job configuration providing ``ncores``, ``memory`` and
        ``scratch_directory``.

    Returns
    -------
    Result
        The parsed, successful output.

    Raises
    ------
    ResourceError
        If the Psi4 version is not newer than 1.2, or on a PSIO error that
        mentions the scratch directory.
    RandomError
        On other PSIO errors and on segmentation faults (worth retrying).
    InputError
        On option-type errors or an RHF reference used for a non-singlet.
    UnknownError
        On any other failure.
    """
    self.found(raise_error=True)

    # Setup the job
    input_data = input_model.json_dict()
    input_data["nthreads"] = config.ncores
    input_data["memory"] = int(config.memory * 1024 * 1024 * 1024 * 0.95)  # Memory in bytes
    input_data["success"] = False
    input_data["return_output"] = True

    if input_data["schema_name"] == "qcschema_input":
        input_data["schema_name"] = "qc_schema_input"

    if config.scratch_directory:
        input_data["scratch_location"] = config.scratch_directory

    if parse_version(self.get_version()) > parse_version("1.2"):

        # Only default to UHF for open-shell systems when the user gave no reference.
        caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()}
        if (input_model.molecule.molecular_multiplicity != 1) and ("reference" not in caseless_keywords):
            input_data["keywords"]["reference"] = "uhf"

        # Execute the program
        success, output = execute([which("psi4"), "--json", "data.json"],
                                  {"data.json": json.dumps(input_data)},
                                  ["data.json"])

        if success:
            output_data = json.loads(output["outfiles"]["data.json"])
            if "extras" not in output_data:
                output_data["extras"] = {}

            # Check QCVars
            local_qcvars = output_data.pop("psi4:qcvars", None)
            if local_qcvars:
                # Edge case where we might already have qcvars, should not happen
                if "qcvars" in output_data["extras"]:
                    output_data["extras"]["local_qcvars"] = local_qcvars
                else:
                    output_data["extras"]["qcvars"] = local_qcvars

            if output_data["success"] is False:
                if "error_message" not in output_data["error"]:  # older c. 1.3 message-only run_json
                    output_data["error"] = {"error_type": "internal_error", "error_message": output_data["error"]}
        else:
            # The executable itself failed: report stderr on top of the input data
            # (whose "success" entry is already False).
            output_data = input_data
            output_data["error"] = {"error_type": "execution_error", "error_message": output["stderr"]}

    else:
        raise ResourceError("Psi4 version '{}' not understood.".format(self.get_version()))

    # Reset the schema if required
    output_data["schema_name"] = "qcschema_output"

    # Dispatch errors, PSIO Errors are not recoverable for future runs
    if output_data["success"] is False:

        error_message = output_data["error"]["error_message"]

        if "PSIO Error" in error_message:
            if "scratch directory" in error_message:
                # Psi4 cannot access the folder or file
                raise ResourceError(error_message)
            else:
                # Likely a random error, worth retrying
                raise RandomError(error_message)
        elif "SIGSEV" in error_message or "SIGSEGV" in error_message:
            # "SIGSEV" is kept for backward compatibility; the real signal name is
            # SIGSEGV, which the old check could never match.
            raise RandomError(error_message)
        elif "TypeError: set_global_option" in error_message:
            raise InputError(error_message)
        elif "RHF reference is only for singlets" in error_message:
            raise InputError(error_message)
        else:
            raise UnknownError(error_message)

    # Move several pieces up a level
    output_data["provenance"]["memory"] = round(output_data.pop("memory") / (1024**3), 3)  # Move back to GB
    output_data["provenance"]["nthreads"] = output_data.pop("nthreads")
    output_data["stdout"] = output_data.pop("raw_output", None)

    # Delete keys
    output_data.pop("return_output", None)
    output_data.pop("scratch_location", None)

    return Result(**output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Parse the Molpro XML output into a ``Result``.

    Parameters
    ----------
    outfiles
        Files produced by the run. ``outfiles["dispatch.xml"]`` is parsed as
        XML and ``outfiles["dispatch.out"]`` is stored as ``stdout``.
    input_model
        Input the job was started from. Its ``model.method`` (upper-cased)
        selects the final energy, its molecular multiplicity is used for the
        alpha/beta electron counts, and its fields form the base of the result.

    Returns
    -------
    Result
        The input fields overlaid with the parsed properties, extras and
        return value. The return value is the gradient when a ``FORCE``
        jobstep provided one, otherwise the final energy of the method.

    Raises
    ------
    KeyError
        If no total energy can be determined for the requested method.
    """
    tree = ET.ElementTree(ET.fromstring(outfiles["dispatch.xml"]))
    root = tree.getroot()
    # print(root.tag)

    # TODO Read information from molecule tag
    # - cml:molecule, cml:atomArray (?)
    # - basisSet
    # - orbitals

    output_data = {}
    properties = {}
    extras = {}
    name_space = {'molpro_uri': 'http://www.molpro.net/schema/molpro-output'}

    # Molpro commands map
    # Outer key: the "name" attribute of a property tag; inner key: its "method"
    # attribute; value: the key the number is stored under in `properties`.
    molpro_map = {
        "Energy": {
            "HF": "scf_total_energy",
            "RHF": "scf_total_energy",
            "UHF": "scf_total_energy",
            "KS": "scf_total_energy",
            "RKS": "scf_total_energy",
            "UKS": "scf_total_energy"
        },
        "total energy": {
            "MP2": "mp2_total_energy",
            "CCSD": "ccsd_total_energy",
            "CCSD(T)": "ccsd_prt_pr_total_energy"
        },
        "correlation energy": {
            "MP2": "mp2_correlation_energy",
            "CCSD": "ccsd_correlation_energy",
            "CCSD(T)": "ccsd_prt_pr_correlation_energy",  # Need both CCSD(T) and Total
            "Total": "ccsd_prt_pr_correlation_energy"  # Total corresponds to CCSD(T) correlation energy
        },
        "singlet pair energy": {
            "MP2": "mp2_singlet_pair_energy",
            "CCSD": "ccsd_singlet_pair_energy"
        },
        "triplet pair energy": {
            "MP2": "mp2_triplet_pair_energy",
            "CCSD": "ccsd_triplet_pair_energy"
        },
        "Dipole moment": {
            "HF": "scf_dipole_moment",
            "RHF": "scf_dipole_moment",
            "UHF": "scf_dipole_moment",
            "KS": "scf_dipole_moment",
            "RKS": "scf_dipole_moment",
            "UKS": "scf_dipole_moment",
            "MP2": "mp2_dipole_moment",
            "CCSD": "ccsd_dipole_moment",
            "CCSD(T)": "ccsd_prt_pr_dipole_moment"
        }
    }

    # Started adding basic support for local correlation methods in Molpro
    # Same layout as molpro_map, but the values are stored in `extras`.
    molpro_extras_map = {
        "total energy": {
            "LMP2": "local_mp2_total_energy",
            "LCCSD": "local_ccsd_total_energy",
            "LCCSD(T0)": "local_ccsd_prt0_pr_total_energy",
            "LCCSD(T)": "local_ccsd_prt_pr_total_energy"
        },
        "correlation energy": {
            "LMP2": "local_mp2_correlation_energy",
            "LCCSD": "local_ccsd_correlation_energy"
        },
        "singlet pair energy": {
            "LMP2": "local_mp2_singlet_pair_energy",
            "LCCSD": "local_ccsd_singlet_pair_energy"
        },
        "triplet pair energy": {
            "LMP2": "local_mp2_triplet_pair_energy",
            "LCCSD": "local_ccsd_triplet_pair_energy"
        },
        "singles energy": {
            "LCCSD": "local_ccsd_singles_energy"
        },
        # "strong pair energy": {
        #     "LCCSD": "local_ccsd_strong_pair_energy"
        # },
        # "weak pair energy": {
        #     "LMP2": "local_mp2_weak_pair_energy"
        # }
    }

    # Molpro variables map used for quantities not found in the command map
    molpro_variable_map = {
        "_ENUC": "nuclear_repulsion_energy",
        "_DFTFUN": "scf_xc_energy",
        "_NELEC": ["calcinfo_nalpha", "calcinfo_nbeta"]
        # "_EMP2_SCS": "scs_mp2_total_energy"
    }

    # Process data in molpro_map
    # Loop through each jobstep
    # The jobstep tag in Molpro contains output from commands (e.g. {HF}, {force})
    for jobstep in root.findall('molpro_uri:job/molpro_uri:jobstep', name_space):
        command = jobstep.attrib['command']
        if 'FORCE' in command:
            # Grab gradient
            for child in jobstep.findall('molpro_uri:gradient', name_space):
                # Stores gradient as a single list where the ordering is [1x, 1y, 1z, 2x, 2y, 2z, ...]
                output_data['return_result'] = [float(x) for x in child.text.split()]
        else:
            # Grab energies and dipole moment
            for child in jobstep.findall('molpro_uri:property', name_space):
                property_name = child.attrib['name']
                property_method = child.attrib['method']
                value = child.attrib['value']
                if property_name in molpro_map and property_method in molpro_map[property_name]:
                    if property_name == "Dipole moment":
                        # The dipole value is a whitespace-separated list of numbers.
                        properties[molpro_map[property_name][property_method]] = [float(x) for x in value.split()]
                    else:
                        properties[molpro_map[property_name][property_method]] = float(value)
                elif property_name in molpro_extras_map and property_method in molpro_extras_map[property_name]:
                    extras[molpro_extras_map[property_name][property_method]] = float(value)

    # Convert triplet and singlet pair correlation energies to opposite-spin and same-spin correlation energies
    # The pair energies themselves are removed from `properties` after conversion.
    if 'mp2_singlet_pair_energy' in properties and 'mp2_triplet_pair_energy' in properties:
        properties["mp2_same_spin_correlation_energy"] = (2.0 / 3.0) * properties['mp2_triplet_pair_energy']
        properties["mp2_opposite_spin_correlation_energy"] = (1.0 / 3.0) \
            * properties['mp2_triplet_pair_energy'] \
            + properties['mp2_singlet_pair_energy']
        del properties['mp2_singlet_pair_energy']
        del properties['mp2_triplet_pair_energy']

    if 'ccsd_singlet_pair_energy' in properties and 'ccsd_triplet_pair_energy' in properties:
        properties["ccsd_same_spin_correlation_energy"] = (2.0 / 3.0) * properties['ccsd_triplet_pair_energy']
        properties["ccsd_opposite_spin_correlation_energy"] = (1.0 / 3.0) \
            * properties['ccsd_triplet_pair_energy'] \
            + properties['ccsd_singlet_pair_energy']
        del properties['ccsd_singlet_pair_energy']
        del properties['ccsd_triplet_pair_energy']

    # Process data in molpro_variable_map
    # Note: For the DFT case molecule_method is the name of the functional plus R or U in front
    molecule = root.find('molpro_uri:job/molpro_uri:molecule', name_space)
    molecule_method = molecule.attrib['method']
    molecule_final_energy = float(molecule.attrib['energy'])  # Energy from the molecule tag in case its needed
    # Loop over each variable under the variables tag to grab additional info from molpro_variable_map
    for variable in molecule.findall('molpro_uri:variables/molpro_uri:variable', name_space):
        variable_name = variable.attrib['name']
        if variable_name in molpro_variable_map:
            if variable_name == "_NELEC":
                # Split the electron count into alpha/beta using the multiplicity:
                # the unpaired electrons (multiplicity - 1) all go to alpha.
                nelec = int(float(variable[0].text))
                nunpaired = (input_model.molecule.molecular_multiplicity - 1)
                nbeta = (nelec - nunpaired) // 2
                nalpha = nelec - nbeta
                properties[molpro_variable_map[variable_name][0]] = nalpha
                properties[molpro_variable_map[variable_name][1]] = nbeta
            else:
                properties[molpro_variable_map[variable_name]] = float(variable[0].text)

    # Process basis set data
    basis_set = root.find('molpro_uri:job/molpro_uri:molecule/molpro_uri:basisSet', name_space)
    nbasis = int(basis_set.attrib['length'])
    # angular_type = basis_set.attrib['angular']  # cartesian vs spherical
    properties["calcinfo_nbasis"] = nbasis

    # Grab the method from input
    method = input_model.model.method.upper()

    # Determining the final energy
    # Throws an error if the energy isn't found for the method specified from the input_model.
    if method in molpro_map['total energy'].keys() and molpro_map['total energy'][method] in properties:
        final_energy = properties[molpro_map['total energy'][method]]
    elif method in molpro_map['Energy'].keys() and molpro_map['Energy'][method] in properties:
        final_energy = properties[molpro_map['Energy'][method]]
    else:
        # Back up method for determining final energy if not already present in properties
        # Use the total energy from the molecule tag if it matches the input method
        # if input_model.model.method.upper() in molecule_method:
        if method in molecule_method:
            final_energy = molecule_final_energy
            if method in self._post_hf_methods:
                properties[molpro_map['total energy'][method]] = molecule_final_energy
                # NOTE(review): this lookup raises KeyError when no SCF total energy
                # was parsed from the jobsteps above.
                properties[molpro_map['correlation energy'][method]] = molecule_final_energy - properties['scf_total_energy']
            elif method in self._dft_functionals:
                properties[molpro_map['Energy']["KS"]] = molecule_final_energy
            elif method in self._hf_methods:
                properties[molpro_map['Energy'][method]] = molecule_final_energy
        else:
            raise KeyError(f"Could not find {method} total energy")

    # Determining return_result
    # A gradient stored by a FORCE jobstep takes precedence over the energy.
    if "return_result" not in output_data:
        output_data["return_result"] = final_energy

    # Final output_data assignments needed for the Result object
    output_data["properties"] = properties
    output_data["extras"] = extras
    output_data['schema_name'] = 'qcschema_output'
    output_data['stdout'] = outfiles["dispatch.out"]
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def compute(self, input_data: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs TorchANI in FF typing

    Parameters
    ----------
    input_data
        The computation to run; ``model.method`` selects the ANI model and
        ``driver`` must be ``energy`` or ``gradient``.
    config
        Job configuration (not used by this harness).

    Returns
    -------
    Result or FailedOperation
        ``Result`` on success. A ``FailedOperation`` with a ``ComputeError``
        is returned for an outdated TorchANI version, an unsupported method,
        unsupported element symbols or an unsupported driver.
    """

    # Check if existings and version
    self.found(raise_error=True)

    # Failure flag
    # Must exist before the version check below: that branch stores its error in
    # ret_data, which previously was not yet defined at that point.
    ret_data = {"success": False}

    if parse_version(self.get_version()) < parse_version("0.5"):
        ret_data["error"] = ComputeError(
            error_type="version_error",
            error_message="QCEngine's TorchANI wrapper requires version 0.5 or greater.")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    import torch
    import numpy as np

    device = torch.device('cpu')

    # Build model
    model = self.get_model(input_data.model.method)
    if model is False:
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_torchani only accepts the ANI1x or ANI1ccx method.")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    # Build species
    species = "".join(input_data.molecule.symbols)
    unknown_sym = set(species) - {"H", "C", "N", "O"}
    if unknown_sym:
        ret_data["error"] = ComputeError(
            error_type="input_error",
            error_message="The '{}' model does not support symbols: {}.".format(
                input_data.model.method, unknown_sym))
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    species = model.species_to_tensor(species).to(device).unsqueeze(0)

    # Build coord array
    # The geometry is converted from bohr to angstrom before it is given to the model.
    geom_array = input_data.molecule.geometry.reshape(1, -1, 3) * ureg.conversion_factor("bohr", "angstrom")
    coordinates = torch.tensor(geom_array.tolist(), requires_grad=True, device=device)

    _, energy = model((species, coordinates))
    ret_data["properties"] = {"return_energy": energy.item()}

    if input_data.driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif input_data.driver == "gradient":
        derivative = torch.autograd.grad(energy.sum(), coordinates)[0].squeeze()
        ret_data["return_result"] = np.asarray(
            derivative * ureg.conversion_factor("angstrom", "bohr")).ravel().tolist()
    else:
        ret_data["error"] = ComputeError(
            error_type="input_error",
            error_message="run_torchani did not understand driver method '{}'.".format(input_data.driver))
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    ret_data["provenance"] = Provenance(
        creator="torchani", version="unknown", routine='torchani.builtin.aev_computer')

    ret_data["schema_name"] = "qcschema_output"
    ret_data["success"] = True

    # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other
    return Result(**{**input_data.dict(), **ret_data})
def compute(self, input_data: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs RDKit in FF typing

    Parameters
    ----------
    input_data
        The computation to run. The molecule must be neutral and carry a
        connectivity graph; ``model.method`` must be UFF (case-insensitive)
        and ``driver`` must be ``energy`` or ``gradient``.
    config
        Job configuration (not used by this harness).

    Returns
    -------
    Result or FailedOperation
        ``Result`` on success; a ``FailedOperation`` with an ``input_error``
        for any of the unsupported inputs listed above.

    Raises
    ------
    ModuleNotFoundError
        If RDKit cannot be imported.
    """
    try:
        import rdkit
        from rdkit import Chem
        from rdkit.Chem import AllChem
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError("Could not find RDKit in the Python path.") from err

    # Failure flag
    ret_data = {"success": False}

    # Build the Molecule
    jmol = input_data.molecule

    # Handle errors
    if abs(jmol.molecular_charge) > 1.e-6:
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_rdkit does not currently support charged molecules")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    if not jmol.connectivity:  # Check for empty list
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_rdkit molecule must have a connectivity graph")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    # Build out the base molecule
    base_mol = Chem.Mol()
    rw_mol = Chem.RWMol(base_mol)
    for sym in jmol.symbols:
        rw_mol.AddAtom(Chem.Atom(sym.title()))

    # Add in connectivity
    bond_types = {1: Chem.BondType.SINGLE, 2: Chem.BondType.DOUBLE, 3: Chem.BondType.TRIPLE}
    for atom1, atom2, bo in jmol.connectivity:
        rw_mol.AddBond(atom1, atom2, bond_types[bo])

    mol = rw_mol.GetMol()

    # Write out the conformer
    natom = len(jmol.symbols)
    conf = Chem.Conformer(natom)
    bohr2ang = ureg.conversion_factor("bohr", "angstrom")
    for iatom in range(natom):
        conf.SetAtomPosition(iatom, (bohr2ang * jmol.geometry[iatom, 0],
                                     bohr2ang * jmol.geometry[iatom, 1],
                                     bohr2ang * jmol.geometry[iatom, 2]))  # yapf: disable

    mol.AddConformer(conf)
    Chem.rdmolops.SanitizeMol(mol)

    if input_data.model.method.lower() == "uff":
        ff = AllChem.UFFGetMoleculeForceField(mol)
        all_params = AllChem.UFFHasAllMoleculeParams(mol)
    else:
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_rdkit can only accepts UFF methods")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    if all_params is False:
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_rdkit did not match all parameters to molecule")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    ff.Initialize()

    # NOTE(review): the conversion treats the force-field energy as kJ/mol —
    # confirm the unit against the RDKit documentation.
    ret_data["properties"] = {"return_energy": ff.CalcEnergy() * ureg.conversion_factor("kJ / mol", "hartree")}

    if input_data.driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif input_data.driver == "gradient":
        coef = ureg.conversion_factor("kJ / mol", "hartree") * ureg.conversion_factor("angstrom", "bohr")
        ret_data["return_result"] = [x * coef for x in ff.CalcGrad()]
    else:
        # The driver lives on the input model; ret_data never has a "driver" key,
        # so the previous ret_data["driver"] lookup raised KeyError here.
        ret_data["error"] = ComputeError(
            error_type="input_error",
            error_message="run_rdkit did not understand driver method "
            "'{}'.".format(input_data.driver))
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    ret_data["provenance"] = Provenance(
        creator="rdkit", version=rdkit.__version__, routine="rdkit.Chem.AllChem.UFFGetMoleculeForceField")

    ret_data["schema_name"] = "qcschema_output"
    ret_data["success"] = True

    # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other
    return Result(**{**input_data.dict(), **ret_data})
def compute(self, input_data: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs TorchANI in FF typing

    Evaluates the selected ANI model on the input molecule and returns the
    energy, gradient or hessian requested by ``input_data.driver``.

    Raises ``ResourceError`` for TorchANI older than 0.9 and ``InputError``
    for an unsupported method, element symbol or driver.
    """

    # Check that TorchANI is present and recent enough
    self.found(raise_error=True)
    installed = parse_version(self.get_version())
    if installed < parse_version("0.9"):
        raise ResourceError("QCEngine's TorchANI wrapper requires version 0.9 or greater.")

    import torch
    import torchani
    import numpy as np

    device = torch.device('cpu')

    # Failure flag
    ret_data = {"success": False}

    # Build model
    method = input_data.model.method
    model = self.get_model(method)
    if model is False:
        raise InputError("TorchANI only accepts the ANI1x or ANI1ccx method.")

    # Build species
    species = "".join(input_data.molecule.symbols)
    unsupported = set(species) - {"H", "C", "N", "O"}
    if unsupported:
        raise InputError(f"TorchANI model '{input_data.model.method}' does not support symbols: {unsupported}.")

    num_atoms = len(species)
    species = model.species_to_tensor(species).to(device).unsqueeze(0)

    # Build coord array (bohr -> angstrom)
    to_angstrom = ureg.conversion_factor("bohr", "angstrom")
    geom_array = input_data.molecule.geometry.reshape(1, -1, 3) * to_angstrom
    coordinates = torch.tensor(geom_array.tolist(), requires_grad=True, device=device)

    _, energy_array = model((species, coordinates))
    energy = energy_array.mean()
    ensemble_std = energy_array.std()
    ensemble_scaled_std = ensemble_std / np.sqrt(num_atoms)

    ret_data["properties"] = {"return_energy": energy.item()}

    driver = input_data.driver
    if driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif driver == "gradient":
        derivative = torch.autograd.grad(energy.sum(), coordinates)[0].squeeze()
        scaled = derivative * ureg.conversion_factor("angstrom", "bohr")
        ret_data["return_result"] = np.asarray(scaled).ravel().tolist()
    elif driver == "hessian":
        hessian = torchani.utils.hessian(coordinates, energies=energy)
        ret_data["return_result"] = np.asarray(hessian)
    else:
        raise InputError(
            f"TorchANI can only compute energy, gradient, and hessian driver methods. Found {input_data.driver}.")

    # Quantities stored in `extras`:
    #   ensemble_energies                   -- the energy array returned by the model
    #   ensemble_energy_avg                 -- mean of that array (also the returned energy)
    #   ensemble_energy_std                 -- standard deviation of that array
    #   ensemble_per_root_atom_disagreement -- the standard deviation divided by
    #                                          sqrt(N), with N = len of the joined symbol string
    extras = {}
    extras["ensemble_energies"] = energy_array.detach().numpy()
    extras["ensemble_energy_avg"] = energy.item()
    extras["ensemble_energy_std"] = ensemble_std.item()
    extras["ensemble_per_root_atom_disagreement"] = ensemble_scaled_std.item()
    ret_data["extras"] = extras

    ret_data["provenance"] = Provenance(
        creator="torchani", version="unknown", routine='torchani.builtin.aev_computer')

    ret_data["schema_name"] = "qcschema_output"
    ret_data["success"] = True

    # Form up a dict first, then sent to BaseModel to avoid repeat kwargs which don't override each other
    merged = input_data.dict()
    merged.update(ret_data)
    return Result(**merged)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the Molpro XML in ``outfiles["dispatch.xml"]``.

    Energies and dipole moments are collected from every jobstep whose command
    is in ``self._supported_methods``; a ``FORCE`` jobstep supplies the gradient,
    which then becomes ``return_result``. Otherwise ``return_result`` is the
    total energy of the method requested in ``input_model``.

    Raises
    ------
    KeyError
        If no total energy can be located for the requested method.
    """
    ns = {'molpro_uri': 'http://www.molpro.net/schema/molpro-output'}
    root = ET.ElementTree(ET.fromstring(outfiles["dispatch.xml"])).getroot()

    # TODO Read information from molecule tag
    #      - cml:molecule, cml:atomArray (?)
    #      - basisSet
    #      - orbitals
    output_data = {}
    properties = {}

    # Molpro commands map: property name -> reporting method -> schema key
    scf_labels = ("HF", "RHF", "KS", "RKS")
    molpro_map = {
        "Energy": {label: "scf_total_energy" for label in scf_labels},
        "total energy": {
            "MP2": "mp2_total_energy",
            "CCSD": "ccsd_total_energy",
            "CCSD(T)": "ccsd_prt_pr_total_energy",
        },
        "correlation energy": {
            "MP2": "mp2_correlation_energy",
            "CCSD": "ccsd_correlation_energy",
            "CCSD(T)": "ccsd_prt_pr_correlation_energy",  # Need both CCSD(T) and Total
            "Total": "ccsd_prt_pr_correlation_energy",  # Total corresponds to CCSD(T) correlation energy
        },
        "singlet pair energy": {
            "MP2": "mp2_singlet_pair_energy",
            "CCSD": "ccsd_singlet_pair_energy",
        },
        "triplet pair energy": {
            "MP2": "mp2_triplet_pair_energy",
            "CCSD": "ccsd_triplet_pair_energy",
        },
        "Dipole moment": {
            **{label: "scf_dipole_moment" for label in scf_labels},
            "MP2": "mp2_dipole_moment",
            "CCSD": "ccsd_dipole_moment",
            "CCSD(T)": "ccsd_prt_pr_dipole_moment",
        },
    }

    # Molpro variables map used for quantities not found in the command map
    molpro_var_map = {
        "_ENUC": "nuclear_repulsion_energy",
        "_DFTFUN": "scf_xc_energy",
        # "_EMP2_SCS": "scs_mp2_total_energy"
    }

    # The jobstep tag in Molpro contains output from commands (e.g. {hf}, {force})
    for jobstep in root.findall('molpro_uri:job/molpro_uri:jobstep', ns):
        # Remove the -SCF part of the command string when Molpro calls HF or KS
        command = jobstep.attrib['command']
        if '-SCF' in command:
            command = command[:-4]

        if command in self._supported_methods:
            # Energies and dipole moment
            for prop in jobstep.findall('molpro_uri:property', ns):
                prop_name = prop.attrib['name']
                prop_method = prop.attrib['method']
                raw_value = prop.attrib['value']

                schema_key = molpro_map.get(prop_name, {}).get(prop_method)
                if schema_key is None:
                    continue
                if prop_name == "Dipole moment":
                    properties[schema_key] = [float(x) for x in raw_value.split()]
                else:
                    properties[schema_key] = float(raw_value)

        elif 'FORCE' in command:
            # Gradient is one flat list ordered [1x, 1y, 1z, 2x, 2y, 2z, ...]
            for grad in jobstep.findall('molpro_uri:gradient', ns):
                output_data['return_result'] = [float(x) for x in grad.text.split()]

    # Final energy recorded on the molecule tag, kept as a fallback.
    # Note: For the DFT case mol_method is the name of the functional plus R or U in front
    molecule = root.find('molpro_uri:job/molpro_uri:molecule', ns)
    mol_method = molecule.attrib['method']
    mol_final_energy = float(molecule.attrib['energy'])

    # Additional quantities stored under the variables tag
    for variable in molecule.findall('molpro_uri:variables/molpro_uri:variable', ns):
        var_name = variable.attrib['name']
        if var_name in molpro_var_map:
            properties[molpro_var_map[var_name]] = float(variable[0].text)

    # Convert triplet and singlet pair correlation energies to opposite-spin and same-spin correlation energies
    for prefix in ("mp2", "ccsd"):
        singlet_key = f"{prefix}_singlet_pair_energy"
        triplet_key = f"{prefix}_triplet_pair_energy"
        if singlet_key in properties and triplet_key in properties:
            triplet = properties.pop(triplet_key)
            singlet = properties.pop(singlet_key)
            properties[f"{prefix}_same_spin_correlation_energy"] = (2.0 / 3.0) * triplet
            properties[f"{prefix}_opposite_spin_correlation_energy"] = (1.0 / 3.0) * triplet + singlet

    # Grab the method from input; any DFT functional is looked up as RKS
    method = input_model.model.method
    if method.upper() in self._dft_functionals:
        method = "RKS"

    is_post_hf = method in self._post_hf_methods
    is_scf = method in self._scf_methods

    # Prefer the energy parsed from the jobsteps, fall back on the molecule tag,
    # and fail if neither matches the method requested in input_model.
    if is_post_hf and molpro_map['total energy'][method] in properties:
        final_energy = properties[molpro_map['total energy'][method]]
    elif is_scf and molpro_map['Energy'][method] in properties:
        final_energy = properties[molpro_map['Energy'][method]]
    elif input_model.model.method in mol_method:
        # Use the total energy from the molecule tag since it matches the input method
        final_energy = mol_final_energy
        if is_post_hf:
            properties[molpro_map['total energy'][method]] = mol_final_energy
            properties[molpro_map['correlation energy'][method]] = \
                mol_final_energy - properties['scf_total_energy']
        elif is_scf:
            properties[molpro_map['Energy'][method]] = mol_final_energy
    else:
        raise KeyError(f"Could not find {method} total energy")

    # Replace return_result with final_energy if gradient wasn't called
    if "return_result" not in output_data:
        output_data["return_result"] = final_energy

    # Final output_data assignments needed for the Result object
    output_data["properties"] = properties
    output_data['schema_name'] = 'qcschema_output'
    output_data['stdout'] = outfiles["dispatch.out"]
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the Molpro XML in ``outfiles["dispatch.xml"]``.

    SCF jobsteps contribute the total energy and dipole moment, MP2 jobsteps
    the MP2 energies, and FORCE jobsteps the gradient. ``return_result`` is the
    gradient when one was found, otherwise the MP2 total energy, otherwise the
    SCF total energy.

    Raises
    ------
    KeyError
        If neither a gradient nor any total energy was found.
    """
    root = ET.ElementTree(ET.fromstring(outfiles["dispatch.xml"])).getroot()

    # TODO Try to grab the last total energy in the general case?
    #      - Would be useful for arbitrarily complicated input file
    #      - However would need every different string used to specify energy (e.g. HF --> Energy, MP2 --> total energy)
    # TODO Read information from molecule tag
    #      - cml:molecule, cml:atomArray (?)
    #      - basisSet
    #      - orbitals
    output_data = {}
    name_space = {'molpro_uri': 'http://www.molpro.net/schema/molpro-output'}

    # TODO Create enum class to take jobstep.attrib['command'] and determine what method it is
    mp2_map = {
        "total energy": "mp2_total_energy",
        "correlation energy": "mp2_total_correlation_energy",
    }
    # Pair energies are not spin-component energies themselves; they are
    # collected here and converted once all jobsteps have been read.
    mp2_pair_names = ("singlet pair energy", "triplet pair energy")
    mp2_pair_energies = {}

    properties = {}
    # The jobstep tag in Molpro contains output from commands (e.g. {hf}, {force})
    for jobstep in root.findall('molpro_uri:job/molpro_uri:jobstep', name_space):
        command = jobstep.attrib['command']

        if 'SCF' in command:
            # Grab properties (e.g. Energy and Dipole moment)
            for child in jobstep.findall('molpro_uri:property', name_space):
                if child.attrib['name'] == 'Energy':
                    properties['scf_total_energy'] = float(child.attrib['value'])
                elif child.attrib['name'] == 'Dipole moment':
                    properties['scf_dipole_moment'] = [float(x) for x in child.attrib['value'].split()]

        elif 'MP2' in command:
            for child in jobstep.findall('molpro_uri:property', name_space):
                prop_name = child.attrib['name']
                if prop_name in mp2_map:
                    properties[mp2_map[prop_name]] = float(child.attrib['value'])
                elif prop_name in mp2_pair_names:
                    mp2_pair_energies[prop_name] = float(child.attrib['value'])

        # Grab gradient
        # TODO Handle situation where there are multiple FORCE calls
        elif 'FORCE' in command:
            for child in jobstep.findall('molpro_uri:gradient', name_space):
                # Stores gradient as a single list where the ordering is [1x, 1y, 1z, 2x, 2y, 2z, ...]
                output_data['return_result'] = [float(x) for x in child.text.split()]

    # Convert triplet and singlet pair correlation energies to opposite-spin
    # and same-spin correlation energies.
    if all(name in mp2_pair_energies for name in mp2_pair_names):
        singlet = mp2_pair_energies["singlet pair energy"]
        triplet = mp2_pair_energies["triplet pair energy"]
        properties["mp2_same_spin_correlation_energy"] = (2.0 / 3.0) * triplet
        properties["mp2_opposite_spin_correlation_energy"] = (1.0 / 3.0) * triplet + singlet

    # A _bad_ way of figuring the correct energy
    # TODO Maybe a better way would be to use the method specified from the input?
    if "return_result" not in output_data:
        if "mp2_total_energy" in properties:
            output_data["return_result"] = properties["mp2_total_energy"]
        elif "scf_total_energy" in properties:
            output_data["return_result"] = properties["scf_total_energy"]
        else:
            raise KeyError("Could not find SCF total energy")

    output_data["properties"] = properties
    output_data['schema_name'] = 'qcschema_output'

    # TODO Should only return True if Molpro calculation terminated properly
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the TeraChem text output in ``outfiles["tc.out"]``.

    The SCF energy, dipole moment, nuclear repulsion energy, SCF iteration
    count and (when the SCF table has an "XC Energy" column) the XC energy are
    stored in ``properties``. ``return_result`` is the gradient when a gradient
    section was printed, otherwise the SCF total energy.

    Raises
    ------
    ValueError
        If no SCF iteration line is found between the SCF header and the final energy.
    KeyError
        If neither a gradient nor the SCF total energy was found.
    """
    output_data = {}
    properties = {}

    # Parse the output file, collect properties and gradient
    output_lines = outfiles["tc.out"].split('\n')
    gradients = []
    natom = 0
    line_final_energy = -1
    line_scf_header = -1
    for idx, line in enumerate(output_lines):
        if "FINAL ENERGY" in line:
            properties["scf_total_energy"] = float(line.strip('\n').split()[2])
            line_final_energy = idx
        elif "Start SCF Iterations" in line:
            line_scf_header = idx
        elif "Total atoms" in line:
            natom = int(line.split()[-1])
        elif "DIPOLE MOMENT" in line:
            newline = line.replace(',', '').replace('}', '').replace('{', '')
            properties["scf_dipole_moment"] = [float(x) for x in newline.split()[2:5]]
        elif "Nuclear repulsion energy" in line:
            properties["nuclear_repulsion_energy"] = float(line.split()[-2])
        elif "Gradient units are Hartree/Bohr" in line:
            # Gradient is stored as (dE/dx1,dE/dy1,dE/dz1,dE/dx2,dE/dy2,...)
            for i in range(idx + 3, idx + 3 + natom):
                gradients.extend(float(x) for x in output_lines[i].split())

    # A decimal number: ".num" or "num." form, optional sign and exponent.
    # Written without VERBOSE-mode comments so the pattern does not depend on
    # line breaks inside the literal.
    decimal = r"((?:[-+]?\d*\.\d+(?:[DdEe][-+]?\d+)?)|(?:[-+]?\d+\.\d*(?:[DdEe][-+]?\d+)?))"
    # An SCF iteration row: an integer followed by at least four decimals.
    scf_row = re.compile(r'^\s*\d+\s+' + r'\s+'.join([decimal] * 4))

    # Look for the last line that is the SCF info
    scf_span = range(line_scf_header, line_final_energy)
    last_scf_line = ""
    for idx in reversed(scf_span):
        if scf_row.search(output_lines[idx]):
            last_scf_line = output_lines[idx]
            break

    if len(last_scf_line) > 0:
        fields = last_scf_line.split()
        properties["scf_iterations"] = int(fields[0])
        # Substring search over the SCF section; testing membership in the list
        # of lines would only succeed for a line equal to "XC Energy".
        if any("XC Energy" in output_lines[i] for i in scf_span):
            properties["scf_xc_energy"] = float(fields[4])
    else:
        raise ValueError("SCF iteration lines not found in TeraChem output")

    if len(gradients) > 0:
        output_data["return_result"] = gradients

    # Properties currently not supported by QCSchema are not collected here
    # (spin S-squared from the output, atomic charges from the scratch folder).

    if "return_result" not in output_data:
        if "scf_total_energy" in properties:
            output_data["return_result"] = properties["scf_total_energy"]
        else:
            raise KeyError("Could not find SCF total energy")

    output_data["properties"] = properties
    output_data['schema_name'] = 'qcschema_output'
    output_data['stdout'] = outfiles["tc.out"]

    # TODO Should only return True if TeraChem calculation terminated properly
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the TeraChem text output in ``outfiles["tc.out"]``.

    The SCF energy, iteration count, dipole moment and nuclear repulsion energy
    are stored in ``properties``; the XC energy is added when the output
    contains an "XC Energy" column heading. ``return_result`` is the gradient
    when a gradient section was printed, otherwise the SCF total energy.

    Raises
    ------
    KeyError
        If neither a gradient nor the SCF total energy was found.
    """
    output_data = {}
    properties = {}

    # Parse the output file, collect properties and gradient
    output_lines = outfiles["tc.out"].split('\n')
    gradients = []
    natom = 0

    # Substring search over every line; testing membership in the list of lines
    # would only succeed for a line that is exactly "XC Energy".
    has_xc_column = any("XC Energy" in text for text in output_lines)

    for idx, line in enumerate(output_lines):
        if "FINAL ENERGY" in line:
            properties["scf_total_energy"] = float(line.strip('\n').split()[2])
            # The last SCF iteration row is taken to sit two lines above the final energy.
            last_scf_fields = output_lines[idx - 2].split()
            properties["scf_iterations"] = int(last_scf_fields[0])
            if has_xc_column:
                properties["scf_xc_energy"] = float(last_scf_fields[4])
        elif "Total atoms" in line:
            natom = int(line.split()[-1])
        elif "DIPOLE MOMENT" in line:
            newline = line.replace(',', '').replace('}', '').replace('{', '')
            properties["scf_dipole_moment"] = [float(x) for x in newline.split()[2:5]]
        elif "Nuclear repulsion energy" in line:
            properties["nuclear_repulsion_energy"] = float(line.split()[-2])
        elif "Gradient units are Hartree/Bohr" in line:
            # Gradient is stored as (dE/dx1,dE/dy1,dE/dz1,dE/dx2,dE/dy2,...)
            for i in range(idx + 3, idx + 3 + natom):
                gradients.extend(float(x) for x in output_lines[i].split())

    if len(gradients) > 0:
        output_data["return_result"] = gradients

    # Properties currently not supported by QCSchema are not collected here
    # (spin S-squared from the output, atomic charges from the scratch folder).

    if "return_result" not in output_data:
        if "scf_total_energy" in properties:
            output_data["return_result"] = properties["scf_total_energy"]
        else:
            raise KeyError("Could not find SCF total energy")

    output_data["properties"] = properties
    output_data['schema_name'] = 'qcschema_output'

    # TODO Should only return True if TeraChem calculation terminated properly
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the MOPAC auxiliary file ``outfiles["dispatch.aux"]``.

    Each ``KEY[:UNITS][[n]]=value`` entry whose key is of interest is parsed,
    multiplied by the conversion factor for its units, and stored; lines
    without ``=`` are continuation data for the most recent kept key.
    ``return_result`` is the heat of formation for the energy driver and the
    gradient otherwise; the remaining parsed values are merged into ``extras``.
    """
    wanted = {
        "heat_of_formation", "energy_electronic", "energy_nuclear", "gradient_norm", "dip_vec",
        "spin_component", "total_spin", "molecular_weight", "total_energy", "gradients",
        "mopac_version", "atom_charges", "point_group"
    }

    # Convert back to atomic units
    hartree_to_kcalmol = self.extras["hartree_to_kcalmol"]
    bohr_to_angstroms = self.extras["bohr_to_angstroms"]
    conversions = {
        "KCAL/MOL": 1 / hartree_to_kcalmol,
        'KCAL/MOL/ANGSTROM': bohr_to_angstroms / hartree_to_kcalmol,
        "EV": 1 / self.extras["hartree_to_ev"],
        "DEBYE": 1 / self.extras["au_to_debye"],
        "AMU": 1,
        None: 1
    }

    parsed = {}  # key -> (conversion factor, value)
    active = None  # kept key that continuation lines currently belong to

    # Parse the weird structure
    for raw in outfiles["dispatch.aux"].splitlines():
        if any(marker in raw for marker in ("START", "END", "#")):
            continue

        if "=" not in raw:
            # Continuation data: append to the value list of the active key
            if active is not None:
                factor, bucket = parsed[active]
                bucket.extend([float(tok) * factor for tok in raw.split()])
            continue

        # Primary split
        lhs, _, rhs = raw.partition("=")

        # Format key, may have units
        # IONIZATION_POTENTIAL:EV
        # GRADIENTS:KCAL/MOL/ANGSTROM[09]
        name, colon, units = lhs.partition(":")
        if not colon:
            units = None
        elif units and "[" in units:
            # Pop off [xx] items
            units = units.partition("[")[0]
        if "[" in name:
            name = name.partition("[")[0]
        name = name.strip().lower()

        # Skip keys that are not useful
        if name not in wanted:
            active = None
            continue
        active = name

        factor = conversions[units]

        # 1D+3 -> 1E3 conversion
        text = rhs.strip().replace("D+", "E+").replace("D-", "E-")
        if ("E+" in text) or ("E-" in text):
            if text.count("E") > 1:
                value = [float(tok) * factor for tok in text.split()]
            else:
                value = float(text) * factor
        elif text == "":
            # Empty right-hand side: the data follows on continuation lines
            value = []
        else:
            value = text

        parsed[name] = (factor, value)

    data = {key: pair[1] for key, pair in parsed.items()}

    gradient = data.pop("gradients")

    output = input_model.dict()
    output["provenance"] = {"creator": "mopac", "version": data.pop("mopac_version")}

    heat_of_formation = data["heat_of_formation"]
    output["properties"] = {"return_energy": heat_of_formation}
    output["extras"].update(data)

    output["return_result"] = heat_of_formation if input_model.driver == "energy" else gradient

    output['stdout'] = outfiles["dispatch.out"]
    output["success"] = True

    return Result(**output)
def torchani(input_data, config):
    """
    Runs TorchANI in FF typing

    Parameters
    ----------
    input_data
        Job description; ``model.method``, ``molecule`` and ``driver`` are read.
    config
        Execution configuration. Not read by this function.

    Returns
    -------
    Result or FailedOperation
        A ``Result`` on success. A ``FailedOperation`` carrying an
        ``input_error`` when the method is not recognised by ``get_model``, the
        molecule contains an element other than H, C, N, O, or the driver is
        neither energy nor gradient.

    Raises
    ------
    ImportError
        If PyTorch or TorchANI cannot be imported.
    """
    import numpy as np
    try:
        import torch
    except ImportError as exc:
        raise ImportError("Could not find PyTorch in the Python path.") from exc
    try:
        import torchani
    except ImportError as exc:
        raise ImportError("Could not find TorchANI in the Python path.") from exc

    device = torch.device('cpu')
    builtin = torchani.neurochem.Builtins()

    # Failure flag
    ret_data = {"success": False}

    # Build model
    model = get_model(input_data.model.method)
    if model is False:
        ret_data["error"] = ComputeError(
            error_type="input_error", error_message="run_torchani only accepts the ANI1 method.")
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    # Build species.
    # Validate per element symbol, not per character of the joined string, so
    # that an unsupported element is reported as e.g. 'Cl' rather than 'l'.
    symbols = [str(sym) for sym in input_data.molecule.symbols]
    unknown_sym = set(symbols) - {"H", "C", "N", "O"}
    if unknown_sym:
        ret_data["error"] = ComputeError(
            error_type="input_error",
            error_message="The '{}' model does not support symbols: {}.".format(
                input_data.model.method, unknown_sym))
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    species = "".join(symbols)
    species = builtin.consts.species_to_tensor(species).to(device).unsqueeze(0)

    # Build coord array (input geometry is rescaled by the bohr -> angstrom factor)
    geom_array = input_data.molecule.geometry.reshape(1, -1, 3) * ureg.conversion_factor("bohr", "angstrom")
    coordinates = torch.tensor(geom_array.tolist(), requires_grad=True, device=device)

    _, energy = model((species, coordinates))
    ret_data["properties"] = {"return_energy": energy.item()}

    if input_data.driver == "energy":
        ret_data["return_result"] = ret_data["properties"]["return_energy"]
    elif input_data.driver == "gradient":
        derivative = torch.autograd.grad(energy.sum(), coordinates)[0].squeeze()
        # Derivative was taken w.r.t. the rescaled coordinates; scale it back.
        ret_data["return_result"] = np.asarray(
            derivative * ureg.conversion_factor("angstrom", "bohr")).ravel().tolist()
    else:
        ret_data["error"] = ComputeError(
            error_type="input_error",
            error_message="run_torchani did not understand driver method '{}'.".format(input_data.driver))
        return FailedOperation(input_data=input_data.dict(), **ret_data)

    ret_data["provenance"] = Provenance(
        creator="torchani", version="unknown", routine='torchani.builtin.aev_computer')

    ret_data["schema_name"] = "qcschema_output"
    ret_data["success"] = True

    # Form up a dict first, then send to BaseModel to avoid repeat kwargs which don't override each other
    return Result(**{**input_data.dict(), **ret_data})
def compute(self, input_model: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs Psi4 through its command-line ``--json`` interface

    Parameters
    ----------
    input_model : ResultInput
        Job description, serialised with ``json_dict()`` and handed to Psi4.
    config : JobConfig
        Supplies ``ncores``, ``memory`` (GiB) and ``scratch_directory``.

    Returns
    -------
    Result or FailedOperation
        ``Result`` when Psi4 reports success, ``FailedOperation`` otherwise.

    Raises
    ------
    TypeError
        If the Psi4 version is not newer than 1.2.
    ValueError
        If the failure message contains "PSIO Error".
    """
    self.found(raise_error=True)

    # Setup the job
    input_data = input_model.json_dict()
    input_data["nthreads"] = config.ncores
    input_data["memory"] = int(config.memory * 1024 * 1024 * 1024 * 0.95)  # Memory in bytes
    input_data["success"] = False
    input_data["return_output"] = True

    if input_data["schema_name"] == "qcschema_input":
        input_data["schema_name"] = "qc_schema_input"

    if config.scratch_directory:
        input_data["scratch_location"] = config.scratch_directory

    if parse_version(self.get_version()) > parse_version("1.2"):

        # Default open-shell molecules to UHF unless a reference was given (any key case)
        caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()}
        if (input_model.molecule.molecular_multiplicity != 1) and ("reference" not in caseless_keywords):
            input_data["keywords"]["reference"] = "uhf"

        # Execute the program
        success, output = execute([which("psi4"), "--json", "data.json"],
                                  {"data.json": json.dumps(input_data)}, ["data.json"])

        if success:
            output_data = json.loads(output["outfiles"]["data.json"])
            if "extras" not in output_data:
                output_data["extras"] = {}

            output_data["extras"]["local_qcvars"] = output_data.pop("psi4:qcvars", None)
            if output_data["success"] is False:
                if "error_message" not in output_data["error"]:  # older c. 1.3 message-only run_json
                    output_data["error"] = {
                        "error_type": "internal_error",
                        "error_message": output_data["error"]
                    }
        else:
            output_data = input_data
            output_data["error"] = {"error_type": "execution_error", "error_message": output["stderr"]}

    else:
        raise TypeError("Psi4 version '{}' not understood.".format(self.get_version()))

    # Reset the schema if required
    output_data["schema_name"] = "qcschema_output"

    # Dispatch errors, PSIO Errors are not recoverable for future runs.
    # The error has usually been normalised to a dict by now, so look inside
    # its message; `in` on the dict itself would only test its keys.
    if output_data["success"] is False:
        error = output_data["error"]
        error_message = error.get("error_message") if isinstance(error, dict) else error
        if "PSIO Error" in str(error_message):
            raise ValueError(error_message)

    # Move several pieces up a level
    if output_data["success"]:
        output_data["provenance"]["memory"] = round(output_data.pop("memory") / (1024**3), 3)  # Move back to GB
        output_data["provenance"]["nthreads"] = output_data.pop("nthreads")
        output_data["stdout"] = output_data.pop("raw_output", None)

        # Delete keys that only steer the Psi4 run
        output_data.pop("return_output", None)
        output_data.pop("scratch_location", None)

        return Result(**output_data)
    else:
        return FailedOperation(
            success=output_data.pop("success", False), error=output_data.pop("error"), input_data=output_data)
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from MP2D output.

    The dispersion energy is read from ``outfiles["stdout"]`` and, for the
    gradient driver, the gradient from ``outfiles["mp2d_gradient"]`` is
    scattered into a full-size array with zeros for non-real atoms.

    Note that ``"stdout"`` is popped from ``outfiles``.

    Raises
    ------
    UnknownError
        If the stdout scan finishes without reaching the coordinates banner
        (except for one-real-atom gradients), or a gradient was requested but
        none was read.
    """
    stdout = outfiles.pop("stdout")

    # parse energy output (could go further and break into UCHF, CKS)
    real = np.array(input_model.molecule.real)
    full_nat = real.shape[0]
    real_nat = np.sum(real)

    for ln in stdout.splitlines():
        if re.match(' MP2D dispersion correction Eh', ln):
            ene = Decimal(ln.split()[4])
        elif re.match('Atomic Coordinates in Angstroms', ln):
            break
    else:
        # Loop ran to completion without seeing the coordinates banner
        if not ((real_nat == 1) and (input_model.driver == 'gradient')):
            raise UnknownError('Unknown issue occured.')

    # parse gradient output
    if outfiles['mp2d_gradient'] is not None:
        srealgrad = outfiles['mp2d_gradient']
        realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=' ').reshape((-1, 3))

    if input_model.driver == 'gradient':
        ireal = np.argwhere(real).reshape((-1))
        fullgrad = np.zeros((full_nat, 3))
        try:
            fullgrad[ireal, :] = realgrad
        except NameError as exc:
            # realgrad is unbound when no gradient file was produced
            raise UnknownError('Unsuccessful gradient collection.') from exc

    qcvkey = input_model.extras['info']['fctldash'].upper()

    # Label stems shared by the energy and gradient variables
    stems = ['CURRENT', 'DISPERSION CORRECTION', '2-BODY DISPERSION CORRECTION']
    if qcvkey:
        stems.append(f'{qcvkey} DISPERSION CORRECTION')

    calcinfo = [qcel.Datum(f'{stem} ENERGY', 'Eh', ene) for stem in stems]
    if input_model.driver == 'gradient':
        calcinfo.extend(qcel.Datum(f'{stem} GRADIENT', 'Eh/a0', fullgrad) for stem in stems)

    calcinfo = {info.label: info.data for info in calcinfo}

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    calcinfo = {
        k.upper(): str(v) if isinstance(v, Decimal) else v
        for k, v in qcel.util.unnp(calcinfo, flat=True).items()
    }

    # Energies are strings at this point (see above); hand back a real number.
    retres = calcinfo[f'CURRENT {input_model.driver.upper()}']
    if isinstance(retres, (Decimal, str)):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    # Work on a copy so the caller's input model is not modified.
    extras = dict(input_model.extras)
    extras["local_keywords"] = input_model.extras['info']
    extras["qcvars"] = calcinfo

    output_data = {
        'extras': extras,
        'properties': {},
        'provenance': Provenance(creator="MP2D",
                                 version=self.get_version(),
                                 routine=__name__ + '.' + sys._getframe().f_code.co_name),
        'return_result': retres,
        'stdout': stdout,
    }  # yapf: disable
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from the Molpro XML in ``outfiles["dispatch.xml"]``.

    Energies and dipole moments are collected from HF, RHF, MP2 and CCSD
    jobsteps; a ``FORCE`` jobstep supplies the gradient, which then becomes
    ``return_result``. Otherwise ``return_result`` is the total energy of the
    method named in ``input_model``.

    Raises
    ------
    KeyError
        If the input method is not one of the supported methods, or no total
        energy can be located for it.
    """
    ns = {'molpro_uri': 'http://www.molpro.net/schema/molpro-output'}
    root = ET.ElementTree(ET.fromstring(outfiles["dispatch.xml"])).getroot()

    # TODO Think of how to handle multiple calls of same command.
    #      Currently it will grab the last one in the file.
    # TODO Read information from molecule tag
    #      - cml:molecule, cml:atomArray (?)
    #      - basisSet
    #      - orbitals
    output_data = {}
    properties = {}

    def correlated_maps(prefix):
        # Property-name -> schema-key tables for one correlated method
        return {
            "energy": {
                "total energy": f"{prefix}_total_energy",
                "correlation energy": f"{prefix}_correlation_energy",
                "singlet pair energy": f"{prefix}_singlet_pair_energy",
                "triplet pair energy": f"{prefix}_triplet_pair_energy",
            },
            "dipole": {"Dipole moment": f"{prefix}_dipole_moment"},
            "extras": {},
        }

    scf_maps = {
        "energy": {"Energy": "scf_total_energy"},
        "dipole": {"Dipole moment": "scf_dipole_moment"},
        "extras": {},
    }

    scf_methods = {"HF": scf_maps, "RHF": scf_maps}
    post_hf_methods = {"MP2": correlated_maps("mp2"), "CCSD": correlated_maps("ccsd")}
    supported_methods = {**scf_methods, **post_hf_methods}

    # The jobstep tag in Molpro contains output from commands (e.g. {hf}, {force})
    for jobstep in root.findall('molpro_uri:job/molpro_uri:jobstep', ns):
        # Remove the -SCF part of the command string when Molpro calls HF or KS
        raw_command = jobstep.attrib['command']
        command = raw_command[:-4] if '-SCF' in raw_command else raw_command

        if command in supported_methods:
            # Energies and dipole moment
            energy_keys = supported_methods[command]['energy']
            dipole_keys = supported_methods[command]['dipole']
            for prop in jobstep.findall('molpro_uri:property', ns):
                prop_name = prop.attrib['name']
                if prop_name in energy_keys:
                    properties[energy_keys[prop_name]] = float(prop.attrib['value'])
                elif prop_name in dipole_keys:
                    properties[dipole_keys[prop_name]] = [float(x) for x in prop.attrib['value'].split()]

        elif 'FORCE' in raw_command:
            # Gradient is one flat list ordered [1x, 1y, 1z, 2x, 2y, 2z, ...]
            for grad in jobstep.findall('molpro_uri:gradient', ns):
                output_data['return_result'] = [float(x) for x in grad.text.split()]

    # Convert triplet and singlet pair correlation energies to opposite-spin and same-spin correlation energies
    for prefix in ("mp2", "ccsd"):
        singlet_key = f"{prefix}_singlet_pair_energy"
        triplet_key = f"{prefix}_triplet_pair_energy"
        if singlet_key in properties and triplet_key in properties:
            triplet = properties.pop(triplet_key)
            singlet = properties.pop(singlet_key)
            properties[f"{prefix}_same_spin_correlation_energy"] = (2.0 / 3.0) * triplet
            properties[f"{prefix}_opposite_spin_correlation_energy"] = (1.0 / 3.0) * triplet + singlet

    # Final energy recorded on the molecule tag, kept as a fallback
    molecule = root.find('molpro_uri:job/molpro_uri:molecule', ns)
    mol_method = molecule.attrib['method']
    mol_final_energy = float(molecule.attrib['energy'])

    # Prefer the energy parsed from the jobsteps, fall back on the molecule tag,
    # and fail if neither matches the method requested in input_model.
    method = input_model.model.method
    method_energy_map = supported_methods[method]['energy']
    is_post_hf = method in post_hf_methods
    is_scf = method in scf_methods

    if is_post_hf and method_energy_map['total energy'] in properties:
        final_energy = properties[method_energy_map['total energy']]
    elif is_scf and method_energy_map['Energy'] in properties:
        final_energy = properties[method_energy_map['Energy']]
    elif mol_method == method:
        # Use the total energy from the molecule tag since it matches the input method
        final_energy = mol_final_energy
        if is_post_hf:
            properties[method_energy_map['total energy']] = mol_final_energy
            properties[method_energy_map['correlation energy']] = \
                mol_final_energy - properties['scf_total_energy']
        elif is_scf:
            properties[method_energy_map['Energy']] = mol_final_energy
    else:
        raise KeyError("Could not find {:s} total energy".format(method))

    # Replace return_result with final_energy if gradient wasn't called
    if "return_result" not in output_data:
        output_data["return_result"] = final_energy

    output_data["properties"] = properties
    output_data['schema_name'] = 'qcschema_output'
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def parse_output(self, outfiles: Dict[str, str], input_model: 'ResultInput') -> 'Result':
    """Assemble a ``Result`` from DFTD3 output.

    The two-body dispersion energy and, when printed, the three-body (ATM)
    energy are read from ``outfiles["stdout"]``. For the gradient driver the
    matching gradient file (``dftd3_abc_gradient`` when the dash level is
    ``atmgr``, else ``dftd3_gradient``) is scattered into a full-size array
    with zeros for non-real atoms.

    Note that ``"stdout"`` is popped from ``outfiles``.

    Raises
    ------
    ResourceError
        If the stdout has the ATM line format of DFTD3 prior to v3.2.1.
    UnknownError
        If the stdout scan finishes without reaching the normal-termination
        line (except for one-real-atom gradients), or a gradient was requested
        but none is available.
    """
    stdout = outfiles.pop("stdout")

    # parse energy output (could go further and break into E6, E8, E10 and Cn coeff)
    real = np.array(input_model.molecule.real)
    full_nat = real.shape[0]
    real_nat = np.sum(real)

    for ln in stdout.splitlines():
        if re.match(' Edisp /kcal,au', ln):
            ene = Decimal(ln.split()[3])
        elif re.match(r" E6\(ABC\) \" :", ln):  # c. v3.2.0
            raise ResourceError("Cannot process ATM results from DFTD3 prior to v3.2.1.")
        elif re.match(r""" E6\(ABC\) /kcal,au:""", ln):
            atm = Decimal(ln.split()[-1])
        elif re.match(' normal termination of dftd3', ln):
            break
    else:
        # Loop ran to completion without seeing the termination line
        if not ((real_nat == 1) and (input_model.driver == 'gradient')):
            raise UnknownError('Unsuccessful run. Possibly -D variant not available in dftd3 version.')

    # parse gradient output
    # * DFTD3 crashes on one-atom gradients. Avoid the error (above) and just force the correct result (below).
    realgrad = None
    if outfiles['dftd3_gradient'] is not None:
        srealgrad = outfiles['dftd3_gradient'].replace('D', 'E')
        realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=' ').reshape((-1, 3))
    elif real_nat == 1:
        realgrad = np.zeros((1, 3))

    realgradabc = None
    if outfiles['dftd3_abc_gradient'] is not None:
        srealgrad = outfiles['dftd3_abc_gradient'].replace('D', 'E')
        realgradabc = np.fromstring(srealgrad, count=3 * real_nat, sep=' ').reshape((-1, 3))
    elif real_nat == 1:
        realgradabc = np.zeros((1, 3))

    is_atm = input_model.extras['info']['dashlevel'] == 'atmgr'

    if input_model.driver == 'gradient':
        ireal = np.argwhere(real).reshape((-1))
        fullgrad = np.zeros((full_nat, 3))
        rg = realgradabc if is_atm else realgrad
        # Explicit check: the selected gradient is read on the line above, so a
        # NameError guard around the assignment below could not catch its absence.
        if rg is None:
            raise UnknownError('Unsuccessful gradient collection.')
        fullgrad[ireal, :] = rg

    qcvkey = input_model.extras['info']['fctldash'].upper()

    # Label stems shared by the energy and gradient variables
    if is_atm:
        energy = atm
        stems = [
            'CURRENT', 'DISPERSION CORRECTION', '3-BODY DISPERSION CORRECTION',
            'AXILROD-TELLER-MUTO 3-BODY DISPERSION CORRECTION'
        ]
    else:
        energy = ene
        stems = ['CURRENT', 'DISPERSION CORRECTION', '2-BODY DISPERSION CORRECTION']
        if qcvkey:
            stems.append(f'{qcvkey} DISPERSION CORRECTION')

    calcinfo = [qcel.Datum(f'{stem} ENERGY', 'Eh', energy) for stem in stems]
    if input_model.driver == 'gradient':
        calcinfo.extend(qcel.Datum(f'{stem} GRADIENT', 'Eh/a0', fullgrad) for stem in stems)

    calcinfo = {info.label: info.data for info in calcinfo}

    # got to even out who needs plump/flat/Decimal/float/ndarray/list
    # Decimal --> str preserves precision
    calcinfo = {
        k.upper(): str(v) if isinstance(v, Decimal) else v
        for k, v in qcel.util.unnp(calcinfo, flat=True).items()
    }

    # Energies are strings at this point (see above); hand back a real number.
    retres = calcinfo[f'CURRENT {input_model.driver.upper()}']
    if isinstance(retres, (Decimal, str)):
        retres = float(retres)
    elif isinstance(retres, np.ndarray):
        retres = retres.ravel().tolist()

    # Work on a copy so the caller's input model is not modified.
    extras = dict(input_model.extras)
    extras['local_keywords'] = input_model.extras['info']
    extras['qcvars'] = calcinfo

    output_data = {
        'extras': extras,
        'properties': {},
        'provenance': Provenance(creator="DFTD3",
                                 version=self.get_version(),
                                 routine=__name__ + '.' + sys._getframe().f_code.co_name),
        'return_result': retres,
        'stdout': stdout,
    }  # yapf: disable
    output_data['success'] = True

    return Result(**{**input_model.dict(), **output_data})
def compute(self, input_model: 'ResultInput', config: 'JobConfig') -> 'Result':
    """
    Runs Psi4 in API mode.

    Parameters
    ----------
    input_model : ResultInput
        The job specification. It is copied and converted to a plain dict
        before being handed to ``psi4.json_wrapper.run_json``; the caller's
        model is not modified.
    config : JobConfig
        Supplies ``ncores``, ``memory`` and ``scratch_directory`` for the job.

    Returns
    -------
    Result
        Built from the Psi4 output dict when Psi4 reports ``success``.
        Otherwise a ``FailedOperation`` carrying the error and the remaining
        output dict as ``input_data`` is returned instead.

    Raises
    ------
    ModuleNotFoundError
        If ``psi4`` cannot be imported.
    TypeError
        If the installed Psi4 version is not newer than 1.2.
    ValueError
        If Psi4 failed with a "PSIO Error", which is treated as
        unrecoverable rather than being returned as a ``FailedOperation``.
    """
    try:
        import psi4
    except ModuleNotFoundError as exc:
        # Same exception type and message as before, now chained to the cause.
        raise ModuleNotFoundError("Could not find Psi4 in the Python path.") from exc

    # Setup the job
    job = input_model.copy().dict()
    job["nthreads"] = config.ncores
    # Memory in bytes, keeping 5% headroom.
    # NOTE(review): assumes config.memory is expressed in GiB -- confirm.
    job["memory"] = int(config.memory * 1024 * 1024 * 1024 * 0.95)
    job["success"] = False
    job["return_output"] = True

    # The schema name is rewritten before the Psi4 call and reset afterwards.
    # NOTE(review): presumably the spelling Psi4's JSON wrapper expects.
    if job["schema_name"] == "qcschema_input":
        job["schema_name"] = "qc_schema_input"

    scratch = config.scratch_directory
    if scratch is not None:
        job["scratch_location"] = scratch

    psi_version = self.parse_version(psi4.__version__)
    if not (psi_version > self.parse_version("1.2")):
        raise TypeError("Psi4 version '{}' not understood.".format(psi_version))

    # Default open-shell molecules to UHF unless a reference was requested.
    mol = psi4.core.Molecule.from_schema(job)
    if (mol.multiplicity() != 1) and ("reference" not in job["keywords"]):
        job["keywords"]["reference"] = "uhf"

    output_data = psi4.json_wrapper.run_json(job)
    if "extras" not in output_data:
        output_data["extras"] = {}
    output_data["extras"]["local_qcvars"] = output_data.pop("psi4:qcvars", None)

    # Reset the schema if required
    output_data["schema_name"] = "qcschema_output"

    if output_data["success"] is False:
        error_message = output_data["error"]
        output_data["error"] = {"error_type": "internal_error", "error_message": error_message}

        # Dispatch errors, PSIO Errors are not recoverable for future runs.
        # The check must look at the message text: testing membership on the
        # wrapped error dict would only compare against its keys.
        if "PSIO Error" in str(error_message):
            raise ValueError(error_message)

    if output_data["success"]:
        # Move several pieces up a level
        output_data["provenance"]["memory"] = round(output_data.pop("memory") / (1024**3), 3)  # Move back to GB
        output_data["provenance"]["nthreads"] = output_data.pop("nthreads")
        output_data["stdout"] = output_data.pop("raw_output", None)

        # Delete keys that were only instructions to Psi4
        output_data.pop("return_output", None)

        return Result(**output_data)

    return FailedOperation(
        success=output_data.pop("success", False),
        error=output_data.pop("error"),
        input_data=output_data)