Ejemplo n.º 1
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: "AtomicInput"
    ) -> "AtomicResult":  # lgtm: [py/similar-function]
        """Turn the files of a finished Turbomole run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. The ``"stdout"`` entry is
                required and is removed from the dict; the remaining entries
                are forwarded to ``harvest`` as keyword arguments and stored
                under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult`` built on top of
            ``input_model.dict()``.

        Raises:
            KeyError: If ``"stdout"`` is missing, or if no
                ``CURRENT <DRIVER>`` quantity was harvested.
        """
        log_text = outfiles.pop("stdout")

        harvested, grad, hess = harvest(input_model.molecule, log_text, **outfiles)

        # Expose any parsed derivatives under the generic "CURRENT ..." names.
        for label, array in (("CURRENT GRADIENT", grad), ("CURRENT HESSIAN", hess)):
            if array is not None:
                harvested[label] = array

        # The driver name selects which harvested quantity is the main result.
        primary = harvested[f"CURRENT {input_model.driver.upper()}"]
        if isinstance(primary, Decimal):
            primary = float(primary)

        build_out(harvested)
        properties = build_atomicproperties(harvested)

        result_fields = input_model.dict()
        result_fields["extras"]["outfiles"] = outfiles
        result_fields.update(
            properties=properties,
            provenance=Provenance(creator="Turbomole", version=self.get_version(), routine="turbomole"),
            return_result=primary,
            stdout=log_text,
            success=True,
        )

        return AtomicResult(**result_fields)
Ejemplo n.º 2
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Build an ``AtomicResult`` from the stdout of a GAMESS run.

        Args:
            outfiles: Output files keyed by name; only ``"stdout"`` is read.
            input_model: The input that produced the output. Its fields are
                the base of the result and are overridden by the parsed data.

        Returns:
            A successful ``AtomicResult`` whose molecule is the one returned
            by ``harvest`` and whose ``extras["qcvars"]`` holds every
            harvested quantity (``Decimal`` values converted to ``float``).

        Raises:
            KeyError: If ``"stdout"`` is missing, or if no
                ``CURRENT <DRIVER>`` quantity was harvested.
        """
        stdout = outfiles["stdout"]

        # gamessmol, if it exists, is dinky, just a clue to geometry of gamess results
        qcvars, gamessgrad, gamessmol = harvest(input_model.molecule, stdout)

        if gamessgrad is not None:
            qcvars["CURRENT GRADIENT"] = gamessgrad

        qcvars = unnp(qcvars, flat=True)

        output_data = {
            "schema_name": "qcschema_output",
            "molecule": gamessmol,
            "schema_version": 1,
            "extras": {},
            "properties": {
                "nuclear_repulsion_energy":
                gamessmol.nuclear_repulsion_energy()
            },
            "return_result": qcvars[f"CURRENT {input_model.driver.upper()}"],
            "stdout": stdout,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        output_data["extras"]["qcvars"] = {
            k.upper(): float(v) if isinstance(v, Decimal) else v
            for k, v in qcel.util.unnp(qcvars, flat=True).items()
        }

        # copy qcvars into schema where possible
        qcvars_to_properties = {
            "DFT XC ENERGY": "scf_xc_energy",
            "ONE-ELECTRON ENERGY": "scf_one_electron_energy",
            "TWO-ELECTRON ENERGY": "scf_two_electron_energy",
            "SCF TOTAL ENERGY": "scf_total_energy",
            "MP2 CORRELATION ENERGY": "mp2_correlation_energy",
            "MP2 TOTAL ENERGY": "mp2_total_energy",
            "CCSD CORRELATION ENERGY": "ccsd_correlation_energy",
            "CCSD TOTAL ENERGY": "ccsd_total_energy",
            "CCSD(T) CORRELATION ENERGY": "ccsd_prt_pr_correlation_energy",
            "CCSD(T) TOTAL ENERGY": "ccsd_prt_pr_total_energy",
        }
        for qcvar, value in qcvars.items():
            prop = qcvars_to_properties.get(qcvar)
            if prop is not None:
                output_data["properties"][prop] = value

        # The dipole needs all three components. Testing for a mere overlap
        # with the harvested keys would let a partial dipole through and
        # fail with KeyError on the missing component below.
        dipole_keys = ("SCF DIPOLE X", "SCF DIPOLE Y", "SCF DIPOLE Z")
        if all(key in qcvars for key in dipole_keys):
            conv = Decimal(
                qcel.constants.conversion_factor("debye", "e * bohr"))
            output_data["properties"]["scf_dipole_moment"] = [
                qcvars[key] * conv for key in dipole_keys
            ]
        output_data["success"] = True

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 3
0
def harvest_as_atomic_result(input_model: OptimizationInput,
                             nwout: str) -> List[AtomicResult]:
    """Parse each step in the geometry relaxation as a separate AtomicResult

    Args:
        input_model: Input specification for the relaxation
        nwout: Standard out from the NWChem simulation
    Returns:
        A list of the results at each step
    """
    # The parser hands back parallel per-step sequences; its error value is
    # not used here.
    step_qcvars, step_mols, step_grads, version, _error = harvest_output(nwout)

    results = []
    for step_vars, step_grad, step_mol in zip(step_qcvars, step_grads, step_mols):
        spec = input_model.input_specification

        if step_grad is not None:
            # The first four characters of the method name are dropped --
            # presumably a program prefix; TODO confirm against callers.
            method_label = spec.model.method.upper()[4:]
            step_vars[f"{method_label} TOTAL GRADIENT"] = step_grad
            step_vars["CURRENT GRADIENT"] = step_grad

        # Derive the schema properties from the harvested quantities
        build_out(step_vars)
        properties = build_atomicproperties(step_vars)

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        extras = input_model.extras.copy()
        extras["qcvars"] = {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in unnp(step_vars, flat=True).items()
        }

        step_result = AtomicResult(
            schema_version=1,
            molecule=step_mol,
            driver="gradient",
            extras=extras,
            model=spec.model,
            keywords=spec.keywords,
            properties=properties,
            provenance=Provenance(creator="NWChem", version=version,
                                  routine="nwchem_opt"),
            return_result=step_grad,
            success=True,
        )
        results.append(step_result)
    return results
Ejemplo n.º 4
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: "AtomicInput"
    ) -> "AtomicResult":  # lgtm: [py/similar-function]
        """Turn the files of a finished NWChem run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. ``"stdout"`` is required and
                is removed from the dict; the rest is passed to ``harvest``
                as keyword arguments and kept under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult``; the input model's fields are the
            base and the parsed fields override them.

        Raises:
            KeyError: If ``"stdout"`` is missing, or if no
                ``CURRENT <DRIVER>`` quantity was harvested.
        """
        # The captured standard output is mandatory
        log_text = outfiles.pop("stdout")

        # Only the qcvars, Hessian and gradient are used from the harvest
        harvested, hess, grad, _mol, _version, _err = harvest(
            input_model.molecule, log_text, **outfiles)

        for label, array in (("CURRENT GRADIENT", grad), ("CURRENT HESSIAN", hess)):
            if array is not None:
                harvested[label] = array

        # Return value becomes a plain float or a (nested) list
        primary = harvested[f"CURRENT {input_model.driver.upper()}"]
        if isinstance(primary, Decimal):
            primary = float(primary)
        elif isinstance(primary, np.ndarray):
            primary = primary.tolist()

        properties = extract_formatted_properties(harvested)

        extras = {"outfiles": outfiles, **input_model.extras}
        parsed = {
            "schema_name": "qcschema_output",
            "schema_version": 1,
            "extras": extras,
            "properties": properties,
            "provenance": Provenance(creator="NWChem",
                                     version=self.get_version(),
                                     routine="nwchem"),
            "return_result": primary,
            "stdout": log_text,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        extras["qcvars"] = {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in qcel.util.unnp(harvested, flat=True).items()
        }
        parsed["success"] = True

        merged = input_model.dict()
        merged.update(parsed)
        return AtomicResult(**merged)
Ejemplo n.º 5
0
    def compute(self, input_data: "AtomicInput",
                config: "TaskConfig") -> "AtomicResult":
        """Evaluate a UFF energy or gradient with RDKit.

        Args:
            input_data: The calculation to run. Only the ``uff`` method and
                the ``energy`` / ``gradient`` drivers are accepted.
            config: Task configuration; not read by this method.

        Returns:
            A successful ``AtomicResult`` layered on top of
            ``input_data.dict()``.

        Raises:
            InputError: For a method other than UFF, for missing force-field
                parameters, or for an unsupported driver.
        """
        self.found(raise_error=True)
        import rdkit
        from rdkit.Chem import AllChem

        # Convert the schema molecule into an RDKit molecule
        rd_mol = self._process_molecule_rdkit(input_data.molecule)

        if input_data.model.method.lower() != "uff":
            raise InputError("RDKit only supports the UFF method currently.")
        force_field = AllChem.UFFGetMoleculeForceField(rd_mol)
        has_all_params = AllChem.UFFHasAllMoleculeParams(rd_mol)

        if has_all_params is False:
            raise InputError(
                "RDKit parameters not found for all atom types in molecule.")

        force_field.Initialize()

        # NOTE(review): the conversions treat RDKit's values as kJ/mol --
        # confirm against the RDKit force-field documentation.
        energy = force_field.CalcEnergy() * ureg.conversion_factor("kJ / mol", "hartree")

        driver = input_data.driver
        if driver == "energy":
            return_result = energy
        elif driver == "gradient":
            # NOTE(review): a per-length quantity is multiplied by the
            # angstrom -> bohr factor here -- confirm the direction.
            energy_factor = ureg.conversion_factor("kJ / mol", "hartree")
            length_factor = ureg.conversion_factor("angstrom", "bohr")
            scale = energy_factor * length_factor
            return_result = [component * scale for component in force_field.CalcGrad()]
        else:
            raise InputError(
                f"RDKit can only compute energy and gradient driver methods. Found {input_data.driver}."
            )

        computed = {
            "success": True,
            "properties": {"return_energy": energy},
            "return_result": return_result,
            "provenance": Provenance(
                creator="rdkit",
                version=rdkit.__version__,
                routine="rdkit.Chem.AllChem.UFFGetMoleculeForceField"),
            "schema_name": "qcschema_output",
        }

        # Merge into one dict first: passing both as keyword arguments would
        # repeat keys instead of letting the computed values win.
        merged = input_data.dict()
        merged.update(computed)
        return AtomicResult(**merged)
Ejemplo n.º 6
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: "AtomicInput"
    ) -> AtomicResult:  # lgtm: [py/similar-function]
        """Convert the files of a finished NWChem run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. ``"stdout"`` and
                ``"stderr"`` are required and are removed from the dict; the
                remaining entries are passed to ``harvest`` as keyword
                arguments and stored under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult``; the input model's fields are the
            base and the parsed fields override them.

        Raises:
            UnknownError: If ``harvest`` fails for any reason. The message is
                the full stdout and the original exception is attached as
                the cause.
            KeyError: If ``"stdout"``/``"stderr"`` is missing, or if the
                quantity selected by the driver was not harvested.
        """
        # Get the stdout from the calculation (required)
        stdout = outfiles.pop("stdout")
        stderr = outfiles.pop("stderr")

        # Read the NWChem stdout file and, if needed, the hess or grad files
        try:
            qcvars, nwhess, nwgrad, nwmol, version, errorTMP = harvest(input_model.molecule, stdout, **outfiles)
        except Exception as e:
            # Chain explicitly so the parser failure is recorded as the
            # direct cause of the reported error.
            raise UnknownError(stdout) from e

        if nwgrad is not None:
            # The first four characters of the method name are dropped --
            # presumably a program prefix; TODO confirm.
            qcvars[f"{input_model.model.method.upper()[4:]} TOTAL GRADIENT"] = nwgrad
            qcvars["CURRENT GRADIENT"] = nwgrad
        if nwhess is not None:
            qcvars["CURRENT HESSIAN"] = nwhess

        # Normalize the output as a float or list of floats.
        # A "properties" driver reports the energy as its return value.
        if input_model.driver.upper() == "PROPERTIES":
            retres = qcvars["CURRENT ENERGY"]
        else:
            retres = qcvars[f"CURRENT {input_model.driver.upper()}"]
        if isinstance(retres, Decimal):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.tolist()

        # Get the formatted properties
        build_out(qcvars)
        atprop = build_atomicproperties(qcvars)

        # Format them into an output
        output_data = {
            "schema_version": 1,
            "extras": {"outfiles": outfiles, **input_model.extras},
            "properties": atprop,
            "provenance": Provenance(creator="NWChem", version=self.get_version(), routine="nwchem"),
            "return_result": retres,
            "stderr": stderr,
            "stdout": stdout,
            "success": True,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        # * formerly unnp(qcvars, flat=True).items()
        output_data["extras"]["qcvars"] = {
            k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
        }

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 7
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: AtomicInput
    ) -> AtomicResult:  # lgtm: [py/similar-function]
        """Convert the files of a finished CFOUR run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. ``"stdout"`` and
                ``"stderr"`` are required and are removed from the dict; the
                remaining entries are passed to ``harvest`` as keyword
                arguments and stored under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult``; the input model's fields are the
            base and the parsed fields override them. Array results are
            flattened to a one-dimensional list.

        Raises:
            UnknownError: If ``harvest`` fails for any reason. The message is
                the full stdout and the original exception is attached as
                the cause.
            KeyError: If ``"stdout"``/``"stderr"`` is missing, or if the
                quantity selected by the driver was not harvested.
        """
        stdout = outfiles.pop("stdout")
        stderr = outfiles.pop("stderr")

        # c4mol, if it exists, is dinky, just a clue to geometry of cfour results
        try:
            qcvars, c4hess, c4grad, c4mol, version, errorTMP = harvest(input_model.molecule, stdout, **outfiles)
        except Exception as e:
            # Chain explicitly so the parser failure is recorded as the
            # direct cause of the reported error.
            raise UnknownError(stdout) from e

        # The first three characters of the method name are dropped when
        # labelling derivatives -- presumably a program prefix; TODO confirm.
        if c4grad is not None:
            qcvars["CURRENT GRADIENT"] = c4grad
            qcvars[f"{input_model.model.method.upper()[3:]} TOTAL GRADIENT"] = c4grad

        if c4hess is not None:
            qcvars[f"{input_model.model.method.upper()[3:]} TOTAL HESSIAN"] = c4hess
            qcvars["CURRENT HESSIAN"] = c4hess

        # A "properties" driver reports the energy as its return value.
        if input_model.driver.upper() == "PROPERTIES":
            retres = qcvars["CURRENT ENERGY"]
        else:
            retres = qcvars[f"CURRENT {input_model.driver.upper()}"]

        if isinstance(retres, Decimal):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.ravel().tolist()

        build_out(qcvars)
        atprop = build_atomicproperties(qcvars)

        output_data = {
            "schema_version": 1,
            "extras": {"outfiles": outfiles, **input_model.extras},
            "properties": atprop,
            "provenance": Provenance(creator="CFOUR", version=self.get_version(), routine="xcfour"),
            "return_result": retres,
            "stderr": stderr,
            "stdout": stdout,
            "success": True,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        # * formerly unnp(qcvars, flat=True).items()
        output_data["extras"]["qcvars"] = {
            k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
        }

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 8
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: AtomicInput) -> AtomicResult:
        """
        Extract as much information as possible from the Gaussian output
        files and return it as an atomic result.

        The total energy is read from ``lig.fchk`` and the gradient / Hessian
        parsers are given the same file; the convergence check, the program
        version and the optional bond-order parse use ``gaussian.log``.
        """
        log_text = outfiles["gaussian.log"]

        # Refuse to parse a run that did not terminate properly
        self.check_convergence(logfile=log_text)
        provenance = {
            "version": self.parse_version(logfile=log_text),
            "creator": "gaussian",
            "routine": "CLI",
        }

        # The result starts as a copy of the input fields
        output_data = input_model.dict()
        properties = {}
        qcvars = {}

        # Every "Total Energy" line of the fchk file updates the energy
        fchk_text = outfiles["lig.fchk"]
        for line in fchk_text.split("\n"):
            if "Total Energy" not in line:
                continue
            energy = float(line.split()[3])
            properties["return_energy"] = energy
            properties["scf_total_energy"] = energy
            if input_model.driver == "energy":
                output_data["return_result"] = energy

        # Derivative drivers replace the return value with the parsed array
        if input_model.driver == "gradient":
            output_data["return_result"] = self.parse_gradient(fchfile=outfiles["lig.fchk"])
        elif input_model.driver == "hessian":
            output_data["return_result"] = self.parse_hessian(fchkfile=outfiles["lig.fchk"])

        # Bond orders are parsed only when scf_properties was requested
        if "scf_properties" in input_model.keywords:
            qcvars["WIBERG_LOWDIN_INDICES"] = self.parse_wbo(
                logfile=outfiles["gaussian.log"],
                natoms=len(input_model.molecule.symbols),
            )

        # Pass the optional wfx file through untouched
        extras = output_data["extras"]
        if "gaussian.wfx" in outfiles:
            extras["gaussian.wfx"] = outfiles["gaussian.wfx"]
        if qcvars:
            extras["qcvars"] = qcvars

        output_data.update(
            properties=properties,
            schema_name="qcschema_output",
            stdout=outfiles["gaussian.log"],
            success=True,
            provenance=provenance,
        )
        return AtomicResult(**output_data)
Ejemplo n.º 9
0
    def _compute(self, driver):
        """Run the schema input for ``driver`` through psi4.

        Versions whose string contains "1.3" use ``psi4.json_wrapper``; all
        others use ``psi4.schema_wrapper`` with JSON serialization enabled.
        The returned mapping is wrapped in an ``AtomicResult``.
        """
        import psi4

        payload = self.generate_schema_input(driver).dict()

        if "1.3" in psi4.__version__:
            raw = psi4.json_wrapper.run_json_qcschema(payload, clean=True)
        else:
            raw = psi4.schema_wrapper.run_json_qcschema(
                payload, clean=True, json_serialization=True)
        return AtomicResult(**raw)
Ejemplo n.º 10
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: "AtomicInput"
    ) -> "AtomicResult":  # lgtm: [py/similar-function]
        """Turn the files of a finished CFOUR run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. ``"stdout"`` is required and
                is removed from the dict; the rest is passed to ``harvest``
                as keyword arguments and kept under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult`` with empty ``properties``; array
            results are flattened to a one-dimensional list.

        Raises:
            KeyError: If ``"stdout"`` is missing, or if no
                ``CURRENT <DRIVER>`` quantity was harvested.
        """
        log_text = outfiles.pop("stdout")

        # Only the qcvars, Hessian and gradient are used from the harvest
        harvested, hess, grad, _mol, _version, _err = harvest(
            input_model.molecule, log_text, **outfiles)

        for label, array in (("CURRENT GRADIENT", grad), ("CURRENT HESSIAN", hess)):
            if array is not None:
                harvested[label] = array

        primary = harvested[f"CURRENT {input_model.driver.upper()}"]
        if isinstance(primary, Decimal):
            primary = float(primary)
        elif isinstance(primary, np.ndarray):
            primary = primary.ravel().tolist()

        extras = {"outfiles": outfiles}
        parsed = {
            "schema_name": "qcschema_output",
            "schema_version": 1,
            "extras": extras,
            "properties": {},
            "provenance": Provenance(creator="CFOUR",
                                     version=self.get_version(),
                                     routine="xcfour"),
            "return_result": primary,
            "stdout": log_text,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        extras["qcvars"] = {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in qcel.util.unnp(harvested, flat=True).items()
        }
        parsed["success"] = True

        merged = input_model.dict()
        merged.update(parsed)
        return AtomicResult(**merged)
Ejemplo n.º 11
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> AtomicResult:
        """Assemble an ``AtomicResult`` from the log and binary output files.

        ``dispatch.out`` is treated as the text log. Every other non-``None``
        entry of ``outfiles`` is decoded with ``np.frombuffer``. The driver
        picks the return value: the last element of file ``"99.0"`` for an
        energy, all of ``"131.0"`` for a gradient, all of ``"132.0"`` for a
        Hessian.

        Raises:
            ValueError: If the driver is not energy, gradient or hessian.
            KeyError: If a required file is absent from ``outfiles``.
        """
        # Text log (empty string when absent) versus decoded binary arrays
        outtext = outfiles.get("dispatch.out", "")
        arrays = {
            name: np.frombuffer(payload)
            for name, payload in outfiles.items()
            if name != "dispatch.out" and payload is not None
        }

        driver = input_model.driver
        if driver == "energy":
            return_result = arrays["99.0"][-1]
        elif driver == "gradient":
            return_result = arrays["131.0"]
        elif driver == "hessian":
            return_result = arrays["132.0"]
        else:
            raise ValueError(
                f"Could not parse driver of type {input_model.driver}.")

        energies = arrays["99.0"]
        properties = {
            "nuclear_repulsion_energy": energies[0],
            "scf_total_energy": energies[1],
            "return_energy": energies[-1],
        }

        if input_model.model.method.lower() in {"mp2", "rimp2"}:
            properties["mp2_total_energy"] = properties["return_energy"]

        # A CCSD-specific correction read from dispatch.out was sketched at
        # this point in the past, but it is not implemented.

        props, prov = self._parse_logfile_common(outtext, input_model.dict())
        properties.update(props)

        parsed = {
            "return_result": return_result,
            "provenance": prov,
            "properties": properties,
            "stdout": outfiles["dispatch.out"],
            "success": True,
        }

        merged = input_model.dict()
        merged.update(parsed)
        return AtomicResult(**merged)
Ejemplo n.º 12
0
    def _compute(self, driver):
        """Package the user-supplied energy, gradient and Hessian as a result.

        Nothing is computed here apart from the nuclear repulsion energy of
        the stored molecule. The externally provided values are checked for
        the requested driver, copied into a fresh result, and then cleared
        from the instance.

        Raises:
            OptError: If a value required by ``driver`` was not provided.
        """
        log = logging.getLogger(__name__)
        log.info("UserComputer only returning provided values")

        energy = self.external_energy
        gradient = self.external_gradient
        hessian = self.external_hessian

        no_energy = energy is None
        no_gradient = gradient is None
        no_hessian = hessian is None

        # Each driver needs its own quantity plus all lower-order ones
        if driver == "hessian":
            if no_hessian or no_gradient or no_energy:
                raise OptError("Must provide hessian, gradient, and energy.")
        elif driver == "gradient":
            if no_gradient or no_energy:
                raise OptError("Must provide gradient and energy.")
        elif driver == "energy":
            if no_energy:
                raise OptError("Must provide energy.")

        result = deepcopy(UserComputer.output_skeleton)
        result["driver"] = driver
        molecule = Molecule(**self.molecule)
        result["molecule"] = molecule

        properties = result["properties"]
        qcvars = result["extras"]["qcvars"]

        repulsion = molecule.nuclear_repulsion_energy()
        properties["nuclear_repulsion_energy"] = repulsion
        qcvars["NUCLEAR REPULSION ENERGY"] = repulsion

        properties["return_energy"] = energy
        qcvars["CURRENT ENERGY"] = energy

        if driver in ("gradient", "hessian"):
            qcvars["CURRENT GRADIENT"] = gradient
        if driver == "hessian":
            qcvars["CURRENT HESSIAN"] = hessian

        # The return value is whichever quantity the driver names
        if driver == "energy":
            result["return_result"] = energy
        elif driver == "gradient":
            result["return_result"] = gradient
        elif driver == "hessian":
            result["return_result"] = hessian

        # maybe do this to protect against repeatedly going back for same?
        self.external_energy = self.external_gradient = self.external_hessian = None
        return AtomicResult(**result)
Ejemplo n.º 13
0
    def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
        """Turn the files of a finished GAMESS run into an ``AtomicResult``.

        Args:
            outfiles: Output files keyed by name. ``"stdout"`` and
                ``"stderr"`` are required and are removed from the dict; the
                rest is passed to ``harvest`` as keyword arguments and kept
                under ``extras["outfiles"]``.
            input_model: The input that produced these files.

        Returns:
            A successful ``AtomicResult`` whose molecule is the one returned
            by ``harvest``.

        Raises:
            KeyError: If ``"stdout"``/``"stderr"`` is missing, or if the
                quantity selected by the driver was not harvested.
        """
        # Both captured streams are mandatory
        out_text = outfiles.pop("stdout")
        err_text = outfiles.pop("stderr")

        # gamessmol, if it exists, is dinky, just a clue to geometry of gamess results
        harvested, grad, parsed_mol = harvest(input_model.molecule, out_text, **outfiles)

        if grad is not None:
            harvested["CURRENT GRADIENT"] = grad

        # A "properties" driver reports the energy as its return value
        driver_name = input_model.driver.upper()
        result_key = "CURRENT ENERGY" if driver_name == "PROPERTIES" else f"CURRENT {driver_name}"
        primary = harvested[result_key]

        build_out(harvested)
        properties = build_atomicproperties(harvested)

        extras = {"outfiles": outfiles, **input_model.extras}
        parsed = {
            "schema_version": 1,
            "molecule": parsed_mol,
            "extras": extras,
            "properties": properties,
            "provenance": Provenance(creator="GAMESS", version=self.get_version(), routine="rungms"),
            "return_result": primary,
            "stderr": err_text,
            "stdout": out_text,
            "success": True,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        extras["qcvars"] = {
            name.upper(): str(value) if isinstance(value, Decimal) else value
            for name, value in unnp(harvested, flat=True).items()
        }

        merged = input_model.dict()
        merged.update(parsed)
        return AtomicResult(**merged)
Ejemplo n.º 14
0
def test_add_data():
    """Feed geometries and single points into a ``MoleculeData`` record.

    The record is created from the identifier "O". Stored optimization and
    single-point results are added one by one, checking after each step that
    the expected entries appear, and redox recipes are then evaluated on it.
    """
    md = MoleculeData.from_identifier("O")

    # Neutral geometry relaxed with xtb
    neutral_opt = OptimizationResult.parse_file(
        _my_path.joinpath('records/xtb-neutral.json'))
    md.add_geometry(neutral_opt)
    assert "xtb" in md.data
    assert "neutral" in md.data["xtb"]
    neutral_atomization = md.data["xtb"][OxidationState.NEUTRAL].atomization_energy["xtb-no_zpe"]
    assert isclose(neutral_atomization, -0.515, abs_tol=1e-2)
    assert ("xtb", "neutral") == md.match_geometry(neutral_opt.final_molecule)

    # Oxidized geometry relaxed with xtb
    oxidized_opt = OptimizationResult.parse_file(
        _my_path.joinpath('records/xtb-oxidized.json'))
    md.add_geometry(oxidized_opt)
    assert "xtb" in md.data
    assert "oxidized" in md.data["xtb"]
    assert ("xtb", "neutral") == md.match_geometry(oxidized_opt.initial_molecule)
    assert ("xtb", "oxidized") == md.match_geometry(oxidized_opt.final_molecule)
    oxidized_entry = md.data['xtb'][OxidationState.OXIDIZED]
    neutral_entry = md.data['xtb'][OxidationState.NEUTRAL]
    assert (oxidized_entry.total_energy[OxidationState.OXIDIZED]['xtb']
            != neutral_entry.total_energy[OxidationState.OXIDIZED]['xtb'])

    # Oxidized-state energy evaluated at the neutral structure
    single_point = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_xtb-oxidized-energy.json'))
    md.add_single_point(single_point)

    # Solvation energies for both geometries
    single_point = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_acn.json'))
    md.add_single_point(single_point)
    assert "acetonitrile" in md.data['xtb']['neutral'].solvation_energy['neutral']

    single_point = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-oxidized_acn.json'))
    md.add_single_point(single_point)
    assert "acetonitrile" in md.data['xtb']['oxidized'].solvation_energy['oxidized']

    # Redox potentials: vertical, adiabatic, adiabatic with solvent
    vertical = RedoxEnergyRecipe(name="xtb-vertical",
                                 geometry_level="xtb",
                                 energy_level="xtb",
                                 adiabatic=False)
    potential = vertical.compute_redox_potential(md, OxidationState.OXIDIZED)
    assert md.oxidation_potential['xtb-vertical'] == potential

    adiabatic = RedoxEnergyRecipe(name="xtb",
                                  geometry_level="xtb",
                                  energy_level="xtb",
                                  adiabatic=True)
    potential = adiabatic.compute_redox_potential(md, OxidationState.OXIDIZED)
    assert md.oxidation_potential['xtb'] == potential
    assert md.oxidation_potential['xtb'] < md.oxidation_potential['xtb-vertical']

    solvated = RedoxEnergyRecipe(name="xtb-acn",
                                 geometry_level="xtb",
                                 energy_level="xtb",
                                 adiabatic=True,
                                 solvent='acetonitrile',
                                 solvation_level='xtb')
    potential = solvated.compute_redox_potential(md, OxidationState.OXIDIZED)
    assert md.oxidation_potential['xtb-acn'] == potential
    assert md.oxidation_potential['xtb-acn'] != md.oxidation_potential['xtb']

    # Hessian single point at the small_basis level
    hessian_point = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_smb-neutral-hessian.json'))
    md.add_single_point(hessian_point)
    neutral_zpe = md.data["xtb"][OxidationState.NEUTRAL].zpe[OxidationState.NEUTRAL]['small_basis']
    assert isclose(neutral_zpe, 0.02155, abs_tol=1e-3)

    # Add an NWChem with solvent
    solvent_point = AtomicResult.parse_file(
        _my_path.joinpath('records/xtb-neutral_smb-neutral_water.json'))
    md.add_single_point(solvent_point)
    assert 'small_basis' in md.data['xtb']['neutral'].total_energy_in_solvent['neutral']['water']
Ejemplo n.º 15
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Parse the MOPAC ``dispatch.aux`` file into an ``AtomicResult``.

        The aux text is read line by line. ``KEY[:UNITS][[n]]=VALUE`` lines
        start a new entry; lines without ``=`` are treated as continuation
        data for the most recent kept key. Values are scaled with the
        factors stored in ``self.extras``.

        Args:
            outfiles: Output files keyed by name; ``"dispatch.aux"`` and
                ``"dispatch.out"`` are read.
            input_model: The input that produced the output; its ``dict()``
                is the base of the returned result.

        Returns:
            A successful ``AtomicResult``. The return value is the heat of
            formation for the ``energy`` driver and the parsed gradient for
            any other driver. All remaining kept keys are copied to extras.

        Raises:
            UnknownError: If the aux file is ``None``, or if ``gradients`` or
                ``mopac_version`` could not be parsed from it.
            KeyError: If a kept key carries a unit that has no entry in the
                conversion table, or if ``heat_of_formation`` is missing.
        """

        # Only these aux keys are retained; everything else is skipped.
        # ("molecular_weight" is listed twice, which is harmless in a set.)
        keep_keys = {
            "heat_of_formation",
            "energy_electronic",
            "energy_nuclear",
            "gradient_norm",
            "dip_vec",
            "spin_component",
            "total_spin",
            "molecular_weight",
            "molecular_weight",
            "total_energy",
            "gradients",
            "mopac_version",
            "atom_charges",
            "point_group",
        }

        # Convert back to atomic units
        # Multipliers are looked up by the unit suffix of each aux key;
        # ``None`` covers keys that carry no unit suffix.
        conversions = {
            "KCAL/MOL":
            1 / self.extras["hartree_to_kcalmol"],
            "KCAL/MOL/ANGSTROM":
            self.extras["bohr_to_angstroms"] /
            self.extras["hartree_to_kcalmol"],
            "EV":
            1 / self.extras["hartree_to_ev"],
            "DEBYE":
            1 / self.extras["au_to_debye"],
            "AMU":
            1,
            None:
            1,
        }

        # key -> (conversion factor, value); unwrapped to plain values later
        data = {}
        # Most recent kept key, the target of continuation lines
        last_key = None

        # Parse the weird structure
        # Without an aux file there is nothing to parse; report the text
        # output instead when it is available.
        if outfiles["dispatch.aux"] is None:
            error = "An unknown error occured and no results were captured."
            if outfiles["dispatch.out"] is not None:
                error = outfiles["dispatch.out"]
            raise UnknownError(error)

        for line in outfiles["dispatch.aux"].splitlines():
            # Section markers and comment lines carry no data
            if ("START" in line) or ("END" in line) or ("#" in line):
                continue

            if "=" in line:

                # Primary split
                key, value = line.split("=", 1)

                # Format key, may have units
                # IONIZATION_POTENTIAL:EV
                # GRADIENTS:KCAL/MOL/ANGSTROM[09]
                key_list = key.split(":", 1)
                if len(key_list) == 1:
                    key, units = key_list[0], None
                else:
                    key, units = key.split(":", 1)

                # Pop off [xx] items
                if units and "[" in units:
                    units, _ = units.split("[", 1)

                if "[" in key:
                    key, _ = key.split("[", 1)

                key = key.strip().lower()
                last_key = key

                # Skip keys that are not useful
                # Resetting last_key also makes their continuation lines
                # fall through the "last_key is None" check below.
                if key not in keep_keys:
                    last_key = None
                    continue

                # Unit factor for this key (KeyError for an unlisted unit)
                cf = conversions[units]

                # 1D+3 -> 1E3 conversion
                # A value with several exponents is a list of numbers, one
                # exponent means a single number, and anything else stays a
                # string.
                value = value.strip().replace("D+", "E+").replace("D-", "E-")
                if ("E+" in value) or ("E-" in value):
                    if value.count("E") > 1:
                        value = [float(x) * cf for x in value.split()]
                    else:
                        value = float(value) * cf

                # An empty value starts a list filled by continuation lines
                if value == "":
                    value = []

                data[key] = (cf, value)
            else:
                # Continuation line: append to the last kept key, if any
                if last_key is None:
                    continue

                # NOTE(review): assumes the stored value is a list here; a
                # scalar or string value would not support extend -- confirm
                # this cannot occur in the aux format.
                cf = data[last_key][0]
                data[last_key][1].extend([float(x) * cf for x in line.split()])

        # Drop the conversion factors and keep only the values
        data = {k: v[1] for k, v in data.items()}
        if ("gradients" not in data) or ("mopac_version" not in data):
            raise UnknownError(
                "Could not correctly parse the MOPAC output file.")

        # Removed from data so it is not copied into extras below
        gradient = data.pop("gradients")

        output = input_model.dict()
        output["provenance"] = {
            "creator": "mopac",
            "version": data.pop("mopac_version")
        }

        output["properties"] = {}
        output["properties"]["return_energy"] = data["heat_of_formation"]

        # All remaining parsed quantities are exposed through extras
        output["extras"].update(data)

        if input_model.driver == "energy":
            output["return_result"] = data["heat_of_formation"]
        else:
            output["return_result"] = gradient

        output["stdout"] = outfiles["dispatch.out"]
        output["success"] = True

        return AtomicResult(**output)
Ejemplo n.º 16
0
    def parse_output(self, output: Dict[str, Any],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Assemble an ``AtomicResult`` from an in-memory result dictionary.

        The result starts from ``input_model.dict()`` and is extended with the
        driver result, a wavefunction block and a few summary properties.

        Parameters
        ----------
        output
            Result mapping. Entries read here are
            ``output[input_model.driver.value]``, ``output["energy"]`` and
            ``output["wavefunction"]`` (its ``"ao_basis"`` entry plus any of
            the optional orbital, density, Fock, eigenvalue and occupation
            entries). The shell dictionaries inside the basis data are
            modified in place.
        input_model
            The input the calculation was run for.

        Returns
        -------
        AtomicResult
            Result with ``success`` set to ``True``.

        Raises
        ------
        KeyError
            If an entry that is subscripted directly (for example the driver
            result or the alpha occupations) is missing.
        """
        # (source key, QCSchema key) pairs, visited in this order.
        key_pairs = (
            ("orbitals_alpha", "scf_orbitals_a"),
            ("orbitals_beta", "scf_orbitals_b"),
            ("density_alpha", "scf_density_a"),
            ("density_beta", "scf_density_b"),
            ("fock_alpha", "scf_fock_a"),
            ("fock_beta", "scf_fock_b"),
            ("eigenvalues_alpha", "scf_eigenvalues_a"),
            ("eigenvalues_beta", "scf_eigenvalues_b"),
            ("occupations_alpha", "scf_occupations_a"),
            ("occupations_beta", "scf_occupations_b"),
        )
        shell_fields = ("coefficients", "exponents", "angular_momentum")
        ecp_fields = ("angular_momentum", "r_exponents",
                      "gaussian_exponents", "coefficients")

        result = input_model.dict()
        result["return_result"] = output[input_model.driver.value]

        # The wavefunction is always built here.
        ao_basis = output["wavefunction"]["ao_basis"]
        for center in ao_basis["center_data"].values():
            # The basis-set model cannot take arrays, so turn them into lists.
            for shell in center["electron_shells"]:
                shell.pop("normalized_primitives", None)
                for field in shell_fields:
                    shell[field] = shell[field].tolist()

            ecp_shells = center["ecp_potentials"]
            if ecp_shells is None:
                continue
            for ecp in ecp_shells:
                ecp.pop("ecp_potentials", None)
                for field in ecp_fields:
                    ecp[field] = ecp[field].tolist()

        basis = BasisSet(name=str(input_model.model.basis),
                         center_data=ao_basis["center_data"],
                         atom_map=ao_basis["atom_map"])

        wfn = {"basis": basis}
        for source_key, schema_key in key_pairs:
            raw = output["wavefunction"].get(source_key, None)
            if raw is None:
                continue

            if ("density" in source_key) or ("fock" in source_key):
                # Square quantities: reorder rows and columns.
                converted = reorder_row_and_column_ao_indices(
                    raw, basis, self._qcore_to_cca_ao_order)
            elif "orbitals" in source_key:
                converted = reorder_column_ao_indices(
                    raw, basis, self._qcore_to_cca_ao_order)
            elif "eigenvalues" in source_key:
                # 1D data goes through the column routine as a single row.
                as_row = raw.reshape(1, -1)
                converted = reorder_column_ao_indices(
                    as_row, basis, self._qcore_to_cca_ao_order).ravel()
            elif "occupations" in source_key:
                # Zero-pad to the number of basis functions before reordering.
                padded = np.zeros(basis.nbf)
                padded[:raw.shape[0]] = raw
                converted = reorder_column_ao_indices(
                    padded.reshape(1, -1), basis,
                    self._qcore_to_cca_ao_order).ravel()
            else:
                raise KeyError("Wavefunction conversion key not understood")

            wfn[schema_key] = converted

        # Beta eigenvalues being present marks the wavefunction as unrestricted.
        wfn["restricted"] = "scf_eigenvalues_b" not in wfn

        result["wavefunction"] = wfn

        # Remaining top-level keys.
        props = {
            "calcinfo_nbasis": basis.nbf,
            "calcinfo_nmo": basis.nbf,
            "calcinfo_nalpha": np.sum(wfn["scf_occupations_a"] > 0),
            "calcinfo_natom": input_model.molecule.symbols.shape[0],
            "return_energy": output["energy"],
        }
        if wfn["restricted"]:
            n_beta = props["calcinfo_nalpha"]
        else:
            n_beta = np.sum(wfn["scf_occupations_b"] > 0)
        props["calcinfo_nbeta"] = n_beta

        result["properties"] = props

        result["schema_name"] = "qcschema_output"
        result["success"] = True

        return AtomicResult(**result)
Ejemplo n.º 17
0
    def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
        """Parse the TeraChem ``tc.out`` text into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to contents. ``outfiles["tc.out"]`` is parsed;
            every key of ``input_model.extras`` must also be present because
            its contents are copied into the returned extras.
        input_model
            The input the calculation was run for. Its ``extras`` values are
            overwritten in place with the matching entries of ``outfiles``.

        Returns
        -------
        AtomicResult
            ``return_result`` is the flat gradient when a gradient section was
            found, otherwise the SCF total energy. ``success`` is always
            ``True``.

        Raises
        ------
        UnknownError
            If no SCF iteration line is found between the SCF header and the
            final energy line.
        KeyError
            If neither a gradient nor an SCF total energy was found, or if a
            requested extra file is missing from ``outfiles``.
        """
        output_data = {}
        properties = {}

        # Parse the output file, collect properties and gradient
        tc_out = outfiles["tc.out"]
        output_lines = tc_out.split("\n")
        gradients = []
        natom = 0
        line_final_energy = -1
        line_scf_header = -1
        for idx, line in enumerate(output_lines):
            if "FINAL ENERGY" in line:
                properties["scf_total_energy"] = float(line.split()[2])
                line_final_energy = idx
            elif "Start SCF Iterations" in line:
                line_scf_header = idx
            elif "Total atoms" in line:
                natom = int(line.split()[-1])
            elif "DIPOLE MOMENT" in line:
                newline = line.replace(",", "").replace("}", "").replace("{", "")
                properties["scf_dipole_moment"] = [float(x) for x in newline.split()[2:5]]
            elif "Nuclear repulsion energy" in line:
                properties["nuclear_repulsion_energy"] = float(line.split()[-2])
            elif "Gradient units are Hartree/Bohr" in line:
                # Gradient is stored as (dE/dx1,dE/dy1,dE/dz1,dE/dx2,dE/dy2,...)
                # The per-atom rows start three lines below this marker.
                for row in output_lines[idx + 3:idx + 3 + natom]:
                    gradients.extend(float(x) for x in row.split())

        # Walk backwards from the final energy to the last SCF iteration row:
        # an iteration counter followed by four decimal columns.
        last_scf_line = ""
        for idx in reversed(range(line_scf_header, line_final_energy)):
            mobj = re.search(
                r"^\s*\d+\s+" + DECIMAL + r"\s+" + DECIMAL + r"\s+" + DECIMAL + r"\s+" + DECIMAL,
                output_lines[idx],
                re.VERBOSE,
            )
            if mobj:
                last_scf_line = output_lines[idx]
                break

        if not last_scf_line:
            raise UnknownError("SCF iteration lines not found in TeraChem output")

        scf_columns = last_scf_line.split()
        properties["scf_iterations"] = int(scf_columns[0])
        # `"XC Energy" in output_lines` would only match a line that is exactly
        # that text; look for the substring within each line instead
        # (presumably it appears in the SCF table header -- TODO confirm).
        if any("XC Energy" in ln for ln in output_lines):
            properties["scf_xc_energy"] = float(scf_columns[4])

        if gradients:
            output_data["return_result"] = gradients

        # Properties below are currently not supported by QCSchema and are kept
        # here only as notes:
        # properties["spin_S2"] = 1 # calculated S(S+1)
        #   elif "SPIN S-SQUARED" in line:
        #       properties["spin_S2"] = float(line.strip('\n').split()[2])
        # Parse files in scratch folder
        # properties["atomic_charge"] = []
        # atomic_charge_lines =  open(outfiles["charge.xls"]).readlines()
        # for line in atomic_charge_lines:
        #    properties["atomic_charge"].append(line.strip('\n').split()[-1])

        if "return_result" not in output_data:
            if "scf_total_energy" in properties:
                output_data["return_result"] = properties["scf_total_energy"]
            else:
                raise KeyError("Could not find SCF total energy")

        output_data["properties"] = properties

        output_data["schema_name"] = "qcschema_output"
        output_data["stdout"] = tc_out
        # TODO Should only return True if TeraChem calculation terminated properly
        output_data["success"] = True

        # return extra files requested by user as extras
        for extra in input_model.extras.keys():
            input_model.extras[extra] = outfiles[extra]

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 18
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Parse MP2D stdout and gradient file into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to contents. ``"stdout"`` is popped (so the
            caller's dict is modified) and ``"mp2d_gradient"`` is read; the
            latter may be ``None``.
        input_model
            The input the calculation was run for. ``extras["info"]`` must
            contain ``"fctldash"``. ``input_model.extras`` is not modified.

        Returns
        -------
        AtomicResult
            ``return_result`` is the energy as a float or the gradient as a
            flat list, depending on the driver. The collected values are also
            stored under ``extras["qcvars"]``.

        Raises
        ------
        UnknownError
            If the stdout scan ends without reaching the coordinates line
            (except for a one-real-atom gradient), or if a gradient was
            requested but none could be read.
        """
        stdout = outfiles.pop("stdout")

        # parse energy output (could go further and break into UCHF, CKS)
        real = np.array(input_model.molecule.real)
        full_nat = real.shape[0]
        real_nat = np.sum(real)

        for ln in stdout.splitlines():
            if re.match("   MP2D dispersion correction Eh", ln):
                ene = Decimal(ln.split()[4])
            elif re.match("Atomic Coordinates in Angstroms", ln):
                break
        else:
            # Reached only when the coordinates line never appeared.
            if not ((real_nat == 1) and (input_model.driver == "gradient")):
                raise UnknownError("Unknown issue occured.")

        # parse gradient output
        if outfiles["mp2d_gradient"] is not None:
            srealgrad = outfiles["mp2d_gradient"]
            realgrad = np.fromstring(srealgrad, count=3 * real_nat,
                                     sep=" ").reshape((-1, 3))

        if input_model.driver == "gradient":
            # Place the real-atom gradient rows into a full-size array; rows
            # for the remaining atoms stay zero.
            ireal = np.argwhere(real).reshape((-1))
            fullgrad = np.zeros((full_nat, 3))
            try:
                fullgrad[ireal, :] = realgrad
            except NameError as exc:
                # `realgrad` was never assigned because no gradient file existed.
                raise UnknownError(
                    "Unsuccessful gradient collection.") from exc

        qcvkey = input_model.extras["info"]["fctldash"].upper()

        calcinfo = [
            qcel.Datum("CURRENT ENERGY", "Eh", ene),
            qcel.Datum("DISPERSION CORRECTION ENERGY", "Eh", ene),
            qcel.Datum("2-BODY DISPERSION CORRECTION ENERGY", "Eh", ene),
        ]
        if qcvkey:
            calcinfo.append(
                qcel.Datum(f"{qcvkey} DISPERSION CORRECTION ENERGY", "Eh",
                           ene))

        if input_model.driver == "gradient":
            calcinfo.append(qcel.Datum("CURRENT GRADIENT", "Eh/a0", fullgrad))
            calcinfo.append(
                qcel.Datum("DISPERSION CORRECTION GRADIENT", "Eh/a0",
                           fullgrad))
            calcinfo.append(
                qcel.Datum("2-BODY DISPERSION CORRECTION GRADIENT", "Eh/a0",
                           fullgrad))
            if qcvkey:
                calcinfo.append(
                    qcel.Datum(f"{qcvkey} DISPERSION CORRECTION GRADIENT",
                               "Eh/a0", fullgrad))

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        calcinfo = {
            info.label.upper():
            str(info.data) if isinstance(info.data, Decimal) else info.data
            for info in calcinfo
        }

        retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
        # Energies were stringified just above, so accept str as well as
        # Decimal to hand back a real float.
        if isinstance(retres, (str, Decimal)):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.ravel().tolist()

        # Work on a shallow copy so the caller's input model is left untouched.
        extras = {**input_model.extras}
        extras["local_keywords"] = input_model.extras["info"]
        extras["qcvars"] = calcinfo

        output_data = {
            "extras":
            extras,
            "properties": {},
            "provenance":
            Provenance(creator="MP2D",
                       version=self.get_version(),
                       routine=__name__ + "." +
                       sys._getframe().f_code.co_name),
            "return_result":
            retres,
            "stdout":
            stdout,
            "success":
            True,
        }
        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 19
0
    def parse_output(self, outfiles: Dict[str, str], input_model: "AtomicInput") -> "AtomicResult":
        """Parse DFTD3 stdout and gradient files into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to contents. ``"stdout"`` is popped (so the
            caller's dict is modified); ``"dftd3_gradient"`` and
            ``"dftd3_abc_gradient"`` are read and may be ``None``.
        input_model
            The input the calculation was run for. ``extras["info"]`` must
            contain ``"dashlevel"`` and ``"fctldash"``. ``input_model.extras``
            is not modified.

        Returns
        -------
        AtomicResult
            ``return_result`` is the energy as a float or the gradient as a
            flat list, depending on the driver. When ``dashlevel`` is
            ``"atmgr"`` the three-body quantities are reported, otherwise the
            two-body ones. Collected values are stored under
            ``extras["qcvars"]``.

        Raises
        ------
        ResourceError
            If the stdout has the three-body line format attributed here to
            DFTD3 versions before v3.2.1.
        UnknownError
            If the normal-termination line is never seen (except for a
            one-real-atom gradient), or if a gradient was requested but the
            needed gradient could not be read.
        """
        Grimme_h2kcal = 627.509541
        stdout = outfiles.pop("stdout")

        # parse energy output (could go further and break into E6, E8, E10 and Cn coeff)
        real = np.array(input_model.molecule.real)
        full_nat = real.shape[0]
        real_nat = np.sum(real)
        # Split once; the pair-wise block below needs positional access.
        stdout_lines = stdout.splitlines()
        for idx, ln in enumerate(stdout_lines):
            if re.match(" Edisp /kcal,au", ln):
                ene = Decimal(ln.split()[3])
            elif re.match(r" E6\(ABC\) \"   :", ln):  # c. v3.2.0
                raise ResourceError("Cannot process ATM results from DFTD3 prior to v3.2.1.")
            elif re.match(r""" E6\(ABC\) /kcal,au:""", ln):
                atm = Decimal(ln.split()[-1])
            elif re.match(" analysis of pair-wise terms", ln):
                D3pairs = np.zeros((full_nat, full_nat))
                # The table starts two lines below the heading and ends at the
                # first blank line.
                for row in stdout_lines[idx + 2:]:
                    # Separate fields that run together at a minus sign.
                    data = row.replace("-", " -").split()
                    if not data:
                        break
                    atom1 = int(data[0]) - 1
                    atom2 = int(data[1]) - 1
                    Edisp = Decimal(data[-1])
                    D3pairs[atom1, atom2] = Edisp / Decimal(Grimme_h2kcal)
                    D3pairs[atom2, atom1] = D3pairs[atom1, atom2]

            elif re.match(" normal termination of dftd3", ln):
                break
        else:
            if not ((real_nat == 1) and (input_model.driver == "gradient")):
                raise UnknownError(
                    f"Unsuccessful run. Check input, particularly geometry in [a0]. Model: {input_model.model}"
                )

        # parse gradient output
        # * DFTD3 crashes on one-atom gradients. Avoid the error (above) and just force the correct result (below).
        if outfiles["dftd3_gradient"] is not None:
            srealgrad = outfiles["dftd3_gradient"].replace("D", "E")
            realgrad = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))
        elif real_nat == 1:
            realgrad = np.zeros((1, 3))

        if outfiles["dftd3_abc_gradient"] is not None:
            srealgrad = outfiles["dftd3_abc_gradient"].replace("D", "E")
            realgradabc = np.fromstring(srealgrad, count=3 * real_nat, sep=" ").reshape((-1, 3))
        elif real_nat == 1:
            realgradabc = np.zeros((1, 3))

        dashlevel = input_model.extras["info"]["dashlevel"]

        if input_model.driver == "gradient":
            ireal = np.argwhere(real).reshape((-1))
            fullgrad = np.zeros((full_nat, 3))
            try:
                # Selecting the gradient must happen inside the `try`: this is
                # where an unassigned `realgrad`/`realgradabc` raises NameError.
                rg = realgradabc if (dashlevel == "atmgr") else realgrad
                fullgrad[ireal, :] = rg
            except NameError as exc:
                raise UnknownError("Unsuccessful gradient collection.") from exc

        qcvkey = input_model.extras["info"]["fctldash"].upper()

        calcinfo = []
        if dashlevel == "atmgr":
            calcinfo.append(qcel.Datum("CURRENT ENERGY", "Eh", atm))
            calcinfo.append(qcel.Datum("DISPERSION CORRECTION ENERGY", "Eh", atm))
            calcinfo.append(qcel.Datum("3-BODY DISPERSION CORRECTION ENERGY", "Eh", atm))
            calcinfo.append(qcel.Datum("AXILROD-TELLER-MUTO 3-BODY DISPERSION CORRECTION ENERGY", "Eh", atm))

            if input_model.driver == "gradient":
                calcinfo.append(qcel.Datum("CURRENT GRADIENT", "Eh/a0", fullgrad))
                calcinfo.append(qcel.Datum("DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
                calcinfo.append(qcel.Datum("3-BODY DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
                calcinfo.append(
                    qcel.Datum("AXILROD-TELLER-MUTO 3-BODY DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad)
                )

        else:
            calcinfo.append(qcel.Datum("CURRENT ENERGY", "Eh", ene))
            calcinfo.append(qcel.Datum("DISPERSION CORRECTION ENERGY", "Eh", ene))
            calcinfo.append(qcel.Datum("2-BODY DISPERSION CORRECTION ENERGY", "Eh", ene))
            if qcvkey:
                calcinfo.append(qcel.Datum(f"{qcvkey} DISPERSION CORRECTION ENERGY", "Eh", ene))

            if input_model.driver == "gradient":
                calcinfo.append(qcel.Datum("CURRENT GRADIENT", "Eh/a0", fullgrad))
                calcinfo.append(qcel.Datum("DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
                calcinfo.append(qcel.Datum("2-BODY DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))
                if qcvkey:
                    calcinfo.append(qcel.Datum(f"{qcvkey} DISPERSION CORRECTION GRADIENT", "Eh/a0", fullgrad))

        calcinfo = {info.label: info.data for info in calcinfo}

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        calcinfo = {
            k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcel.util.unnp(calcinfo, flat=True).items()
        }

        retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
        # Energies were stringified just above, so accept str as well as
        # Decimal to hand back a real float.
        if isinstance(retres, (str, Decimal)):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.ravel().tolist()

        # Work on a shallow copy so the caller's input model is left untouched.
        extras = {**input_model.extras}
        extras["local_keywords"] = input_model.extras["info"]
        extras["qcvars"] = calcinfo
        if input_model.keywords.get("save_pairwise_dispersion") is True:
            extras["qcvars"]["PAIRWISE DISPERSION CORRECTION ANALYSIS"] = D3pairs

        output_data = {
            "extras": extras,
            "properties": {},
            "provenance": Provenance(
                creator="DFTD3", version=self.get_version(), routine=__name__ + "." + sys._getframe().f_code.co_name
            ),
            "return_result": retres,
            "stdout": stdout,
            "success": True,
        }

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 20
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Parse gCP stdout and gradient file into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to contents. ``"stdout"`` is popped (so the
            caller's dict is modified) and ``"gcp_gradient"`` is read; the
            latter may be ``None``.
        input_model
            The input the calculation was run for. ``input_model.extras`` is
            not modified.

        Returns
        -------
        AtomicResult
            ``return_result`` is the energy as a float or the gradient as a
            flat list, depending on the driver. Collected values are stored
            under ``extras["qcvars"]``.

        Raises
        ------
        UnknownError
            If this harness is named ``"GCP"`` and the normal-termination line
            is never seen (except for a one-real-atom gradient), or if a
            gradient was requested but none could be read.
        """
        stdout = outfiles.pop("stdout")

        # parse energy output (could go further and break into E6, E8, E10 and Cn coeff)
        real = np.array(input_model.molecule.real)
        full_nat = real.shape[0]
        real_nat = np.sum(real)

        for ln in stdout.splitlines():
            if re.match("  Egcp:", ln):
                ene = Decimal(ln.split()[1])
            elif re.match("     normal termination of gCP", ln):
                break
        else:
            # Reached only when the termination line never appeared.
            if self._defaults["name"] == "GCP" and not (
                (real_nat == 1) and (input_model.driver == "gradient")):
                raise UnknownError(
                    f"Unsuccessful run. Check input, particularly geometry in [a0]. Model: {input_model.model}"
                )

        # parse gradient output
        if outfiles["gcp_gradient"] is not None:
            # Fortran "D" exponents are not understood by the numpy parser.
            srealgrad = outfiles["gcp_gradient"].replace("D", "E")
            realgrad = np.fromstring(srealgrad, count=3 * real_nat,
                                     sep=" ").reshape((-1, 3))
        elif real_nat == 1:
            realgrad = np.zeros((1, 3))

        if input_model.driver == "gradient":
            # Place the real-atom gradient rows into a full-size array; rows
            # for the remaining atoms stay zero.
            ireal = np.argwhere(real).reshape((-1))
            fullgrad = np.zeros((full_nat, 3))
            try:
                fullgrad[ireal, :] = realgrad
            except NameError as exc:
                # `realgrad` was never assigned because no gradient file existed.
                raise UnknownError(
                    "Unsuccessful gradient collection.") from exc

        qcvkey = input_model.model.method.upper()

        calcinfo = [
            qcel.Datum("CURRENT ENERGY", "Eh", ene),
            qcel.Datum("GCP CORRECTION ENERGY", "Eh", ene),
        ]
        if qcvkey:
            calcinfo.append(
                qcel.Datum(f"{qcvkey} GCP CORRECTION ENERGY", "Eh", ene))

        if input_model.driver == "gradient":
            calcinfo.append(qcel.Datum("CURRENT GRADIENT", "Eh/a0", fullgrad))
            calcinfo.append(
                qcel.Datum("GCP CORRECTION GRADIENT", "Eh/a0", fullgrad))
            if qcvkey:
                calcinfo.append(
                    qcel.Datum(f"{qcvkey} GCP CORRECTION GRADIENT", "Eh/a0",
                               fullgrad))

        # Decimal --> str preserves precision
        calcinfo = {
            info.label.upper():
            str(info.data) if isinstance(info.data, Decimal) else info.data
            for info in calcinfo
        }

        retres = calcinfo[f"CURRENT {input_model.driver.upper()}"]
        # Energies were stringified just above, so accept str as well as
        # Decimal to hand back a real float.
        if isinstance(retres, (str, Decimal)):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.ravel().tolist()

        # Work on a shallow copy so the caller's input model is left untouched.
        extras = {**input_model.extras}
        extras["qcvars"] = calcinfo

        output_data = {
            "extras":
            extras,
            "properties": {},
            "provenance":
            Provenance(creator="GCP",
                       version=self.get_version(),
                       routine=__name__ + "." +
                       sys._getframe().f_code.co_name),
            "return_result":
            retres,
            "stdout":
            stdout,
            "success":
            True,
        }
        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 21
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: AtomicInput
    ) -> AtomicResult:  # lgtm: [py/similar-function]
        """Turn harvested CFOUR output into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to contents. ``"stdout"`` and ``"stderr"`` are
            popped (so the caller's dict is modified); the remaining entries
            are passed to ``harvest`` as keyword arguments and the non-``None``
            ones are returned as ``native_files``. ``outfiles["input"]`` is
            used when building error messages.
        input_model
            The input the calculation was run for. A leading ``"c4-"`` on the
            method name is dropped before harvesting.

        Returns
        -------
        AtomicResult
            ``return_result`` is the ``CURRENT <DRIVER>`` quantity (the current
            energy for the ``properties`` driver). All harvested variables are
            stored under ``extras["qcvars"]``.

        Raises
        ------
        UnknownError
            If harvesting raises, if it reports a non-empty error string, or
            if the quantity for the requested driver is missing.
        """

        stdout = outfiles.pop("stdout")
        stderr = outfiles.pop("stderr")

        method = input_model.model.method.lower()
        method = method[3:] if method.startswith("c4-") else method

        # c4mol, if it exists, is dinky, just a clue to geometry of cfour results
        try:
            # July 2021: c4mol & vector returns now atin/outfile orientation depending on fix_com,orientation=T/F. previously always atin orientation
            qcvars, c4hess, c4grad, c4mol, version, module, errorTMP = harvest(
                input_model.molecule, method, stdout, **outfiles
            )
        except Exception as exc:
            # Any harvesting failure is reported with the full input/output
            # context; chain it so the original traceback stays attached.
            raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

        if errorTMP != "":
            raise UnknownError(error_stamp(outfiles["input"], stdout, stderr))

        try:
            if c4grad is not None:
                qcvars["CURRENT GRADIENT"] = c4grad
                qcvars[f"{method.upper()} TOTAL GRADIENT"] = c4grad

            if c4hess is not None:
                qcvars[f"{method.upper()} TOTAL HESSIAN"] = c4hess
                qcvars["CURRENT HESSIAN"] = c4hess

            if input_model.driver.upper() == "PROPERTIES":
                retres = qcvars["CURRENT ENERGY"]
            else:
                retres = qcvars[f"CURRENT {input_model.driver.upper()}"]
        except KeyError as exc:
            raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from exc

        # TODO: "xalloc(): memory allocation failed!"

        if isinstance(retres, Decimal):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.ravel().tolist()

        build_out(qcvars)
        atprop = build_atomicproperties(qcvars)

        provenance = Provenance(creator="CFOUR", version=self.get_version(), routine="xcfour").dict()
        if module is not None:
            provenance["module"] = module

        output_data = {
            "schema_version": 1,
            "molecule": c4mol,  # overwrites with outfile Cartesians in case fix_*=F
            "extras": {**input_model.extras},
            "native_files": {k: v for k, v in outfiles.items() if v is not None},
            "properties": atprop,
            "provenance": provenance,
            "return_result": retres,
            "stderr": stderr,
            "stdout": stdout,
            "success": True,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        # * formerly unnp(qcvars, flat=True).items()
        output_data["extras"]["qcvars"] = {
            k.upper(): str(v) if isinstance(v, Decimal) else v for k, v in qcvars.items()
        }

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 22
0
    def compute(self, input_data: "AtomicInput",
                config: "TaskConfig") -> "AtomicResult":
        """
        Evaluate a TorchANI model for the input molecule on the CPU.

        The energy is the mean over the array of energies returned by the
        model. Depending on the driver the result is that energy, its gradient
        with respect to the coordinates, or its Hessian. ``config`` is not
        used.

        Raises ``ResourceError`` when the installed TorchANI is older than
        0.9, and ``InputError`` for unsupported element symbols or drivers.
        """

        # TorchANI must be present and recent enough.
        self.found(raise_error=True)
        if parse_version(self.get_version()) < parse_version("0.9"):
            raise ResourceError(
                "QCEngine's TorchANI wrapper requires version 0.9 or greater.")

        import torch
        import torchani
        import numpy as np

        cpu = torch.device("cpu")

        # Marked as failed until the very end.
        ret_data = {"success": False}

        # Model selection
        method = input_data.model.method
        model = self.get_model(method)

        # Element check: H, C, N, O always; S, F, Cl only for ani2x.
        symbols = input_data.molecule.symbols
        supported = {"H", "C", "N", "O"}
        if method.lower() == "ani2x":
            supported |= {"S", "F", "Cl"}

        unknown_sym = set(symbols) - supported
        if unknown_sym:
            raise InputError(
                f"TorchANI model '{method}' does not support symbols: {unknown_sym}."
            )

        num_atoms = len(symbols)
        species = model.species_to_tensor(symbols).to(cpu).unsqueeze(0)

        # Coordinates: bohr -> angstrom, with a leading batch axis of one.
        batched_geometry = input_data.molecule.geometry.reshape(1, -1, 3)
        geom_array = batched_geometry * ureg.conversion_factor(
            "bohr", "angstrom")
        coordinates = torch.tensor(geom_array.tolist(),
                                   requires_grad=True,
                                   device=cpu)

        _, energy_array = model((species, coordinates))
        energy = energy_array.mean()
        ensemble_std = energy_array.std()
        ensemble_scaled_std = ensemble_std / np.sqrt(num_atoms)

        return_energy = energy.item()
        ret_data["properties"] = {"return_energy": return_energy}

        driver = input_data.driver
        if driver == "energy":
            ret_data["return_result"] = return_energy
        elif driver == "gradient":
            derivative = torch.autograd.grad(energy.sum(),
                                             coordinates)[0].squeeze()
            scaled = derivative * ureg.conversion_factor("angstrom", "bohr")
            ret_data["return_result"] = np.asarray(scaled).ravel().tolist()
        elif driver == "hessian":
            hessian = torchani.utils.hessian(coordinates, energies=energy)
            ret_data["return_result"] = np.asarray(hessian)
        else:
            raise InputError(
                f"TorchANI can only compute energy, gradient, and hessian driver methods. Found {input_data.driver}."
            )

        #######################################################################
        # Quantities added to `extras`
        #
        # ensemble_energies:
        #   Energy array over all members (models) of the ensemble
        #
        # ensemble_energy_avg:
        #   Mean of that array; the same value is reported as the energy
        #
        # ensemble_energy_std:
        #   Standard deviation of that array
        #
        # ensemble_per_root_atom_disagreement:
        #   The standard deviation divided by the square root of N, N being
        #   the number of atoms in the molecule. This is the quantity used in
        #   the query-by-committee (QBC) process in active learning to infer
        #   the reliability of the models in an ensemble, and produce more data
        #   points in the regions where this quantity is below a certain
        #   threshold (inclusion criteria)
        ensemble_extras = {
            "ensemble_energies": energy_array.detach().numpy(),
            "ensemble_energy_avg": energy.item(),
            "ensemble_energy_std": ensemble_std.item(),
            "ensemble_per_root_atom_disagreement": ensemble_scaled_std.item(),
        }
        ret_data["extras"] = input_data.extras.copy()
        ret_data["extras"].update(ensemble_extras)

        ret_data["provenance"] = Provenance(
            creator="torchani",
            version="unknown",
            routine="torchani.builtin.aev_computer")

        ret_data["schema_name"] = "qcschema_output"
        ret_data["success"] = True

        # Merge into one dict first: passing both as keyword arguments would
        # repeat names instead of letting ret_data override the input fields.
        merged = {**input_data.dict(), **ret_data}
        return AtomicResult(**merged)
Ejemplo n.º 23
0
    def compute(self, input_data: "AtomicInput",
                config: "TaskConfig") -> "AtomicResult":
        """
        Runs OpenMM on given structure, inputs, in vacuum.

        The molecule is built from a mapped SMILES string found in the
        molecule extras when one is available, otherwise through RDKit. Only
        the ``energy`` and ``gradient`` drivers are handled. The thread count
        is taken from ``config.ncores``, falling back to the
        ``OPENMM_CPU_THREADS`` environment variable.

        Raises ``InputError`` when no basis is given, when the basis is not
        ``smirnoff`` or ``antechamber``, or when the driver is unsupported.
        """
        self.found(raise_error=True)

        from simtk import openmm
        from simtk import unit

        with capture_stdout():
            import openforcefield.topology as offtop

        # Marked as failed until the calculation has gone through.
        ret_data = {"success": False}

        # A basis is required; it is not generated here.
        if not input_data.model.basis:
            raise InputError("Method must contain a basis set.")

        # Only smirnoff and antechamber are accepted.
        basis = input_data.model.basis.lower()
        if basis not in ("smirnoff", "antechamber"):
            raise InputError(
                "Accepted bases are: {'smirnoff', 'antechamber', }")

        with capture_stdout():
            # Look for a mapped SMILES at the top level of the molecule
            # extras first, then inside their "cmiles" entry.
            smiles_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
            mol_extras = input_data.molecule.extras
            cmiles = None
            if mol_extras:
                cmiles = mol_extras.get(smiles_key, None)
                if cmiles is None:
                    cmiles = mol_extras.get("cmiles", {}).get(smiles_key, None)

            if cmiles is None:
                # No SMILES available: go through RDKit and wrap the result.
                rdkit_mol = RDKitHarness._process_molecule_rdkit(
                    input_data.molecule)
                off_mol = offtop.Molecule(rdkit_mol)
            else:
                off_mol = offtop.Molecule.from_mapped_smiles(
                    mapped_smiles=cmiles)
                # Attach the input geometry (bohr) as the conformer.
                coords = np.array(input_data.molecule.geometry)
                conformer = unit.Quantity(value=coords, unit=unit.bohr)
                off_mol.add_conformer(conformer)

        # Build the OpenMM system for this molecule.
        openmm_system = self._generate_openmm_system(
            molecule=off_mol,
            method=input_data.model.method,
            keywords=input_data.keywords)

        # An integrator is required to create a context even though no
        # dynamics are run.
        integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)

        # Always run on the CPU platform.
        platform = openmm.Platform.getPlatformByName("CPU")

        # Thread count: when it ends up unset no "Threads" property is passed
        # and OpenMM uses its default of all logical cores.
        nthreads = config.ncores
        if nthreads is None:
            nthreads = os.environ.get("OPENMM_CPU_THREADS")
        properties = {"Threads": str(nthreads)} if nthreads else {}

        context = openmm.Context(openmm_system, integrator, platform,
                                 properties)

        # Positions come from the Open Force Field molecule.
        context.setPositions(off_mol.conformers[0])

        # Potential energy as a simtk.unit.Quantity, expressed in hartree.
        energy_state = context.getState(getEnergy=True)
        potential = energy_state.getPotentialEnergy()
        q = potential / unit.hartree / unit.AVOGADRO_CONSTANT_NA

        ret_data["properties"] = {"return_energy": q}

        # Driver dispatch
        if input_data.driver == "energy":
            ret_data["return_result"] = ret_data["properties"]["return_energy"]

        elif input_data.driver == "gradient":
            force_state = context.getState(getForces=True)

            # Forces as a simtk.unit.Quantity with shape (n_atoms, 3)
            forces = force_state.getForces(asNumpy=True)

            # Convert to hartree/bohr and flatten to 1D
            per_bohr = forces / (unit.hartree / unit.bohr)
            q = per_bohr.reshape(-1) / unit.AVOGADRO_CONSTANT_NA

            # The gradient is the negative of the force.
            ret_data["return_result"] = -1 * q
        else:
            raise InputError(
                f"OpenMM can only compute energy and gradient driver methods. Found {input_data.driver}."
            )

        ret_data["success"] = True
        ret_data["extras"] = input_data.extras

        ret_data["provenance"] = Provenance(creator="openmm",
                                            version=openmm.__version__,
                                            nthreads=nthreads)

        merged = {**input_data.dict(), **ret_data}
        return AtomicResult(**merged)
Ejemplo n.º 24
0
    def parse_output(
        self, outfiles: Dict[str, str], input_model: "AtomicInput"
    ) -> AtomicResult:  # lgtm: [py/similar-function]
        """Assemble an ``AtomicResult`` from the files collected after an NWChem run.

        Parameters
        ----------
        outfiles
            Mapping of file name to content. ``"stdout"`` and ``"stderr"`` are required
            and are removed from the mapping (the caller's dict is mutated); every
            remaining entry is forwarded to ``harvest`` as a keyword argument and, when
            not ``None``, reported under ``native_files``.
        input_model
            The input the calculation was run for; its fields seed the result.

        Returns
        -------
        AtomicResult
            The input fields overlaid with the parsed output fields.

        Raises
        ------
        UnknownError
            If ``harvest`` raises, or if the ``CURRENT ...`` variable requested by the
            driver is absent from the harvested variables. The original exception is
            attached as ``__cause__``.
        """
        # Get the stdout from the calculation (required)
        stdout = outfiles.pop("stdout")
        stderr = outfiles.pop("stderr")

        # A leading "nwc-" on the method name is stripped before harvesting
        method = input_model.model.method.lower()
        method = method[4:] if method.startswith("nwc-") else method

        # Read the NWChem stdout file and, if needed, the hess or grad files
        # July 2021: nwmol & vector returns now atin/outfile orientation depending on fix_com,orientation=T/F. previously always atin orientation
        try:
            qcvars, nwhess, nwgrad, nwmol, _version, module, _error = harvest(
                input_model.molecule, method, stdout, **outfiles)
        except Exception as err:
            # Any harvesting failure is reported with the full input/stdout/stderr stamp
            raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from err

        driver = input_model.driver.upper()
        try:
            if nwgrad is not None:
                qcvars[f"{method.upper()} TOTAL GRADIENT"] = nwgrad
                qcvars["CURRENT GRADIENT"] = nwgrad

            if nwhess is not None:
                qcvars[f"{method.upper()} TOTAL HESSIAN"] = nwhess
                qcvars["CURRENT HESSIAN"] = nwhess

            # The properties driver returns the current energy; every other driver
            # returns the matching "CURRENT <DRIVER>" variable
            if driver == "PROPERTIES":
                retres = qcvars["CURRENT ENERGY"]
            else:
                retres = qcvars[f"CURRENT {driver}"]
        except KeyError as err:
            raise UnknownError(error_stamp(outfiles["input"], stdout, stderr)) from err

        # Normalize the output as a float or list of floats
        if isinstance(retres, Decimal):
            retres = float(retres)
        elif isinstance(retres, np.ndarray):
            retres = retres.tolist()

        # Get the formatted properties
        build_out(qcvars)
        atprop = build_atomicproperties(qcvars)

        provenance = Provenance(creator="NWChem",
                                version=self.get_version(),
                                routine="nwchem").dict()
        if module is not None:
            provenance["module"] = module

        # Format them into an output
        output_data = {
            "schema_version": 1,
            # overwrites with outfile Cartesians in case fix_*=F
            "molecule": nwmol,
            "extras": {
                **input_model.extras
            },
            "native_files": {
                k: v
                for k, v in outfiles.items() if v is not None
            },
            "properties": atprop,
            "provenance": provenance,
            "return_result": retres,
            "stderr": stderr,
            "stdout": stdout,
            "success": True,
        }

        # got to even out who needs plump/flat/Decimal/float/ndarray/list
        # Decimal --> str preserves precision
        # * formerly unnp(qcvars, flat=True).items()
        output_data["extras"]["qcvars"] = {
            k.upper(): str(v) if isinstance(v, Decimal) else v
            for k, v in qcvars.items()
        }

        return AtomicResult(**{**input_model.dict(), **output_data})
Ejemplo n.º 25
0
def job_output_to_atomic_result(*, atomic_input: AtomicInput,
                                job_output: pb.JobOutput) -> AtomicResult:
    """Build an ``AtomicResult`` from a protobuf ``JobOutput`` and the input that produced it.

    Parameters
    ----------
    atomic_input
        The input of the calculation; all of its fields except ``provenance`` are
        carried over into the result.
    job_output
        The protobuf message returned by the server.

    Returns
    -------
    AtomicResult
        Result whose ``extras`` additionally hold ``qcvars``, ``job_extras`` and
        ``molden`` entries taken from the job output.

    Raises
    ------
    ValueError
        If the driver is neither energy nor gradient.
    """
    # Protobuf types are not JSON serializable, so work on a plain-python dict
    job_fields = MessageToDict(job_output, preserving_proto_field_name=True)

    driver = atomic_input.driver.upper()
    if driver == "ENERGY":
        # First entry is the ground state; may need to change for excited states
        return_result: Union[float, List[float]] = job_fields["energy"][0]
    elif driver == "GRADIENT":
        return_result = job_fields["gradient"]
    else:
        raise ValueError(
            f"Unsupported driver: {driver}, supported drivers "
            f"include: {SUPPORTED_DRIVERS}")

    molden_string = None
    if atomic_input.keywords.get("molden"):
        # A molden file was requested
        try:
            molden_string = tcpb_imd_fields2molden_string(job_output)
        except Exception:
            # Failure modes are unknown, so everything is caught for now.
            # NOTE: mo_output will set imd_orbital_type to "WHOLE_C"
            molden_string = "Unable to create molden output. Did you include the 'mo_output' keyword??"

    # The input fields form the base of the result; provenance is replaced below
    base_fields = atomic_input.dict()
    base_fields.pop("provenance", None)

    new_provenance = Provenance(
        creator="terachem_pbs",
        version="1.9-2021.01-dev",
        routine="tcpb.TCProtobufClient.compute",
    )
    result_properties = to_atomic_result_properties(job_output)
    # NOTE: Wavefunction will only be added if atomic_input.protocols.wavefunction != 'none'
    result_wavefunction = to_wavefunction_properties(job_output, atomic_input)

    atomic_result = AtomicResult(
        **base_fields,
        provenance=new_provenance,
        return_result=return_result,
        properties=result_properties,
        wavefunction=result_wavefunction,
        success=True,
    )

    # (name stored in extras, field name in the job output); missing fields become None
    qcvar_fields = (
        ("charges", "charges"),
        ("spins", "spins"),
        ("meyer_bond_order", "bond_order"),
        ("orb_size", "orb_size"),
        ("excited_state_energies", "energy"),
        ("cis_transition_dipoles", "cis_transition_dipoles"),
        ("compressed_bond_order", "compressed_bond_order"),
        ("compressed_hessian", "compressed_hessian"),
        ("compressed_ao_data", "compressed_ao_data"),
        ("compressed_primitive_data", "compressed_primitive_data"),
        ("compressed_mo_vector", "compressed_mo_vector"),
        ("imd_mmatom_gradient", "imd_mmatom_gradient"),
    )
    job_extra_fields = ("job_dir", "job_scr_dir", "server_job_id",
                        "orb1afile", "orb1bfile")

    # Extend the input extras with values that only exist on the job output
    atomic_result.extras.update({
        "qcvars": {
            name: job_fields.get(field)
            for name, field in qcvar_fields
        },
        "job_extras": {
            field: job_fields.get(field)
            for field in job_extra_fields
        },
        "molden": molden_string,
    })
    return atomic_result
Ejemplo n.º 26
0
    def compute(self, input_model: "AtomicInput",
                config: "TaskConfig") -> "AtomicResult":
        """
        Runs MRChem in executable mode

        Parameters
        ----------
        input_model
            The calculation to run; ``driver`` must be ``energy`` or ``properties``.
        config
            Task configuration; ``scratch_directory`` is used as the parent of the
            temporary working directory.

        Returns
        -------
        AtomicResult
            Result assembled from the ``data.json`` file written by MRChem.

        Raises
        ------
        InputError
            If the driver is not implemented for MRChem.
        RandomError
            If the run failed and the error text mentions a segmentation fault.
        UnknownError
            If the run failed for any other reason.
        """
        self.found(raise_error=True)

        # Location resolution order config.scratch_dir, /tmp
        parent = config.scratch_directory

        error_message = None
        compute_success = False

        job_input = self.build_input(input_model, config)
        input_data = copy.deepcopy(job_input["mrchem_json"])
        output_data = {
            "keywords": input_data,
            "schema_name": "qcschema_output",
            "schema_version": 1,
            "model": input_model.model,
            "molecule": input_model.molecule,
            "driver": input_model.driver,
        }

        with temporary_directory(parent=parent,
                                 suffix="_mrchem_scratch") as tmpdir:
            # create folders
            for d in job_input["folders"]:
                if not Path(d).exists():
                    Path(d).mkdir()

            # Execute the program
            success, output = execute(
                command=job_input["command"] + ["data.json"],
                infiles={"data.json": json.dumps(job_input["mrchem_json"])},
                outfiles=["data.json"],
                scratch_directory=tmpdir,
            )

            if success:
                output_data["stdout"] = output["stdout"]
                # get data from the MRChem JSON output and transfer it to the QCSchema output
                mrchem_json = json.loads(output["outfiles"]["data.json"])
                mrchem_output = mrchem_json["output"]
                output_data["success"] = mrchem_output["success"]
                output_data["provenance"] = mrchem_output["provenance"]
                # update the "routine" under "provenance"
                output_data["provenance"]["routine"] = " ".join(
                    job_input["command"])

                # fill up properties
                output_data["properties"] = extract_properties(mrchem_output)

                # prepare a list of computed response properties
                known_rsp_props = [
                    ("dipole_moment", "vector"),
                    ("quadrupole_moment", "tensor"),
                    ("polarizability", "tensor"),
                    ("magnetizability", "tensor"),
                    ("nmr_shielding", "tensor"),
                ]
                # one (section, property, label, kind) path per computed label
                computed_rsp_props = [
                    ("properties", x, y, z) for x, z in known_rsp_props
                    if x in mrchem_output["properties"]
                    for y in mrchem_output["properties"][x].keys()
                ]

                # fill up extras:
                # * under "raw_output" the whole JSON output from MRChem
                # * under "properties" all the properties computed by MRChem
                # NOTE(review): the mapping is keyed on the property name only, so when a
                # property has several labels only the last one survives -- confirm this
                # is intended.
                output_data["extras"] = {
                    "raw_output": mrchem_json,
                    "properties": {
                        f"{ks[1]}": {
                            f"{ks[2]}": _nested_get(mrchem_output, ks)
                        }
                        for ks in computed_rsp_props
                    },
                }

                # fill up return_result
                if input_model.driver == "energy":
                    output_data["return_result"] = mrchem_output["properties"][
                        "scf_energy"]["E_tot"]
                elif input_model.driver == "properties":
                    output_data["return_result"] = {
                        f"{ks[1]}": {
                            f"{ks[2]}": _nested_get(mrchem_output, ks)
                        }
                        for ks in computed_rsp_props
                    }
                else:
                    raise InputError(
                        f"Driver {input_model.driver} not implemented for MRChem."
                    )

                compute_success = mrchem_output["success"]
                if not compute_success:
                    # The executable ran but MRChem flagged the calculation as failed
                    error_message = (
                        output.get("stderr")
                        or "MRChem reported an unsuccessful calculation.")

            else:
                output_data["stderr"] = output["stderr"]
                output_data["error"] = {
                    "error_message": output["stderr"],
                    "error_type": "execution_error",
                }
                # Keep a non-empty string so the error dispatch below can inspect it
                error_message = (output["stderr"]
                                 or "MRChem execution failed without STDERR output.")

        # Dispatch errors: segmentation faults are reported as RandomError,
        # everything else as UnknownError
        if not compute_success:
            segfault_markers = ("SIGSEV", "SIGSEGV", "segmentation fault")
            if any(marker in error_message for marker in segfault_markers):
                raise RandomError(error_message)
            raise UnknownError(error_message)

        return AtomicResult(**output_data)
Ejemplo n.º 27
0
    def compute(self, input_data: "AtomicInput", config: "TaskConfig") -> "AtomicResult":
        """
        Runs OpenMM on given structure, inputs, in vacuum.

        Parameters
        ----------
        input_data
            The calculation to run. ``model.offxml`` or ``model.url`` must be set, and
            ``driver`` must be ``energy`` or ``gradient``.
        config
            Task configuration; ``ncores`` selects the thread count (falling back to the
            ``OPENMM_CPU_THREADS`` environment variable) and ``scratch_directory`` is the
            parent of the temporary working directory.

        Returns
        -------
        AtomicResult
            Energy (hartree) or flattened gradient (hartree/bohr) together with provenance.

        Raises
        ------
        InputError
            If no force field source is given or the driver is unsupported.
        """
        self.found(raise_error=True)

        from simtk import openmm
        from simtk import unit

        import openforcefield.topology as offtop

        # Failure flag
        ret_data = {"success": False}

        # generate basis, not given
        if not input_data.model.basis:
            basis = self._generate_basis(input_data)
            ret_data["basis"] = basis

        # get number of threads to use from `TaskConfig.ncores`; otherwise, try environment variable
        nthreads = config.ncores
        if nthreads is None:
            nthreads = os.environ.get("OPENMM_CPU_THREADS")

        # Set workdir to scratch
        # Location resolution order config.scratch_dir, /tmp
        parent = config.scratch_directory
        with temporary_directory(parent=parent, suffix="_openmm_scratch") as tmpdir:

            # Grab molecule, forcefield
            jmol = input_data.molecule

            # TODO: If urls are supported by
            # `openforcefield.typing.engines.smirnoff.ForceField` already, we
            # can eliminate the `offxml` and `url` distinction
            # URL processing can happen there instead
            if getattr(input_data.model, "offxml", None):
                # we were given a file path or relative path
                offxml = input_data.model.offxml

                # Load an Open Force Field `ForceField`
                off_forcefield = self._get_off_forcefield(offxml, offxml)
            elif getattr(input_data.model, "url", None):
                # we were given a url
                with urllib.request.urlopen(input_data.model.url) as req:
                    xml = req.read()

                # Load an Open Force Field `ForceField`
                off_forcefield = self._get_off_forcefield(xml.decode(), xml)
            else:
                raise InputError("OpenMM requires either `model.offxml` or `model.url` to be set")

            # Process molecule with RDKit
            rdkit_mol = RDKitHarness._process_molecule_rdkit(jmol)

            # Create an Open Force Field `Molecule` from the RDKit Molecule
            off_mol = offtop.Molecule(rdkit_mol)

            # Create OpenMM system in vacuum from forcefield, molecule
            off_top = off_mol.to_topology()
            openmm_system = self._get_openmm_system(off_forcefield, off_top)

            # Need an integrator for simulation even if we don't end up using it really
            integrator = openmm.VerletIntegrator(1.0 * unit.femtoseconds)

            # Set platform to CPU explicitly
            platform = openmm.Platform.getPlatformByName("CPU")

            # Set number of threads to use
            # if `nthreads` is `None`, OpenMM default of all logical cores on
            # processor will be used
            if nthreads:
                properties = {"Threads": str(nthreads)}
            else:
                properties = {}

            # Initialize context
            context = openmm.Context(openmm_system, integrator, platform, properties)

            # Set positions from our Open Force Field `Molecule`
            context.setPositions(off_mol.conformers[0])

            # Compute the energy of the configuration
            state = context.getState(getEnergy=True)

            # Get the potential as a simtk.unit.Quantity and put it into units of hartree.
            # Dividing by Avogadro's constant (as the other OpenMM harness in this file
            # does) removes the per-mole factor, leaving a plain number.
            q = state.getPotentialEnergy() / unit.hartree / unit.AVOGADRO_CONSTANT_NA

            ret_data["properties"] = {"return_energy": q}

            # Execute driver
            if input_data.driver == "energy":
                ret_data["return_result"] = ret_data["properties"]["return_energy"]

            elif input_data.driver == "gradient":
                # Get number of atoms
                n_atoms = len(jmol.symbols)

                # Compute the forces
                state = context.getState(getForces=True)

                # Get the forces as a simtk.unit.Quantity with shape (n_atoms, 3)
                forces = state.getForces(asNumpy=True)

                # Convert to hartree/bohr per particle and reformat as 1D array
                q = (forces / (unit.hartree / unit.bohr)).reshape([n_atoms * 3]) / unit.AVOGADRO_CONSTANT_NA

                # Force to gradient: the gradient is the negative of the force
                ret_data["return_result"] = -1 * q
            else:
                raise InputError(
                    f"OpenMM can only compute energy and gradient driver methods. Found {input_data.driver}."
                )

        ret_data["success"] = True

        # Move several pieces up a level
        ret_data["provenance"] = Provenance(creator="openmm", version=openmm.__version__, nthreads=nthreads)

        return AtomicResult(**{**input_data.dict(), **ret_data})
Ejemplo n.º 28
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Translate the entos ``results.json`` output into an ``AtomicResult``.

        Parameters
        ----------
        outfiles
            Mapping of file name to content; ``"results.json"`` must be present and
            hold a JSON document with a ``"json_results"`` entry.
        input_model
            The input the calculation was run for; its fields seed the result.

        Returns
        -------
        AtomicResult
            Input fields plus ``properties``, ``return_result``, parsed ``extras`` and,
            when the wavefunction protocol is not ``"none"``, ``wavefunction``.

        Raises
        ------
        NotImplementedError
            If the driver is not energy, gradient or hessian.
        KeyError
            If the quantity requested by the driver is missing from the results, or a
            wavefunction was requested but no basis set information was returned.
        """
        scf_map = {"energy": "scf_total_energy", "n_iter": "scf_iterations"}
        dft_map = scf_map.copy()
        hf_map = scf_map.copy()
        xtb_map = scf_map.copy()

        energy_command_map = {"dft": dft_map, "hf": hf_map, "xtb": xtb_map}
        extras_map = {"converged": "scf_converged"}
        # entos result key -> wavefunction key, per spin treatment
        wavefunction_map = {
            "restricted": {
                "orbitals": "scf_orbitals_a",
                "density": "scf_density_a",
                "fock": "scf_fock_a",
                "eigenvalues": "scf_eigenvalues_a",
                "occupations": "scf_occupations_a",
            },
            "unrestricted": {
                "orbitals_alpha": "scf_orbitals_a",
                "orbitals_beta": "scf_orbitals_b",
                "density_alpha": "scf_density_a",
                "density_beta": "scf_density_b",
                "fock_alpha": "scf_fock_a",
                "fock_beta": "scf_fock_b",
                "eigenvalues_alpha": "scf_eigenvalues_a",
                "eigenvalues_beta": "scf_eigenvalues_b",
                "occupations_alpha": "scf_occupations_a",
                "occupations_beta": "scf_occupations_b",
            },
        }

        # Determine the energy_command
        energy_command = self.determine_energy_command(
            input_model.model.method)

        gradient_map = {"gradient": "gradient"}
        gradient_map.update({"energy": "scf_total_energy"})
        # TODO Uncomment once entos adds scf_map to gradient json results
        # gradient_map.update(energy_command_map[energy_command])

        hessian_map = {"hessian": "hessian"}
        hessian_map.update(energy_command_map[energy_command])

        # Determine whether to use the energy map or the gradient map
        if input_model.driver == "energy":
            entos_map = energy_command_map[energy_command]
        elif input_model.driver == "gradient":
            entos_map = gradient_map
        elif input_model.driver == "hessian":
            entos_map = hessian_map
        else:
            raise NotImplementedError(
                f"Driver {input_model.driver} not implemented for entos.")

        # Parse the results.json output from entos
        load_results = json.loads(outfiles["results.json"])
        entos_results = load_results["json_results"]
        properties = {
            target: entos_results[key]
            for key, target in entos_map.items() if key in entos_results
        }

        # Parse calcinfo_* properties from the results.json
        if "ao_basis" in entos_results:
            properties["calcinfo_nbasis"] = entos_results["ao_basis"][
                "__Basis"]["n_functions"]
        if "structure" in entos_results:
            properties["calcinfo_natom"] = len(
                entos_results["structure"]["__Atoms"]["atoms"])

        # Parse wavefunction quantities from entos_results
        wavefunction = {}
        if input_model.protocols.wavefunction != "none":

            # First parse basis set information
            if "ao_basis" not in entos_results:
                raise KeyError(
                    f"Basis set information not found so wavefunction protocol {input_model.protocols.wavefunction} is not available."
                )

            atom_map = [
                item[0]
                for item in entos_results["structure"]["__Atoms"]["atoms"]
            ]

            # Each item in electron_shells is a dictionary containing info for one basis function
            electron_shells_by_center = {}
            for basis_item in entos_results["ao_basis"]["__Basis"][
                    "electron_shells"]:
                electron_shell_info = {
                    "angular_momentum": [basis_item["angular_momentum"]],
                    "harmonic_type":
                    basis_item["function_type"].split("_")[-1],
                    "exponents": basis_item["exponents"],
                    "coefficients": basis_item["coefficients"],
                }
                electron_shells_by_center.setdefault(
                    basis_item["center_index"],
                    []).append(electron_shell_info)

            # Construct center_data from electron_shells_by_center
            # Note: Duplicate atoms will over write each other
            # The lookup by position requires the center indices to be 0..n-1
            center_data = {}
            for i in range(len(electron_shells_by_center)):
                center_data[atom_map[i]] = {
                    "electron_shells": electron_shells_by_center[i]
                }

            # Construct BasisSet
            basis_info = {
                "name": input_model.model.basis,
                # "description": "", # None provided by entos
                "center_data": center_data,
                "atom_map": atom_map,
                "nbf": entos_results["ao_basis"]["__Basis"]["n_functions"],
            }
            basis_set = BasisSet(**basis_info)
            wavefunction["basis"] = basis_set

            # Now parse wavefunction information; the key map must match the spin
            # treatment, otherwise the alpha/beta keys cannot be resolved
            n_channels = entos_results["n_channels"]
            if n_channels == 1:
                wavefunction["restricted"] = True
                channel_map = wavefunction_map["restricted"]
            # TODO Add a test in QCEngineRecords
            elif n_channels == 2:
                wavefunction["restricted"] = False
                channel_map = wavefunction_map["unrestricted"]
            else:
                # Any other channel count contributes no wavefunction quantities
                channel_map = {}

            for key, target in channel_map.items():
                if key not in entos_results:
                    continue
                if "orbitals" in key:
                    orbitals_transposed = reorder_column_ao_indices(
                        np.array(entos_results[key]), basis_set,
                        self._entos_to_cca_ao_order)
                    wavefunction[target] = orbitals_transposed.transpose()
                elif "density" in key or "fock" in key:
                    wavefunction[target] = reorder_row_and_column_ao_indices(
                        entos_results[key], basis_set,
                        self._entos_to_cca_ao_order)
                else:
                    wavefunction[target] = entos_results[key]

        # Parse results for the extras_map from results.json
        extras = {
            target: entos_results[key]
            for key, target in extras_map.items() if key in entos_results
        }

        # Initialize output_data by copying over input_model.dict()
        output_data = input_model.dict()

        # Determine the correct return_result
        if input_model.driver == "energy":
            if "scf_total_energy" in properties:
                output_data["return_result"] = properties["scf_total_energy"]
            else:
                raise KeyError(
                    f"Could not find {input_model.model} total energy")
        elif input_model.driver == "gradient" or input_model.driver == "hessian":
            # The gradient/hessian is moved out of properties into return_result
            if input_model.driver in properties:
                output_data["return_result"] = properties.pop(
                    input_model.driver)
            else:
                raise KeyError(f"{input_model.driver} not found.")
        else:
            raise NotImplementedError(
                f"Driver {input_model.driver} not implemented for entos.")

        output_data["properties"] = properties
        if input_model.protocols.wavefunction != "none":
            output_data["wavefunction"] = wavefunction
        output_data["extras"].update(extras)
        output_data["schema_name"] = "qcschema_output"
        output_data["success"] = True

        return AtomicResult(**output_data)
Ejemplo n.º 29
0
    def compute(self, input_model: "AtomicInput",
                config: "TaskConfig") -> "AtomicResult":
        """
        Runs Psi4 in API mode
        """
        self.found(raise_error=True)
        pversion = parse_version(self.get_version())

        if pversion < parse_version("1.2"):
            raise ResourceError("Psi4 version '{}' not understood.".format(
                self.get_version()))

        # Location resolution order config.scratch_dir, $PSI_SCRATCH, /tmp
        parent = config.scratch_directory
        if parent is None:
            parent = os.environ.get("PSI_SCRATCH", None)

        error_type = None
        error_message = None
        compute_success = False

        if isinstance(input_model.model.basis, BasisSet):
            raise InputError(
                "QCSchema BasisSet for model.basis not implemented. Use string basis name."
            )

        # Basis must not be None for HF3c
        old_basis = input_model.model.basis
        input_model.model.__dict__["basis"] = old_basis or ""

        with temporary_directory(parent=parent,
                                 suffix="_psi_scratch") as tmpdir:

            caseless_keywords = {
                k.lower(): v
                for k, v in input_model.keywords.items()
            }
            if (input_model.molecule.molecular_multiplicity !=
                    1) and ("reference" not in caseless_keywords):
                input_model.keywords["reference"] = "uhf"

            # Old-style JSON-based command line
            if pversion < parse_version("1.4a2.dev160"):

                # Setup the job
                input_data = input_model.dict(encoding="json")
                input_data["nthreads"] = config.ncores
                input_data["memory"] = int(config.memory * 1024 * 1024 * 1024 *
                                           0.95)  # Memory in bytes
                input_data["success"] = False
                input_data["return_output"] = True

                if input_data["schema_name"] == "qcschema_input":
                    input_data["schema_name"] = "qc_schema_input"

                # Execute the program
                success, output = execute(
                    [
                        which("psi4"), "--scratch", tmpdir, "--json",
                        "data.json"
                    ],
                    {"data.json": json.dumps(input_data)},
                    ["data.json"],
                    scratch_directory=tmpdir,
                )

                output_data = input_data.copy()
                if success:
                    output_data = json.loads(output["outfiles"]["data.json"])
                    if "extras" not in output_data:
                        output_data["extras"] = {}

                    # Check QCVars
                    local_qcvars = output_data.pop("psi4:qcvars", None)
                    if local_qcvars:
                        # Edge case where we might already have qcvars, should not happen
                        if "qcvars" in output_data["extras"]:
                            output_data["extras"][
                                "local_qcvars"] = local_qcvars
                        else:
                            output_data["extras"]["qcvars"] = local_qcvars

                    if output_data.get("success", False) is False:
                        error_message, error_type = self._handle_errors(
                            output_data)
                    else:
                        compute_success = True

                else:
                    error_message = output.get("stderr", "No STDERR output")
                    error_type = "execution_error"

                # Reset the schema if required
                output_data["schema_name"] = "qcschema_output"
                output_data.pop("memory", None)
                output_data.pop("nthreads", None)
                output_data["stdout"] = output_data.pop("raw_output", None)

            else:

                if input_model.extras.get("psiapi", False):
                    import psi4

                    orig_scr = psi4.core.IOManager.shared_object(
                    ).get_default_path()
                    psi4.core.set_num_threads(config.ncores, quiet=True)
                    psi4.set_memory(f"{config.memory}GB", quiet=True)
                    # psi4.core.IOManager.shared_object().set_default_path(str(tmpdir))
                    if pversion < parse_version(
                            "1.4"):  # adjust to where DDD merged
                        # slightly dangerous in that if `qcng.compute({..., psiapi=True}, "psi4")` called *from psi4
                        #   session*, session could unexpectedly get its own files cleaned away.
                        output_data = psi4.schema_wrapper.run_qcschema(
                            input_model).dict()
                    else:
                        output_data = psi4.schema_wrapper.run_qcschema(
                            input_model, postclean=False).dict()
                    # success here means execution returned. output_data may yet be qcel.models.AtomicResult or qcel.models.FailedOperation
                    success = True
                    if output_data.get("success", False):
                        output_data["extras"]["psiapi_evaluated"] = True
                    psi4.core.IOManager.shared_object().set_default_path(
                        orig_scr)
                else:
                    run_cmd = [
                        which("psi4"),
                        "--scratch",
                        str(tmpdir),
                        "--nthread",
                        str(config.ncores),
                        "--memory",
                        f"{config.memory}GB",
                        "--qcschema",
                        "data.msgpack",
                    ]
                    input_files = {
                        "data.msgpack": input_model.serialize("msgpack-ext")
                    }
                    success, output = execute(run_cmd,
                                              input_files, ["data.msgpack"],
                                              as_binary=["data.msgpack"],
                                              scratch_directory=tmpdir)
                    if success:
                        output_data = deserialize(
                            output["outfiles"]["data.msgpack"], "msgpack-ext")
                    else:
                        output_data = input_model.dict()

                if success:
                    if output_data.get("success", False) is False:
                        error_message, error_type = self._handle_errors(
                            output_data)
                    else:
                        compute_success = True
                else:
                    error_message = output.get("stderr", "No STDERR output")
                    error_type = "execution_error"

        # Dispatch errors, PSIO Errors are not recoverable for future runs
        if compute_success is False:

            if "PSIO Error" in error_message:
                if "scratch directory" in error_message:
                    # Psi4 cannot access the folder or file
                    raise ResourceError(error_message)
                else:
                    # Likely a random error, worth retrying
                    raise RandomError(error_message)
            elif ("SIGSEV" in error_message) or (
                    "SIGSEGV" in error_message) or ("segmentation fault"
                                                    in error_message):
                raise RandomError(error_message)
            elif ("TypeError: set_global_option"
                  in error_message) or (error_type == "ValidationError"):
                raise InputError(error_message)
            elif "RHF reference is only for singlets" in error_message:
                raise InputError(error_message)
            else:
                raise UnknownError(error_message)

        # Reset basis
        output_data["model"]["basis"] = old_basis

        # Move several pieces up a level
        output_data["provenance"]["memory"] = round(config.memory, 3)
        output_data["provenance"]["nthreads"] = config.ncores

        # Delete keys
        output_data.pop("return_output", None)

        return AtomicResult(**output_data)
Ejemplo n.º 30
0
    def parse_output(self, outfiles: Dict[str, str],
                     input_model: "AtomicInput") -> "AtomicResult":
        """Build an ``AtomicResult`` from the XML output of a Molpro run.

        Parameters
        ----------
        outfiles
            Output files keyed by name. ``"dispatch.xml"`` is parsed for the
            results and ``"dispatch.out"`` is stored as the result's stdout.
        input_model
            The input behind ``outfiles``. Its ``dict()`` form is the base of
            the returned result; its method, driver and molecular multiplicity
            steer the parsing.

        Returns
        -------
        AtomicResult
            Input fields plus harvested ``properties``, ``extras``,
            ``stdout``, ``success=True`` and, for the ``energy`` and
            ``gradient`` drivers, ``return_result``.

        Raises
        ------
        KeyError
            If no total energy can be located for the requested method.
        """
        root = ET.fromstring(outfiles["dispatch.xml"])

        # TODO Read information from molecule tag
        #      - cml:molecule, cml:atomArray (?)
        #      - basisSet
        #       - Be aware of symmetry. Might only be able to support if symmetry,nosym
        #      - orbitals
        #       - Be aware of symmetry. Might only be able to support if symmetry,nosym
        # NOTE: Spherical basis set ordering in Molpro (with no symmetry)
        # S -->  0
        # P --> +1, -1, 0
        # D -->  0, -2, +1, +2, -1
        # F --> +1, -1, 0, +3, -2, -3, +2
        # G -->  0, -2, +1, +4, -1, +2, -4, +3, -3
        # H --> +1, -1, +2, +3, -4, -3, +4, -5, 0, +5, -2
        # I --> +6, -2, +5, +4, -5, +2, -6, +3, -4, 0, -3, -1, +1
        ns = {"molpro_uri": "http://www.molpro.net/schema/molpro-output"}
        properties = {}
        extras = {}

        # Every SCF-type label shares the same destination key.
        scf_labels = ("HF", "RHF", "UHF", "KS", "RKS", "UKS")

        # (property name, method) pairs reported by Molpro commands mapped to
        # the properties keys they are stored under.
        command_map = {
            "Energy": dict.fromkeys(scf_labels, "scf_total_energy"),
            "total energy": {
                "MP2": "mp2_total_energy",
                "CCSD": "ccsd_total_energy",
                "CCSD(T)": "ccsd_prt_pr_total_energy",
            },
            "correlation energy": {
                "MP2": "mp2_correlation_energy",
                "CCSD": "ccsd_correlation_energy",
                # Need both CCSD(T) and Total: Total corresponds to the
                # CCSD(T) correlation energy.
                "CCSD(T)": "ccsd_prt_pr_correlation_energy",
                "Total": "ccsd_prt_pr_correlation_energy",
            },
            "singlet pair energy": {
                "MP2": "mp2_singlet_pair_energy",
                "CCSD": "ccsd_singlet_pair_energy",
            },
            "triplet pair energy": {
                "MP2": "mp2_triplet_pair_energy",
                "CCSD": "ccsd_triplet_pair_energy",
            },
            "Dipole moment": {
                **dict.fromkeys(scf_labels, "scf_dipole_moment"),
                "MP2": "mp2_dipole_moment",
                "CCSD": "ccsd_dipole_moment",
                "CCSD(T)": "ccsd_prt_pr_dipole_moment",
            },
        }

        # Basic support for local correlation methods; these end up in extras.
        # "strong pair energy" (LCCSD) and "weak pair energy" (LMP2) are not
        # harvested.
        local_map = {
            "total energy": {
                "LMP2": "local_mp2_total_energy",
                "LCCSD": "local_ccsd_total_energy",
                "LCCSD(T0)": "local_ccsd_prt0_pr_total_energy",
                "LCCSD(T)": "local_ccsd_prt_pr_total_energy",
            },
            "correlation energy": {
                "LMP2": "local_mp2_correlation_energy",
                "LCCSD": "local_ccsd_correlation_energy",
            },
            "singlet pair energy": {
                "LMP2": "local_mp2_singlet_pair_energy",
                "LCCSD": "local_ccsd_singlet_pair_energy",
            },
            "triplet pair energy": {
                "LMP2": "local_mp2_triplet_pair_energy",
                "LCCSD": "local_ccsd_triplet_pair_energy",
            },
            "singles energy": {
                "LCCSD": "local_ccsd_singles_energy",
            },
        }

        # Molpro variables used for quantities absent from the command maps.
        variable_map = {
            "_ENUC": "nuclear_repulsion_energy",
            "_DFTFUN": "scf_xc_energy",
            "_NELEC": ["calcinfo_nalpha", "calcinfo_nbeta"],
        }

        # Walk the jobsteps; each holds the output of one Molpro command
        # (e.g. {HF}, {force}).
        for step in root.findall("molpro_uri:job/molpro_uri:jobstep", ns):
            if "FORCE" in step.attrib["command"]:
                # Gradient is kept flat, ordered [1x, 1y, 1z, 2x, 2y, 2z, ...]
                for grad_node in step.findall("molpro_uri:gradient", ns):
                    properties["gradient"] = list(
                        map(float, grad_node.text.split()))
                continue

            # Any other command: collect energies and dipole moments.
            for prop_node in step.findall("molpro_uri:property", ns):
                attrs = prop_node.attrib
                prop_name = attrs["name"]
                prop_method = attrs["method"]
                raw_value = attrs["value"]

                target = command_map.get(prop_name, {}).get(prop_method)
                if target is not None:
                    if prop_name == "Dipole moment":
                        properties[target] = list(
                            map(float, raw_value.split()))
                    else:
                        properties[target] = float(raw_value)
                    continue

                extra_target = local_map.get(prop_name, {}).get(prop_method)
                if extra_target is not None:
                    extras[extra_target] = float(raw_value)

        # Replace singlet/triplet pair energies by same-spin and opposite-spin
        # correlation energies whenever both members of a pair were found.
        pair_conversions = (
            ("mp2_singlet_pair_energy", "mp2_triplet_pair_energy",
             "mp2_same_spin_correlation_energy",
             "mp2_opposite_spin_correlation_energy"),
            ("ccsd_singlet_pair_energy", "ccsd_triplet_pair_energy",
             "ccsd_same_spin_correlation_energy",
             "ccsd_opposite_spin_correlation_energy"),
        )
        for singlet_key, triplet_key, same_key, opposite_key in pair_conversions:
            if singlet_key not in properties or triplet_key not in properties:
                continue
            singlet = properties[singlet_key]
            triplet = properties[triplet_key]
            properties[same_key] = (2.0 / 3.0) * triplet
            properties[opposite_key] = (1.0 / 3.0) * triplet + singlet
            del properties[singlet_key]
            del properties[triplet_key]

        # Molecule tag: method label, final energy and the variables list.
        # Note: for DFT the method label is the functional name prefixed by R or U.
        mol_node = root.find("molpro_uri:job/molpro_uri:molecule", ns)
        mol_method = mol_node.attrib["method"]
        mol_energy = float(mol_node.attrib["energy"])  # used only as a fallback

        for var_node in mol_node.findall(
                "molpro_uri:variables/molpro_uri:variable", ns):
            var_name = var_node.attrib["name"]
            if var_name not in variable_map:
                continue
            target = variable_map[var_name]
            if var_name != "_NELEC":
                properties[target] = float(var_node[0].text)
                continue

            # Split the electron count into alpha/beta using the multiplicity
            # supplied with the input molecule.
            n_elec = int(float(var_node[0].text))
            n_unpaired = input_model.molecule.molecular_multiplicity - 1
            n_beta = (n_elec - n_unpaired) // 2
            n_alpha = n_elec - n_beta
            alpha_key, beta_key = target
            properties[alpha_key] = n_alpha
            properties[beta_key] = n_beta

        # Basis set size.
        basis_node = root.find(
            "molpro_uri:job/molpro_uri:molecule/molpro_uri:basisSet", ns)
        properties["calcinfo_nbasis"] = int(basis_node.attrib["length"])

        # Requested method, as given in the input.
        method = input_model.model.method.upper()

        # Pick the final energy: prefer a harvested total energy, then an SCF
        # energy, and only then fall back on the molecule tag.
        total_key = command_map["total energy"].get(method)
        scf_key = command_map["Energy"].get(method)
        if total_key is not None and total_key in properties:
            final_energy = properties[total_key]
        elif scf_key is not None and scf_key in properties:
            final_energy = properties[scf_key]
        elif method not in mol_method:
            raise KeyError(f"Could not find {method} total energy")
        else:
            # The molecule tag's method label contains the input method, so
            # its energy is used and recorded under the matching keys.
            final_energy = mol_energy
            if method in self._post_hf_methods:
                properties[command_map["total energy"][method]] = mol_energy
                correlation = mol_energy - properties["scf_total_energy"]
                properties[command_map["correlation energy"]
                           [method]] = correlation
            elif method in self._dft_functionals:
                properties[command_map["Energy"]["KS"]] = mol_energy
            elif method in self._hf_methods:
                properties[command_map["Energy"][method]] = mol_energy

        # The result starts out as a copy of the input fields.
        output_data = input_model.dict()

        # return_result depends on the driver; the gradient moves out of
        # properties and into return_result.
        driver = input_model.driver
        if driver == "energy":
            output_data["return_result"] = final_energy
        elif driver == "gradient":
            output_data["return_result"] = properties.pop("gradient")

        # Remaining fields needed by AtomicResult.
        output_data["properties"] = properties
        output_data["extras"].update(extras)
        output_data["schema_name"] = "qcschema_output"
        output_data["stdout"] = outfiles["dispatch.out"]
        output_data["success"] = True

        return AtomicResult(**output_data)