Beispiel #1
0
def test_from_data_kwargs():
    """Check that charge/multiplicity keyword arguments reach the built molecule.

    The same three-atom geometry is built twice: first with molecular and
    fragment values all given, then with only the molecular values.  In both
    cases the molecule and its first fragment must report charge 1 and
    multiplicity 2.
    """
    cases = (
        # Molecular and fragment-level values supplied together
        (
            """
        O 0 0 0
        H 0 1.5 0
        H 0 0 1.5
        """,
            {
                "molecular_charge": 1,
                "molecular_multiplicity": 2,
                "fragment_charges": [1],
                "fragment_multiplicities": [2],
            },
        ),
        # Only molecular values supplied; fragment values are not passed in
        (
            """
            O 0 0 0
            H 0 1.5 0
            H 0 0 1.5
            """,
            {
                "molecular_charge": 1,
                "molecular_multiplicity": 2,
            },
        ),
    )

    for geometry_text, charge_kwargs in cases:
        built = Molecule.from_data(geometry_text, **charge_kwargs)
        assert built.molecular_charge == 1
        assert built.molecular_multiplicity == 2
        assert built.fragment_charges[0] == 1
        assert built.fragment_multiplicities[0] == 2
Beispiel #2
0
def test_molecule_np_constructors():
    """
    Build one neon tetramer from psi4 text, from a NumPy table and from JSON,
    and check that the three molecules compare equal.
    """
    # Reference molecule: four Ne atoms separated by "--", coordinates in bohr
    psi_tetramer = Molecule.from_data(
        """
        Ne 0.000000 0.000000 0.000000
        --
        Ne 3.100000 0.000000 0.000000
        --
        Ne 0.000000 3.200000 0.000000
        --
        Ne 0.000000 0.000000 3.300000
        units bohr""",
        dtype="psi4",
    )

    # Table passed to the numpy constructor: a column of atomic numbers (10)
    # followed by the geometry columns of the reference molecule
    atomic_number_column = np.full((4, 1), 10)
    atom_table = np.hstack((atomic_number_column, psi_tetramer.geometry))
    np_tetramer = Molecule.from_data(
        atom_table,
        name="neon tetramer",
        dtype="numpy",
        frags=[1, 2, 3],
        units="bohr",
    )
    assert psi_tetramer == np_tetramer

    # Round trip through the JSON representation
    json_tetramer = Molecule.from_data(psi_tetramer.json(), dtype="json")
    assert psi_tetramer == json_tetramer
    assert json_tetramer.get_molecular_formula() == "Ne4"
Beispiel #3
0
def relax_structure(smiles: str,
                    qc_config: QCInputSpecification,
                    compute_config: Optional[Union[TaskConfig, Dict]] = None,
                    compute_connectivity: bool = False,
                    code: str = _code) -> Tuple[str, float]:
    """Relax the geometry of a molecule given its SMILES string

    Starting coordinates come from ``generate_atomic_coordinates``; the geometry
    is then optimized with the ``geometric`` procedure using ``code`` as the
    program and the ``GAU_VERYTIGHT`` convergence set.

    Args:
        smiles (str): SMILES of a molecule
        qc_config (QCInputSpecification): Input specification for the optimization
        compute_config (TaskConfig): Passed to ``compute_procedure`` as ``local_options``
        compute_connectivity (bool): Whether to guess the connectivity and attach it
            to the molecule before running the optimization
        code (str): Which QC code to use for the evaluation
    Returns:
        (str): Final structure of the molecule in XYZ format
        (float): Last entry of the energies reported by the optimization
    """
    # Starting structure
    start_mol = Molecule.from_data(generate_atomic_coordinates(smiles), dtype='xyz')

    # Optionally rebuild the molecule with a guessed connectivity table
    if compute_connectivity:
        guessed = guess_connectivity(start_mol.symbols, start_mol.geometry, default_connectivity=1.0)
        mol_fields = start_mol.dict()
        mol_fields['connectivity'] = guessed
        start_mol = Molecule.from_data(mol_fields)

    # Assemble and run the optimization; failures are raised, not returned
    opt_keywords = {'program': code, 'convergence_set': 'GAU_VERYTIGHT'}
    task = OptimizationInput(input_specification=qc_config,
                             initial_molecule=start_mol,
                             keywords=opt_keywords)
    outcome: OptimizationResult = compute_procedure(
        task, 'geometric', local_options=compute_config, raise_error=True)

    relaxed_xyz = outcome.final_molecule.to_string('xyz')
    return relaxed_xyz, outcome.energies[-1]
Beispiel #4
0
def test_water_orient():
    """Check fragment hashes of two-water systems after orientation.

    Covers (1) a dimer built with default options, where grouped fragment pairs
    must hash equally regardless of the order requested, and (2) a dimer built
    with ``dtype="psi4"`` and ``orient=True``, where the two pair selections
    made without ``group_fragments`` must hash differently.
    """
    # The two fragments are the same molecule placed differently in space
    dimer = Molecule.from_data(
        """
        O  -1.551007  -0.114520   0.000000
        H  -1.934259   0.762503   0.000000
        H  -0.599677   0.040712   0.000000
        --
        O  -0.114520  -1.551007  10.000000
        H   0.762503  -1.934259  10.000000
        H   0.040712  -0.599677  10.000000
        """
    )

    first_water = dimer.get_fragment(0, orient=True)
    second_water = dimer.get_fragment(1, orient=True)

    # Individually oriented monomers must be indistinguishable
    assert first_water.get_hash() == second_water.get_hash()

    # With grouping, the request order of the fragments must not matter
    pair_ab = dimer.get_fragment(0, 1, orient=True, group_fragments=True)
    pair_ba = dimer.get_fragment(1, 0, orient=True, group_fragments=True)
    assert pair_ab.get_hash() == pair_ba.get_hash()

    # Without grouping, the two requests are expected to differ
    pair_ab = dimer.get_fragment(0, 1, orient=True, group_fragments=False)
    pair_ba = dimer.get_fragment(1, 0, orient=True, group_fragments=False)
    assert pair_ab.get_hash() != pair_ba.get_hash()

    # Second system: same monomer repeated with a shifted first coordinate
    shifted_dimer = Molecule.from_data(
        """
        O  -1.551007  -0.114520   0.000000
        H  -1.934259   0.762503   0.000000
        H  -0.599677   0.040712   0.000000
        --
        O  -11.551007  -0.114520   0.000000
        H  -11.934259   0.762503   0.000000
        H  -10.599677   0.040712   0.000000
        """,
        dtype="psi4",
        orient=True,
    )

    first_water = shifted_dimer.get_fragment(0, orient=True)
    second_water = shifted_dimer.get_fragment(1, orient=True)

    # Monomers: singlet, and identical to each other
    assert first_water.molecular_multiplicity == 1
    assert first_water.get_hash() == second_water.get_hash()

    # Pairs requested in both orders (the second index is the ghost fragment,
    # per the original author's note)
    pair_ab = shifted_dimer.get_fragment(0, 1, orient=True)
    pair_ba = shifted_dimer.get_fragment(1, 0, orient=True)

    # The two selections must not collapse onto the same hash
    assert pair_ab.molecular_multiplicity == 1
    assert pair_ab.get_hash() != pair_ba.get_hash()
def test_hash():
    """Contrast ``Molecule.get_hash`` with the module-level ``get_hash`` helper.

    The molecule's own hash is expected to change on re-orientation and on a
    change of charge, whereas the helper is expected to give the same value in
    both situations.
    """
    _, xyz = generate_inchi_and_xyz('O')
    neutral = Molecule.from_data(xyz, 'xyz')

    # Re-orientation: built-in hash differs, helper hash agrees
    assert neutral.get_hash() != neutral.orient_molecule().get_hash()
    assert get_hash(neutral) == get_hash(neutral.orient_molecule())

    # Same geometry with one unit of charge added
    charged = Molecule.from_data(xyz, 'xyz', molecular_charge=1)
    assert charged.molecular_multiplicity != neutral.molecular_multiplicity
    assert neutral.get_hash() != charged.get_hash()
    assert get_hash(neutral) == get_hash(charged.orient_molecule())
Beispiel #6
0
def test_molecule_data_constructor_numpy():
    """Rebuild the water dimer from a NumPy table, with and without ``dtype``."""
    water_psi = water_dimer_minima.copy()

    # Table layout: atomic number column followed by the geometry scaled by the
    # Bohr -> angstrom conversion factor
    z_column = np.reshape(np.array(water_psi.atomic_numbers), (-1, 1))
    scaled_geometry = water_psi.geometry * qcel.constants.conversion_factor("Bohr", "angstrom")
    npwater = np.hstack((z_column, scaled_geometry))

    # Explicit dtype
    explicit = Molecule.from_data(npwater, name="water dimer", dtype="numpy", frags=[3])
    assert water_psi.compare(explicit)

    # dtype left for from_data to work out
    inferred = Molecule.from_data(npwater, name="water dimer", frags=[3])
    assert water_psi.compare(inferred)
    assert water_psi.get_molecular_formula() == "H4O2"
Beispiel #7
0
def test_molecule_data_constructor_dict():
    """Rebuild the water dimer from its dict, JSON and string forms."""
    water_psi = water_dimer_minima.copy()

    # From the dict representation
    rebuilt = Molecule.from_data(water_psi.dict())
    assert water_psi.compare(water_psi, rebuilt)

    # From the JSON representation, dtype given positionally
    rebuilt = Molecule.from_data(water_psi.json(), "json")
    assert water_psi.compare(water_psi, rebuilt)

    # From the default string representation, parsed as psi4 input
    from_text = Molecule.from_data(water_psi.to_string(), dtype="psi4")
    assert water_psi.compare(from_text)
Beispiel #8
0
def test_molecule_data_constructor_dict():
    """Rebuild the water dimer from dict, JSON and psi4 text; pin hash and schema."""
    water_psi = water_dimer_minima.copy()

    # From the dict representation
    rebuilt = Molecule.from_data(water_psi.dict())
    assert water_psi == rebuilt

    # From the JSON representation, dtype given positionally
    rebuilt = Molecule.from_data(water_psi.json(), "json")
    assert water_psi == rebuilt

    # From the psi4 string representation
    psi4_text = water_psi.to_string("psi4")
    assert water_psi == Molecule.from_data(psi4_text, dtype="psi4")

    # The hash value was copied from schema_version=1 (original author's note)
    expected_hash = "3c4b98f515d64d1adc1648fe1fe1d6789e978d34"
    assert water_psi.get_hash() == expected_hash
    assert water_psi.schema_version == 2
    assert water_psi.schema_name == "qcschema_molecule"
Beispiel #9
0
def test_pyscf_wrap_dft_co_h2o_sto3g():
    """Test DFT (LDA,VWN) embedding energy terms for CO and H2O in STO-3G.

    The reference numbers are Q-Chem results (original author's note).
    """
    co = Molecule.from_data(
        """C        -3.6180905689    1.3768035675   -0.0207958979
                               O        -4.7356838533    1.5255563000    0.1150239130"""
    )
    h2o = Molecule.from_data(
        """O  -7.9563726699    1.4854060709    0.1167920007
                                H  -6.9923165534    1.4211335985    0.1774706091
                                H  -8.1058463545    2.4422204631    0.1115993752"""
    )

    # Both subsystems share basis, method and functional; only the molecule differs
    args0 = {"mol": co, "basis": 'sto-3g', "method": 'dft', "xc_code": 'LDA,VWN'}
    args1 = dict(args0, mol=h2o)
    # Embedding settings: subsystem-0 settings plus the kinetic functional
    embs = dict(args0, t_code='XC_LDA_K_TF')

    wrap = PyScfWrap(args0, args1, embs)
    wrap.run_embedding()
    embdic = wrap.energy_dict

    # (key in energy_dict, reference value, absolute tolerance), checked in order
    references = (
        # Electrostatic terms
        ('rho0_rho1', 20.9016932248, 1e-5),
        ('nuc0_rho1', -21.0856319395, 1e-5),
        ('nuc1_rho0', -20.8950212739, 1e-5),
        # DFT related terms
        ('et_nad', 0.0029633384, 1e-6),
        ('exc_nad', -0.0020907144, 1e-6),
        ('int_ref_t', 0.0022083882, 1e-6),
        ('int_ref_xc', -0.0011261095, 1e-6),
        ('int_emb_t', 0.0022122190, 1e-6),
        ('int_emb_xc', -0.0011281762, 1e-6),
        ('deltalin', 0.0000017641, 1e-7),
    )
    for key, qchem_value, tolerance in references:
        assert abs(qchem_value - embdic[key]) < tolerance
Beispiel #10
0
    def match_geometry(
            self,
            mol: Molecule,
            tolerance: float = 1e-4) -> Tuple[AccuracyLevel, OxidationState]:
        """Find which geometry stored in this record corresponds to a molecule

        All stored geometries are first compared by hash.  Only when no hash
        matches are the stored geometries aligned against ``mol`` and compared
        by RMSD.

        Args:
            mol: Molecule to look up
            tolerance: RMSD below which an aligned geometry counts as a match
        Returns:
            - Accuracy level under which the matching geometry is stored
            - Oxidation state under which the matching geometry is stored
        Raises:
            UnmatchedGeometry: If neither the hash nor the alignment search succeeds
        """

        query_hash = get_hash(mol)

        # Flatten the level -> state -> geometry mapping, preserving its order
        candidates = [(level, state, geom)
                      for level, geoms in self.data.items()
                      for state, geom in geoms.items()]

        # Pass 1: exact match on the stored hash
        for level, state, geom in candidates:
            if geom.xyz_hash == query_hash:
                return level, state

        # Pass 2: align each stored structure onto the query and check the RMSD
        for level, state, geom in candidates:
            stored_mol = Molecule.from_data(geom.xyz, 'xyz')
            _, align_info = stored_mol.align(mol, atoms_map=True)
            if align_info['rmsd'] < tolerance:
                return level, state

        raise UnmatchedGeometry()
def submit_vertical_geometries(geom_dataset: GeometryDataset,
                               vert_datasets: List[SinglePointDataset]):
    """Submit each neutral geometry in three charge states to every dataset

    For every molecule of ``geom_dataset`` that has a ``'neutral'`` geometry,
    that geometry is re-created with charges -1, 0 and +1 and added to each
    dataset in ``vert_datasets``.  Afterwards every dataset is saved and its
    computations are started.

    Args:
        geom_dataset: Source of the geometries (via ``get_geometries``)
        vert_datasets: Datasets that receive the charged copies
    """
    all_geoms = geom_dataset.get_geometries()
    print(f'Found {len(all_geoms)} molecules in {geom_dataset.coll.name}')

    # Label and charge of each state started from the neutral geometry
    charge_states = (('reduced', -1), ('neutral', 0), ('oxidized', 1))

    for inchi, geoms in all_geoms.items():
        # Molecules without a neutral geometry are skipped
        if 'neutral' not in geoms:
            continue
        geom = geoms['neutral'].to_string('xyz')

        for postfix, charge in charge_states:
            # Only the charged states carry the state label in their name
            identifier = (f'{inchi}_xtb_neutral' if charge == 0
                          else f'{inchi}_xtb_neutral_{postfix}')
            new_geom = Molecule.from_data(geom,
                                          'xyz',
                                          molecular_charge=charge,
                                          name=identifier)
            # Register the molecule with every dataset, deferring the save
            for vert in vert_datasets:
                vert.add_molecule(new_geom, inchi, save=False)

    # Save once per dataset, then start its computations
    for vert in vert_datasets:
        vert.coll.save()
        vert_started = vert.start_computation()
        print(f'Started {vert_started} computations for {vert.coll.name}')
def relax_structure(xyz: str,
                    qc_config: QCInputSpecification,
                    charge: int = 0,
                    compute_config: Optional[Union[TaskConfig, Dict]] = None,
                    code: str = _code) -> OptimizationResult:
    """Relax the geometry of a molecule given in XYZ format

    When ``code`` is ``"nwchem"`` the ``nwchemdriver`` procedure is used with
    its own keywords; for any other code the ``geometric`` procedure is used
    with ``code`` as the program.

    Args:
        xyz (str): Structure of a molecule in XYZ format
        qc_config (QCInputSpecification): Input specification for the optimization
        charge (int): Charge of the molecule
        compute_config (TaskConfig): Passed to ``compute_procedure`` as ``local_options``
        code (str): Which QC code to use for the evaluation
    Returns:
        (OptimizationResult): Full output from the calculation
    """

    start_mol = Molecule.from_data(xyz, dtype='xyz', molecular_charge=charge)

    # Choose the optimizer and its keywords from the requested code
    if code == "nwchem":
        relax_code, keywords = "nwchemdriver", {"driver__maxiter": 100, "set__driver:linopt": 0}
    else:
        relax_code, keywords = "geometric", {"program": code}

    task = OptimizationInput(input_specification=qc_config,
                             initial_molecule=start_mol,
                             keywords=keywords)

    # Failures are raised rather than returned
    return compute_procedure(task,
                             relax_code,
                             local_options=compute_config,
                             raise_error=True)
def run_single_point(xyz: str,
                     driver: DriverEnum,
                     qc_config: QCInputSpecification,
                     charge: int = 0,
                     compute_config: Optional[Union[TaskConfig, Dict]] = None,
                     code: str = _code) -> AtomicResult:
    """Run a single calculation on a fixed geometry

    Args:
        xyz: Structure in XYZ format
        driver: Driver to run; it replaces whatever driver ``qc_config`` carries
        qc_config (QCInputSpecification): Input specification; every field except
            ``driver`` is forwarded to the calculation
        charge (int): Charge of the molecule
        compute_config (TaskConfig): Passed to ``compute`` as ``local_options``
        code (str): Which QC code to use for the evaluation
    Returns:
        Result object returned by ``compute``
    """

    mol = Molecule.from_data(xyz, dtype="xyz", molecular_charge=charge)

    # Drop the driver from the specification so the explicit argument wins
    spec_fields = qc_config.dict(exclude={'driver'})
    task = AtomicInput(molecule=mol, driver=driver, **spec_fields)

    # Failures are raised rather than returned
    return compute(task,
                   code,
                   local_options=compute_config,
                   raise_error=True)
def compute_reference_energy(element: str,
                             qc_config: QCInputSpecification,
                             n_open: int,
                             code: str = _code) -> float:
    """Compute the energy of a single neutral atom placed at the origin

    Args:
        element (str): Symbol of the element
        qc_config (QCInputSpecification): Input specification; every field except
            ``driver`` is forwarded to the calculation
        n_open (int): Value used as the molecular multiplicity of the atom
            (described by the original author as the number of open atomic orbitals)
        code (str): Which QC code to use for the evaluation
    Returns:
        (float): ``return_result`` of the energy calculation
    """

    # One-atom XYZ file: atom count, comment line, then the atom at the origin
    xyz = f'1\n{element}\n{element} 0 0 0'
    atom = Molecule.from_data(xyz,
                              dtype='xyz',
                              molecular_multiplicity=n_open,
                              molecular_charge=0)

    # Energy calculation using the supplied specification minus its driver
    spec_fields = qc_config.dict(exclude={'driver'})
    task = AtomicInput(molecule=atom, driver='energy', **spec_fields)
    outcome = compute(task, code, raise_error=True)

    return outcome.return_result
Beispiel #15
0
def test_hf_co_sto3g():
    """Test ScfPyScf on CO with HF/STO-3G, before and after a zero perturbation."""
    mol = Molecule.from_data(
        """C        -3.6180905689    1.3768035675   -0.0207958979
                                O        -4.7356838533    1.5255563000    0.1150239130"""
    )
    reference_energy = -111.22516947

    hf = ScfPyScf(mol, 'sto-3g', 'hf')
    hf.solve_scf(conv_tol=1e-12)

    # Density matrix against the cached reference (the reference is doubled
    # before comparison)
    dm0 = hf.get_density()
    nao_co = len(dm0)
    ref_dm0 = np.loadtxt(cache.files["co_h2o_sto3g_dma"]).reshape((nao_co, nao_co))
    expected_dm = ref_dm0 * 2
    np.testing.assert_allclose(expected_dm, dm0, atol=1e-6)

    unperturbed_fock = hf.get_fock()
    assert 'scf' in hf.energy
    assert abs(hf.energy["scf"] - reference_energy) < 1e-7

    # Perturbing the Fock matrix with zeros must leave every result unchanged
    hf.perturb_fock(np.zeros_like(dm0))
    hf.solve_scf()
    dm0_again = hf.get_density()
    np.testing.assert_allclose(expected_dm, dm0_again, atol=1e-6)
    assert abs(hf.energy["scf"] - reference_energy) < 1e-7
    perturbed_fock = hf.get_fock()
    np.testing.assert_allclose(unperturbed_fock, perturbed_fock, atol=1e-9)
Beispiel #16
0
def read_aggregate_molecules(input_json):
    """Read molecules from a JSON file (or tarball) and group them by SMILES.

    Parameters
    ----------
    input_json: str
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive.  For an
        archive, the member read is the archive path with ``.gz`` removed and
        ``.tar`` replaced by ``.json``.  The data must be a list of
        ``{'initial_molecules': [..], 'cmiles_identifiers': {..}}`` entries.

    Returns
    -------
    molecules_list_dict: defaultdict(list)
        Maps each ``canonical_isomeric_smiles`` to the list of its molecule
        JSON entries, with entries whose molecule hash repeats left out.
    molecule_attributes: dict
        Maps each ``canonical_isomeric_smiles`` to its ``cmiles_identifiers``.
    """
    # Load the list of entries, from the archive member or the plain file
    if input_json.endswith((".tar", ".tar.gz")):
        extract_file = input_json.replace(".gz", "").replace(".tar", ".json")
        with tarfile.open(input_json, 'r') as archive:
            molecule_data_list = json.load(archive.extractfile(extract_file))
    else:
        with open(input_json) as handle:
            molecule_data_list = json.load(handle)

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}
    seen_hashes = defaultdict(set)  # hashes already stored, per index

    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']
        molecule_attributes[index] = cmiles_ids

        known = seen_hashes[index]
        for m_json in initial_molecules:
            m_hash = Molecule.from_data(m_json).get_hash()
            # A repeated hash under the same index is a duplicate: skip it
            if m_hash in known:
                continue
            known.add(m_hash)
            molecules_list_dict[index].append(m_json)

    return molecules_list_dict, molecule_attributes
Beispiel #17
0
def test_hash_canary():
    """Pin the hashes of the water dimer, its oriented form and its fragments.

    The literal hashes act as a canary: any change in how molecules are hashed
    or oriented makes this test fail.
    """
    dimer = Molecule.from_data(
        """
    0 1
    O  -1.551007  -0.114520   0.000000
    H  -1.934259   0.762503   0.000000
    H  -0.599677   0.040712   0.000000
    --
    O   1.350625   0.111469   0.000000
    H   1.680398  -0.373741  -0.758561
    H   1.680398  -0.373741   0.758561
    """,
        dtype="psi4",
    )
    assert dimer.get_hash() == "42f3ac52af52cf2105c252031334a2ad92aa911c"

    # Hash after orientation
    oriented = dimer.orient_molecule()
    assert oriented.get_hash() == "632490a0601500bfc677e9277275f82fbc45affe"

    # Hashes of the two oriented fragments, in fragment order
    expected_fragment_hashes = (
        "d0b499739f763e8d3a5556b4ddaeded6a148e4d5",
        "bdc1f75bd1b7b999ff24783d7c1673452b91beb9",
    )
    fragments = [oriented.get_fragment(i, orient=True) for i in (0, 1)]
    for fragment, expected in zip(fragments, expected_fragment_hashes):
        assert fragment.get_hash() == expected
Beispiel #18
0
def test_pyscf_wrap0():
    """Test that PyScfWrap rejects invalid argument dictionaries."""
    mol = Molecule.from_data("""He 0 0 0""")
    basis = 'sto-3g'

    dict0 = {'mol': 0}
    embs0 = {"mol": mol, "basis": basis, "method": 'hf'}
    args0 = dict(embs0, method='adc')
    args1 = dict(embs0, method='dft')
    embs1 = dict(embs0, xc_code='LDA,VWN', t_code='XC_LDA_K_TF')

    # (expected exception, positional arguments for PyScfWrap), run in order
    failing_cases = (
        (KeyError, (dict0, embs0, embs1)),
        (KeyError, (embs0, dict0, embs1)),
        (ValueError, (embs0, args1, embs1)),
        (KeyError, (embs0, embs0, embs0)),
        (ValueError, (args0, embs0, embs1)),
    )
    for expected_error, wrap_args in failing_cases:
        with pytest.raises(expected_error):
            PyScfWrap(*wrap_args)
Beispiel #19
0
def test_fragment_charge_configurations(f1c, f1m, f2c, f2m, tc, tm):
    """Check total and per-fragment charge/multiplicity of a two-Li system.

    ``f1c``/``f1m`` and ``f2c``/``f2m`` are the charge and multiplicity written
    for each fragment; ``tc``/``tm`` are the values expected for the whole
    molecule.
    """
    template = """
    {f1c} {f1m}
    Li 0 0 0
    --
    {f2c} {f2m}
    Li 0 0 5
    """
    mol = Molecule.from_data(template.format(f1c=f1c, f1m=f1m, f2c=f2c, f2m=f2m))

    # Whole molecule
    assert pytest.approx(mol.molecular_charge) == tc
    assert mol.molecular_multiplicity == tm

    # First fragment: alone, then with a second fragment index supplied
    alone = mol.get_fragment(0)
    assert pytest.approx(alone.molecular_charge) == f1c
    assert alone.molecular_multiplicity == f1m

    with_other = mol.get_fragment(0, 1)
    assert pytest.approx(with_other.molecular_charge) == f1c
    assert with_other.molecular_multiplicity == f1m

    # Second fragment: alone, then with a second index, using list arguments
    alone = mol.get_fragment(1)
    assert pytest.approx(alone.molecular_charge) == f2c
    assert alone.molecular_multiplicity == f2m

    assert pytest.approx(mol.get_fragment([1], 0).molecular_charge) == f2c
    assert mol.get_fragment(1, [0]).molecular_multiplicity == f2m
Beispiel #20
0
def test_openmm_gaff_keywords(gaff_settings):
    """
    Run a gaff-2.1 energy evaluation of water through OpenMM for one set of
    keywords, expecting either a given exception or a given energy.
    """
    program = "openmm"

    # Attach the mapped SMILES of water to the molecule through its extras
    water_dict = qcng.get_molecule("water").dict()
    cmiles = {
        "canonical_isomeric_explicit_hydrogen_mapped_smiles":
        "[H:2][O:1][H:3]"
    }
    water_dict["extras"] = {"cmiles": cmiles}
    molecule = Molecule.from_data(water_dict)

    keywords, error, expected_result = gaff_settings
    inp = AtomicInput(molecule=molecule,
                      driver="energy",
                      model={"method": "gaff-2.1", "basis": "antechamber"},
                      keywords=keywords)

    if error is None:
        # Valid settings: the run must succeed and reproduce the expected energy
        ret = qcng.compute(inp, program, raise_error=False)
        assert ret.success is True
        assert ret.return_result == pytest.approx(expected_result, rel=1e-6)
        return

    # Invalid settings: the run must raise the expected exception
    with pytest.raises(error):
        _ = qcng.compute(inp, program, raise_error=True)
Beispiel #21
0
def test_openmm_cmiles_gradient_nomatch():
    """Check that a mapped SMILES not matching the geometry makes the run fail.

    A water geometry is tagged with the mapped SMILES of ethane; the gradient
    calculation must report failure with a shape-mismatch message.
    """
    program = "openmm"

    # Water geometry carrying the mapped SMILES of ethane in its extras
    water_dict = qcng.get_molecule("water").dict()
    ethane_cmiles = {
        "canonical_isomeric_explicit_hydrogen_mapped_smiles":
        "[H:3][C:1]([H:4])([H:5])[C:2]([H:6])([H:7])[H:8]"
    }
    water_dict["extras"] = {"cmiles": ethane_cmiles}
    molecule = Molecule.from_data(water_dict)

    inp = AtomicInput(molecule=molecule,
                      driver="gradient",
                      model={"method": "openff-1.0.0", "basis": "smirnoff"})
    ret = qcng.compute(inp, program, raise_error=False)

    # The 3-atom geometry cannot be a conformer of the 8-atom SMILES
    expected_message = (
        "molecule.add_conformer given input of the wrong shape: Given (3, 3), expected (8, 3)"
    )
    assert ret.success is False
    assert expected_message in ret.error.error_message
Beispiel #22
0
    def _spawn_optimization(
            grid_point: str, job: List[float],
            input_model: "TorsionDriveInput", config: "TaskConfig"
    ) -> Union[FailedOperation, OptimizationResult]:
        """Run one constrained optimization for a single torsion-drive grid point.

        Parameters
        ----------
        grid_point
            Whitespace-separated dihedral angles, one per dihedral listed in
            ``input_model.keywords.dihedrals``; each is converted with ``int``.
        job
            The flattened starting coordinates, of length ``n_atoms * 3``.
        input_model
            The input model providing the initial molecule, the dihedrals, the
            optimization specification and the input specification.
        config
            The configuration to launch the task with; passed on as a dict.

        Returns
        -------
            Whatever ``compute_procedure`` returns: the optimization result, or
            an object describing the failure.
        """

        from qcengine import compute_procedure

        # Clone the first initial molecule and swap in the job's coordinates
        molecule_fields = input_model.initial_molecule[0].copy(deep=True).dict()
        n_atoms = len(molecule_fields["symbols"])
        molecule_fields["geometry"] = np.array(job).reshape(n_atoms, 3)
        start_molecule = Molecule.from_data(molecule_fields)

        # One "set" constraint per dihedral, pairing it with its target angle
        dihedral_constraints = []
        for dihedral, angle in zip(input_model.keywords.dihedrals, grid_point.split()):
            dihedral_constraints.append({
                "type": "dihedral",
                "indices": dihedral,
                "value": int(angle),
            })

        # Start from the user's optimizer keywords; the constraints override
        # any "constraints" entry already present there
        optimization_spec = input_model.optimization_spec
        keywords = {**optimization_spec.keywords}
        keywords["constraints"] = {"set": dihedral_constraints}

        task = OptimizationInput(
            keywords=keywords,
            extras={},
            protocols=optimization_spec.protocols,
            input_specification=input_model.input_specification,
            initial_molecule=start_molecule,
        )

        return compute_procedure(
            task,
            procedure=optimization_spec.procedure,
            local_options=config.dict())
Beispiel #23
0
def test_molecule_json_serialization():
    """Check the JSON forms of the water dimer and their round trip."""
    # The JSON form is a string
    json_text = water_dimer_minima.json()
    assert isinstance(json_text, str)

    # With JSON encoding, the geometry is exported as a plain list
    json_ready = water_dimer_minima.dict(encoding="json")
    assert isinstance(json_ready["geometry"], list)

    # A molecule rebuilt from its JSON form equals the original
    rebuilt = Molecule.from_data(water_dimer_minima.json(), dtype="json")
    assert water_dimer_minima == rebuilt
def read_molecules(input_json):
    """ Read every molecule of the input json file under a unique index

    Parameters
    ----------
    input_json: str,
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive. For an
        archive, the member read is the archive path with ``.gz`` removed and
        ``.tar`` replaced by ``.json``.
        The data should be a list of {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    molecules_dict: dict
        The dictionary maps the index of a molecule to a Molecule object. e.g.
        {
            index1: Molecule1,
            index2: Molecule2,
        }

    molecule_attributes: dict
        The dictionary maps the index of a molecule to the attributes of the molecule, e.g.
        {
            index1: {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Note
    ----
    1. The mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is the base of the index.
    2. A running count per base is appended, starting at zero: index-0, index-1, ...
    """
    # Load the list of entries, from the archive member or the plain file
    if input_json.endswith((".tar", ".tar.gz")):
        extract_file = input_json.replace(".gz", "").replace(".tar", ".json")
        with tarfile.open(input_json, 'r') as archive:
            molecule_data_list = json.load(archive.extractfile(extract_file))
    else:
        with open(input_json) as handle:
            molecule_data_list = json.load(handle)

    molecules_dict = {}
    molecule_attributes = {}
    index_counter = Counter()  # molecules seen so far for each base index

    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']

        for initial_molecule in initial_molecules:
            qcel_molecule = Molecule.from_data(initial_molecule)

            # Base index plus the running count gives the unique index
            index_count = index_counter[index]
            index_counter[index] = index_count + 1
            this_index = f'{index}-{index_count}'

            assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}"
            molecules_dict[this_index] = qcel_molecule
            molecule_attributes[this_index] = cmiles_ids

    return molecules_dict, molecule_attributes
Beispiel #25
0
def test_from_data_kwargs():
    """Check charge/multiplicity keyword handling, including an invalid set.

    Two consistent combinations must yield charge 1 and multiplicity 2 on both
    the molecule and its first fragment.  A fragment charge that disagrees with
    the molecular charge must raise ``qcel.ValidationError``.
    """
    consistent_cases = (
        # Molecular and fragment-level values supplied together
        (
            """
        O 0 0 0
        H 0 1.5 0
        H 0 0 1.5
        """,
            {
                "molecular_charge": 1,
                "molecular_multiplicity": 2,
                "fragment_charges": [1],
                "fragment_multiplicities": [2],
            },
        ),
        # Only molecular values supplied; fragment values are not passed in
        (
            """
            O 0 0 0
            H 0 1.5 0
            H 0 0 1.5
            """,
            {
                "molecular_charge": 1,
                "molecular_multiplicity": 2,
            },
        ),
    )
    for geometry_text, charge_kwargs in consistent_cases:
        built = Molecule.from_data(geometry_text, **charge_kwargs)
        assert built.molecular_charge == 1
        assert built.molecular_multiplicity == 2
        assert built.fragment_charges[0] == 1
        assert built.fragment_multiplicities[0] == 2

    # Molecular charge 1 combined with a fragment charge of 2 must be rejected
    with pytest.raises(qcel.ValidationError) as raised:
        Molecule.from_data(
            """
            O 0 0 0
            H 0 1.5 0
            H 0 0 1.5
            """,
            molecular_charge=1,
            molecular_multiplicity=2,
            fragment_charges=[2],
        )
    assert "Inconsistent or unspecified chg/mult" in str(raised.value)
Beispiel #26
0
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energy and electron count, whole and per fragment.

    The checks run first on the two-fragment system and then on the molecule
    returned by ``get_fragment([1], 0)``.
    """

    dimer = Molecule.from_data("""
    0 1
    --
    O          0.75119       -0.61395        0.00271
    H          1.70471       -0.34686        0.00009
    --
    1 1
    N         -2.77793        0.00179       -0.00054
    H         -2.10136        0.51768        0.60424
    H         -3.45559       -0.51904        0.60067
    H         -2.26004       -0.67356       -0.60592
    H         -3.29652        0.68076       -0.60124
    units ang
    """)

    # (expected value, keyword arguments selecting the fragment, label)
    dimer_nre = (
        (34.60370459, {}, 'D'),
        (4.275210518, {'ifr': 0}, 'M1'),
        (16.04859029, {'ifr': 1}, 'M2'),
    )
    for expected, selector, label in dimer_nre:
        assert compare_values(expected,
                              dimer.nuclear_repulsion_energy(**selector),
                              label,
                              atol=1.e-5)

    dimer_nelec = (
        (20, {}, 'D'),
        (10, {'ifr': 0}, 'M1'),
        (10, {'ifr': 1}, 'M2'),
    )
    for expected, selector, label in dimer_nelec:
        assert compare(expected, dimer.nelectrons(**selector), label)

    selected = dimer.get_fragment([1], 0)
    # Notice the 0th/1st fragments change. Got to stop get_fragment from reordering
    ifr0 = 1
    ifr1 = 0

    selected_nre = (
        (16.04859029, {}, 'D'),
        (0.0, {'ifr': ifr0}, 'M1'),
        (16.04859029, {'ifr': ifr1}, 'M2'),
    )
    for expected, selector, label in selected_nre:
        assert compare_values(expected,
                              selected.nuclear_repulsion_energy(**selector),
                              label,
                              atol=1.e-5)

    selected_nelec = (
        (10, {}, 'D'),
        (0, {'ifr': ifr0}, 'M1'),
        (10, {'ifr': ifr1}, 'M2'),
    )
    for expected, selector, label in selected_nelec:
        assert compare(expected, selected.nelectrons(**selector), label)
def read_aggregate_molecules(input_json):
    """Load the molecules written by generate.py and group them by SMILES index.

    Parameters
    ----------
    input_json: str
        Path to the output of generate.py: either a plain JSON file or a
        ``.tar`` / ``.tar.gz`` archive containing that JSON file.
        The JSON document must be a list of
        {'initial_molecules': [..], 'cmiles_identifiers': {..}} records.
        For an archive, the member is looked up under the archive path with
        its ``.tar`` / ``.tar.gz`` suffix replaced by ``.json``; if no such
        member exists, the basename of that path is tried instead.

    Returns
    -------
    molecules_list_dict: dict
        Maps the index of a molecule to the list of its molecule JSON
        records, e.g.
        {
            index1: [Molecule_json1a, Molecule_json1b, ..],
            index2: [Molecule_json2a, Molecule_json2b, ..],
        }

    molecule_attributes: dict
        Maps the index of a molecule to the attributes of the molecule, e.g.
        {
            index1: {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Raises
    ------
    KeyError
        If the archive contains neither the expected member nor its basename,
        or if a record lacks one of the required keys.
    ValueError
        If the archive member exists but is not a regular file.

    Note
    ----
    1. mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is used as the index.
    2. Records sharing the same index are merged into a single list; within
       one index, molecules whose hash was already seen are skipped.  When
       several records share an index, the attributes of the last one win.
    """
    # Function-scope import keeps this block self-contained.
    import os

    if input_json.endswith((".tar", ".tar.gz")):
        # Strip only the trailing suffix; replacing substrings anywhere in the
        # path would corrupt directory names that happen to contain ".tar".
        archive_stem = input_json[:-len(".gz")] if input_json.endswith(".gz") else input_json
        member_name = archive_stem[:-len(".tar")] + ".json"
        with tarfile.open(input_json, 'r') as archive:
            try:
                member = archive.getmember(member_name)
            except KeyError:
                # Archives usually store the bare file name, so a path with a
                # directory component will not match; retry with the basename.
                member = archive.getmember(os.path.basename(member_name))
            member_file = archive.extractfile(member)
            if member_file is None:
                raise ValueError(
                    "Archive member {!r} in {!r} is not a regular file".format(member.name, input_json))
            with member_file:
                molecule_data_list = json.load(member_file)
    else:
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(input_json, encoding="utf-8") as infile:
            molecule_data_list = json.load(infile)

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}
    # Per-index set of molecule hashes already stored, used to drop duplicates.
    seen_hashes = defaultdict(set)
    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']
        molecule_attributes[index] = cmiles_ids
        for m_json in initial_molecules:
            m_hash = Molecule.from_data(m_json).get_hash()
            if m_hash in seen_hashes[index]:
                continue
            seen_hashes[index].add(m_hash)
            molecules_list_dict[index].append(m_json)
    return molecules_list_dict, molecule_attributes
Beispiel #28
0
    def update_derived_properties(self, verbose: bool = True):
        """Recompute every property derived from the stored molecule data.

        Refreshes the lookup hash of the XYZ geometry first and the
        thermochemistry second.

        Args:
            verbose: Whether to print out log messages; passed through to
                ``update_thermochem``.
        """
        # Parse the stored XYZ text, then hash the parsed molecule.
        parsed_geometry = Molecule.from_data(self.xyz, 'xyz')
        self.xyz_hash = get_hash(parsed_geometry)

        self.update_thermochem(verbose=verbose)
Beispiel #29
0
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energies and electron counts, whole and per fragment.

    The values are verified on a two-fragment molecule (an O/H fragment and an
    N/H4 fragment) and then on the result of
    ``get_fragment([1], 0, group_fragments=False)``, which leaves the fragment
    indices in their original order.
    """
    geometry_spec = """
    0 1
    --
    O          0.75119       -0.61395        0.00271
    H          1.70471       -0.34686        0.00009
    --
    1 1
    N         -2.77793        0.00179       -0.00054
    H         -2.10136        0.51768        0.60424
    H         -3.45559       -0.51904        0.60067
    H         -2.26004       -0.67356       -0.60592
    H         -3.29652        0.68076       -0.60124
    units ang
    """

    def check(molecule, energies, electron_counts):
        """Assert the expected energies first, then the expected electron counts."""
        for label, selection, expected in energies:
            assert compare_values(expected,
                                  molecule.nuclear_repulsion_energy(**selection),
                                  label,
                                  atol=1.0e-5)
        for label, selection, expected in electron_counts:
            assert compare(expected, molecule.nelectrons(**selection), label)

    system = Molecule.from_data(geometry_spec)
    check(
        system,
        energies=[
            ("D", {}, 34.60370459),
            ("M1", {"ifr": 0}, 4.275210518),
            ("M2", {"ifr": 1}, 16.04859029),
        ],
        electron_counts=[
            ("D", {}, 20),
            ("M1", {"ifr": 0}, 10),
            ("M2", {"ifr": 1}, 10),
        ],
    )

    system = system.get_fragment([1], 0, group_fragments=False)
    # With the default group_fragments=True the 0th/1st fragments would swap;
    # here they keep their positions, so the indices are used unchanged.
    first_idx, second_idx = 0, 1
    check(
        system,
        energies=[
            ("D", {}, 16.04859029),
            ("M1", {"ifr": first_idx}, 0.0),
            ("M2", {"ifr": second_idx}, 16.04859029),
        ],
        electron_counts=[
            ("D", {}, 10),
            ("M1", {"ifr": first_idx}, 0),
            ("M2", {"ifr": second_idx}, 10),
        ],
    )
Beispiel #30
0
def test_pyscf_base():
    """Test that ScfPyScf rejects invalid inputs with the expected exceptions.

    Every check below expects an exception; no successful calculation is
    asserted here.
    """
    # Two single-atom molecules placed at the origin.
    mol = Molecule.from_data("""Li 0 0 0""")
    mol2 = Molecule.from_data("""He 0 0 0""")
    # An integer stands in for a basis-set name to provoke the TypeError checks.
    basis = 0
    basis2 = 'sto-3g'
    method0 = 'adc'
    method2 = 'hf'
    method3 = 'dft'
    # Integer basis passed to the constructor.
    with pytest.raises(TypeError):
        hf = ScfPyScf(mol, basis, method2)
    # NOTE(review): two statements share this block, so either may be the one
    # raising ValueError; if the constructor raises, perturb_fock is never
    # reached -- confirm which call is under test.
    with pytest.raises(ValueError):
        hf = ScfPyScf(mol2, basis2, method0)
        hf.perturb_fock(basis)
    # NOTE(review): presumably the constructor succeeds here and the TypeError
    # comes from perturb_fock receiving an int -- confirm.
    with pytest.raises(TypeError):
        hf = ScfPyScf(mol2, basis2, method2)
        hf.perturb_fock(basis)
    # 'dft' as the method string is expected to be rejected with ValueError.
    with pytest.raises(ValueError):
        ScfPyScf(mol2, basis2, method3)
    # Same basis and method as the He case above, but with the Li molecule.
    # NOTE(review): presumably Li is rejected as an open-shell system -- confirm.
    with pytest.raises(NotImplementedError):
        ScfPyScf(mol, basis2, method2)