Ejemplo n.º 1
0
def test_to_from_file_complex(tmp_path, dtype):
    """Write the water dimer to ``water.<dtype>`` and read it back unchanged."""
    filename = "water." + dtype
    target = tmp_path / filename

    # No explicit dtype is passed to either call; presumably the format is
    # inferred from the file extension -- TODO confirm against Molecule.to_file.
    water_dimer_minima.to_file(target)
    reloaded = Molecule.from_file(target)

    assert reloaded == water_dimer_minima
Ejemplo n.º 2
0
def jajo2mol(jajodic):
    """Build a Molecule from the dictionary *jajodic* extracted from
    JAINDX and JOBARC.

    The ``b'MAP2ZMAT'``, ``b'ATOMCHRG'`` and ``b'COORD   '`` entries are
    combined into an ``xyz+`` string with a ``bohr`` header, which is then
    parsed with ``fix_com`` and ``fix_orientation`` enabled. An entry of 0
    in ``ATOMCHRG`` is written with the symbol ``GH``.

    """
    atom_order = jajodic[b'MAP2ZMAT']
    charges = jajodic[b'ATOMCHRG']
    flat_coords = jajodic[b'COORD   ']
    natom = len(charges)

    # TODO chgmult, though not really necessary for reorientation
    pieces = ['%d bohr\n\n' % (natom)]
    for iat in range(natom):
        # MAP2ZMAT values are shifted down by one before being used as indices.
        src = atom_order[iat] - 1
        charge = charges[src]
        symbol = 'GH' if charge == 0 else qcel.periodictable.to_E(charge)
        # COORD is read as a flat sequence, three consecutive values per atom.
        start = src * 3
        pieces.append('%s %21.15f %21.15f %21.15f\n' % (
            symbol, flat_coords[start], flat_coords[start + 1],
            flat_coords[start + 2]))
    molxyz = ''.join(pieces)

    parsed = qcel.molparse.from_string(molxyz,
                                       dtype='xyz+',
                                       fix_com=True,
                                       fix_orientation=True)
    schema = qcel.molparse.to_schema(parsed["qm"], dtype=2)
    return Molecule(validate=False, **schema)
def read_aggregate_molecules(input_json):
    """Load molecule records and group their unique conformers by SMILES.

    Parameters
    ----------
    input_json: str
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive. For an
        archive, the member that is read is named after the archive path with
        ``.gz`` removed and ``.tar`` replaced by ``.json``. The JSON payload is
        iterated as a list of records holding ``'initial_molecules'`` and
        ``'cmiles_identifiers'``.

    Returns
    -------
    molecules_list_dict: defaultdict(list)
        Maps each ``canonical_isomeric_smiles`` to the list of molecule JSON
        entries kept for it. Entries whose ``Molecule`` hash was already seen
        under the same index are skipped.
    molecule_attributes: dict
        Maps each ``canonical_isomeric_smiles`` to its ``cmiles_identifiers``.
    """
    # Load the raw records, from inside the archive when one is given.
    if input_json.endswith((".tar", ".tar.gz")):
        member_name = input_json.replace(".gz", "").replace(".tar", ".json")
        with tarfile.open(input_json, 'r') as archive:
            records = json.load(archive.extractfile(member_name))
    else:
        with open(input_json) as handle:
            records = json.load(handle)

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}
    seen_hashes = defaultdict(set)  # per-index hashes already kept
    for record in records:
        conformers = record['initial_molecules']
        identifiers = record['cmiles_identifiers']
        index = identifiers['canonical_isomeric_smiles']
        molecule_attributes[index] = identifiers
        for m_json in conformers:
            digest = Molecule.from_data(m_json).get_hash()
            if digest in seen_hashes[index]:
                continue  # duplicate of a conformer already stored
            seen_hashes[index].add(digest)
            molecules_list_dict[index].append(m_json)
    return molecules_list_dict, molecule_attributes
def submit_vertical_geometries(geom_dataset: GeometryDataset,
                               vert_datasets: List[SinglePointDataset]):
    """Queue the neutral geometry of each molecule in three charge states.

    For every entry returned by ``geom_dataset.get_geometries()`` that has a
    ``'neutral'`` geometry, that geometry is re-created with charges -1, 0
    and +1 and added to each dataset in ``vert_datasets``. Afterwards each
    dataset's collection is saved and its computations are started.

    Args:
        geom_dataset: Source of the geometries
        vert_datasets: Datasets that receive the new molecules
    """
    all_geoms = geom_dataset.get_geometries()
    print(f'Found {len(all_geoms)} molecules in {geom_dataset.coll.name}')

    # (name suffix, molecular charge) for each state built from the neutral geometry
    charge_states = (('reduced', -1), ('neutral', 0), ('oxidized', 1))

    for inchi, geoms in all_geoms.items():
        # Entries without a neutral geometry are skipped
        if 'neutral' not in geoms:
            continue
        neutral_xyz = geoms['neutral'].to_string('xyz')

        for postfix, charge in charge_states:
            # Only the charged states carry their postfix in the name
            identifier = f'{inchi}_xtb_neutral'
            if charge != 0:
                identifier = f'{identifier}_{postfix}'
            charged_mol = Molecule.from_data(neutral_xyz,
                                             'xyz',
                                             molecular_charge=charge,
                                             name=identifier)
            # Saving is deferred until every molecule has been added
            for dataset in vert_datasets:
                dataset.add_molecule(charged_mol, inchi, save=False)

    # Save each collection, then launch its computations
    for dataset in vert_datasets:
        dataset.coll.save()
        n_started = dataset.start_computation()
        print(f'Started {n_started} computations for {dataset.coll.name}')
Ejemplo n.º 5
0
def harvest_GRD(grd):
    """Parse the text *grd* of a Cfour GRD file into a molecule and a gradient.

    The first token of the first line is the atom count ``Nat``. The next
    ``Nat`` lines give one atom each (first field: nuclear charge, last three
    fields: coordinates) and the ``Nat`` lines after those supply the gradient
    in their last three fields. The molecule carries no charge, multiplicity
    or fragment information. Returns a ``(Molecule, gradient)`` pair where the
    gradient is a list of ``[x, y, z]`` float lists.

    """
    rows = grd.splitlines()
    natom = int(rows[0].split()[0])

    xyz_parts = [f"{natom} bohr\n\n"]
    grad = []
    for iat in range(natom):
        geom_fields = rows[iat + 1].split()
        # The charge may be written as a float; a value of 0 is labelled "GH".
        charge = int(float(geom_fields[0]))
        symbol = "GH" if charge == 0 else qcel.periodictable.to_E(charge)
        xyz_parts.append("%s %16s %16s %16s\n" % (
            symbol, geom_fields[-3], geom_fields[-2], geom_fields[-1]))

        # The gradient row for this atom sits Nat lines further down.
        grad_fields = rows[iat + 1 + natom].split()
        grad.append([float(grad_fields[k]) for k in (-3, -2, -1)])
    molxyz = "".join(xyz_parts)

    parsed = qcel.molparse.from_string(molxyz,
                                       dtype="xyz+",
                                       fix_com=True,
                                       fix_orientation=True)
    schema = qcel.molparse.to_schema(parsed["qm"], dtype=2)
    mol = Molecule(validate=False, **schema)

    return mol, grad
Ejemplo n.º 6
0
def run_single_point(xyz: str,
                     driver: DriverEnum,
                     qc_config: QCInputSpecification,
                     charge: int = 0,
                     compute_config: Optional[Union[TaskConfig, Dict]] = None,
                     code: str = _code) -> AtomicResult:
    """Run one calculation on a structure without changing its geometry

    Args:
        xyz: Structure in XYZ format
        driver: Quantity to compute; replaces any driver stored in `qc_config`
        qc_config: Specification whose fields (other than `driver`) are passed to the input
        charge: Charge of the molecule
        compute_config: Passed to the compute call as `local_options`
        code: Which QC code to use for the evaluation
    Returns:
        Result of the compute call, which is invoked with `raise_error=True`
    """

    # Build the input: parsed structure plus every spec field except the driver
    molecule = Molecule.from_data(xyz, dtype="xyz", molecular_charge=charge)
    spec_fields = qc_config.dict(exclude={'driver'})
    input_spec = AtomicInput(molecule=molecule, driver=driver, **spec_fields)

    # Hand off to the compute engine
    result = compute(input_spec,
                     code,
                     local_options=compute_config,
                     raise_error=True)
    return result
Ejemplo n.º 7
0
def relax_structure(xyz: str,
                    qc_config: QCInputSpecification,
                    charge: int = 0,
                    compute_config: Optional[Union[TaskConfig, Dict]] = None,
                    code: str = _code) -> OptimizationResult:
    """Run a geometry optimization starting from an XYZ structure

    Args:
        xyz (str): Structure of a molecule in XYZ format
        qc_config: Specification used as the `input_specification` of the optimization
        charge (int): Charge of the molecule
        compute_config: Passed to the procedure call as `local_options`
        code (str): Which QC code to use for the evaluation
    Returns:
        (OptimizationResult): Output of the procedure call, which is invoked with `raise_error=True`
    """

    # Parse the starting structure
    start_mol = Molecule.from_data(xyz, dtype='xyz', molecular_charge=charge)

    # "nwchem" goes through the "nwchemdriver" procedure with its own keywords;
    # every other code is run through "geometric" with the code as `program`
    use_nwchem = code == "nwchem"
    relax_code = "nwchemdriver" if use_nwchem else "geometric"
    if use_nwchem:
        keywords = {"driver__maxiter": 100, "set__driver:linopt": 0}
    else:
        keywords = {"program": code}

    opt_input = OptimizationInput(input_specification=qc_config,
                                  initial_molecule=start_mol,
                                  keywords=keywords)
    result = compute_procedure(opt_input,
                               relax_code,
                               local_options=compute_config,
                               raise_error=True)
    return result
Ejemplo n.º 8
0
def test_fragment_charge_configurations(f1c, f1m, f2c, f2m, tc, tm):
    """Check total and per-fragment charge/multiplicity of a two-Li system."""
    template = """
    {f1c} {f1m}
    Li 0 0 0
    --
    {f2c} {f2m}
    Li 0 0 5
    """
    mol = Molecule.from_data(template.format(f1c=f1c, f1m=f1m, f2c=f2c, f2m=f2m))

    # Whole system
    assert pytest.approx(mol.molecular_charge) == tc
    assert mol.molecular_multiplicity == tm

    # First fragment, alone and with the other fragment passed as second argument
    first_alone = mol.get_fragment(0)
    assert pytest.approx(first_alone.molecular_charge) == f1c
    assert first_alone.molecular_multiplicity == f1m

    first_with_other = mol.get_fragment(0, 1)
    assert pytest.approx(first_with_other.molecular_charge) == f1c
    assert first_with_other.molecular_multiplicity == f1m

    # Second fragment, alone and with the other fragment passed as second argument
    second_alone = mol.get_fragment(1)
    assert pytest.approx(second_alone.molecular_charge) == f2c
    assert second_alone.molecular_multiplicity == f2m

    # Both list and scalar spellings of the arguments are exercised here
    second_list_first = mol.get_fragment([1], 0)
    assert pytest.approx(second_list_first.molecular_charge) == f2c
    second_list_second = mol.get_fragment(1, [0])
    assert second_list_second.molecular_multiplicity == f2m
Ejemplo n.º 9
0
def compute_reference_energy(element: str,
                             qc_config: QCInputSpecification,
                             n_open: int,
                             code: str = _code) -> float:
    """Compute the energy of a single atom placed at the origin

    Args:
        element (str): Symbol of the element
        qc_config (QCInputSpecification): Specification whose fields (other than `driver`) are passed to the input
        n_open (int): Value passed to the molecule as `molecular_multiplicity`
        code (str): Which QC code to use for the evaluation
    Returns:
        (float): `return_result` of the energy calculation
    """

    # NOTE(review): `n_open` is used directly as the multiplicity -- confirm
    # that callers pass a multiplicity rather than an unpaired-electron count
    atom = Molecule.from_data(f'1\n{element}\n{element} 0 0 0',
                              dtype='xyz',
                              molecular_multiplicity=n_open,
                              molecular_charge=0)

    # Energy evaluation on the neutral one-atom system
    spec_fields = qc_config.dict(exclude={'driver'})
    input_spec = AtomicInput(molecule=atom, driver='energy', **spec_fields)
    return compute(input_spec, code, raise_error=True).return_result
Ejemplo n.º 10
0
def test_hf_co_sto3g():
    """Check ScfPyScf densities, energy and Fock matrix for CO with HF/sto-3g."""
    co = Molecule.from_data(
        """C        -3.6180905689    1.3768035675   -0.0207958979
                                O        -4.7356838533    1.5255563000    0.1150239130"""
    )
    ref_energy = -111.22516947

    hf = ScfPyScf(co, 'sto-3g', 'hf')
    hf.solve_scf(conv_tol=1e-12)
    density = hf.get_density()
    nao = len(density)

    # The stored reference density is compared after multiplying it by two.
    reference = np.loadtxt(cache.files["co_h2o_sto3g_dma"]).reshape((nao, nao))
    expected_density = reference * 2
    np.testing.assert_allclose(expected_density, density, atol=1e-6)

    fock_before = hf.get_fock()
    assert 'scf' in hf.energy
    assert abs(hf.energy["scf"] - ref_energy) < 1e-7

    # An all-zero embedding potential is applied and the SCF is solved again;
    # density, energy and Fock matrix are all expected to be unchanged.
    hf.perturb_fock(np.zeros_like(density))
    hf.solve_scf()
    density_again = hf.get_density()
    np.testing.assert_allclose(expected_density, density_again, atol=1e-6)
    assert abs(hf.energy["scf"] - ref_energy) < 1e-7
    fock_after = hf.get_fock()
    np.testing.assert_allclose(fock_before, fock_after, atol=1e-9)
Ejemplo n.º 11
0
def test_molecule_data_constructor_dict():
    """Round-trip the water dimer through dict, JSON and psi4-string forms."""
    water_psi = water_dimer_minima.copy()

    # dict -> Molecule
    from_dict = Molecule.from_data(water_psi.dict())
    assert water_psi == from_dict

    # JSON text -> Molecule
    from_json = Molecule.from_data(water_psi.json(), "json")
    assert water_psi == from_json

    # psi4 string -> Molecule
    from_psi4 = Molecule.from_data(water_psi.to_string("psi4"), dtype="psi4")
    assert water_psi == from_psi4

    # Hash value copied from schema_version=1
    expected_hash = "3c4b98f515d64d1adc1648fe1fe1d6789e978d34"
    assert water_psi.get_hash() == expected_hash
    assert water_psi.schema_version == 2
    assert water_psi.schema_name == "qcschema_molecule"
Ejemplo n.º 12
0
def test_to_from_file_simple(tmp_path, dtype, filext):
    """Write a three-atom water to ``water.<filext>`` and read it back unchanged."""
    water_text = """
    O 0 0 0
    H 0 1.5 0
    H 0 0 1.5
    """
    benchmol = Molecule.from_data(water_text)

    # `dtype` is not forwarded; only the extension distinguishes the cases here.
    target = tmp_path / ("water." + filext)
    benchmol.to_file(target)
    reloaded = Molecule.from_file(target)

    assert reloaded == benchmol
Ejemplo n.º 13
0
    def _spawn_optimization(
            grid_point: str, job: List[float],
            input_model: "TorsionDriveInput", config: "TaskConfig"
    ) -> Union[FailedOperation, OptimizationResult]:
        """Run one constrained optimization for a single grid point.

        Parameters
        ----------
        grid_point
            Whitespace separated dihedral angles, one per entry of
            ``input_model.keywords.dihedrals``. Each is converted with ``int``.
        job
            Flat starting coordinates; reshaped to ``(n_atoms, 3)`` where
            ``n_atoms`` is the number of symbols of the first initial molecule.
        input_model
            Supplies the initial molecule, the dihedral definitions, the
            optimization spec (keywords, protocols, procedure) and the
            input specification.
        config
            Task configuration; its ``dict()`` is passed as ``local_options``.

        Returns
        -------
            Whatever ``qcengine.compute_procedure`` returns for the
            optimization input.
        """

        from qcengine import compute_procedure

        # Start from a deep copy of the first initial molecule and swap in the
        # geometry of this job.
        molecule_fields = input_model.initial_molecule[0].copy(deep=True).dict()
        n_atoms = len(molecule_fields["symbols"])
        molecule_fields["geometry"] = np.array(job).reshape(n_atoms, 3)
        start_molecule = Molecule.from_data(molecule_fields)

        # One "set" constraint per (dihedral, target angle) pair.
        dihedral_constraints = [
            {
                "type": "dihedral",
                "indices": dihedral,
                "value": int(angle),
            }
            for dihedral, angle in zip(input_model.keywords.dihedrals,
                                       grid_point.split())
        ]

        optimization_spec = input_model.optimization_spec
        keywords = {
            **optimization_spec.keywords,
            "constraints": {"set": dihedral_constraints},
        }

        input_data = OptimizationInput(
            keywords=keywords,
            extras={},
            protocols=optimization_spec.protocols,
            input_specification=input_model.input_specification,
            initial_molecule=start_molecule,
        )

        return compute_procedure(
            input_data,
            procedure=optimization_spec.procedure,
            local_options=config.dict())
Ejemplo n.º 14
0
    def _compute(self, driver):
        """Package the externally supplied values as an AtomicResult.

        Nothing is calculated here apart from the nuclear repulsion energy of
        ``self.molecule``. The energy, gradient and Hessian are read from
        ``self.external_energy``, ``self.external_gradient`` and
        ``self.external_hessian``; all three attributes are reset to ``None``
        before returning.

        Raises ``OptError`` when ``driver`` is "energy", "gradient" or
        "hessian" and that quantity, or a lower-order one, is ``None``.
        """
        logging.getLogger(__name__).info("UserComputer only returning provided values")
        energy = self.external_energy
        gradient = self.external_gradient
        hessian = self.external_hessian

        # Each driver needs its own quantity and every lower-order one.
        if driver == "hessian":
            if hessian is None or gradient is None or energy is None:
                raise OptError("Must provide hessian, gradient, and energy.")
        elif driver == "gradient":
            if gradient is None or energy is None:
                raise OptError("Must provide gradient and energy.")
        elif driver == "energy":
            if energy is None:
                raise OptError("Must provide energy.")

        result = deepcopy(UserComputer.output_skeleton)
        result["driver"] = driver
        mol = Molecule(**self.molecule)
        result["molecule"] = mol

        properties = result["properties"]
        qcvars = result["extras"]["qcvars"]

        nre = mol.nuclear_repulsion_energy()
        properties["nuclear_repulsion_energy"] = nre
        qcvars["NUCLEAR REPULSION ENERGY"] = nre

        properties["return_energy"] = energy
        qcvars["CURRENT ENERGY"] = energy

        # Fill the derivative qcvars and pick the quantity reported as the
        # result; an unrecognised driver leaves "return_result" untouched.
        if driver == "energy":
            result["return_result"] = energy
        elif driver == "gradient":
            qcvars["CURRENT GRADIENT"] = gradient
            result["return_result"] = gradient
        elif driver == "hessian":
            qcvars["CURRENT GRADIENT"] = gradient
            qcvars["CURRENT HESSIAN"] = hessian
            result["return_result"] = hessian

        # maybe do this to protect against repeatedly going back for same?
        self.external_energy = self.external_gradient = self.external_hessian = None
        return AtomicResult(**result)
Ejemplo n.º 15
0
def get_molecule(name):
    """
    Returns a Molecule built from a deep copy of the test molecule stored
    under *name*; raises KeyError when the name is not registered.
    """
    if name in _test_mols:
        # Deep copy so the stored definition is never handed to the constructor.
        fields = copy.deepcopy(_test_mols[name])
        return Molecule(**fields)

    raise KeyError(f"Molecule name '{name}' not found")
Ejemplo n.º 16
0
def test_molecule_data_constructor_numpy():
    """Rebuild the water dimer from an array of atomic numbers and coordinates."""
    water_psi = water_dimer_minima.copy()

    # One row per atom: atomic number followed by the geometry scaled by the
    # Bohr -> angstrom conversion factor.
    bohr_to_angstrom = qcel.constants.conversion_factor("Bohr", "angstrom")
    numbers_column = np.array(water_psi.atomic_numbers).reshape(-1, 1)
    npwater = np.hstack((numbers_column, water_psi.geometry * bohr_to_angstrom))

    # With the dtype stated explicitly
    explicit = Molecule.from_data(npwater,
                                  name="water dimer",
                                  dtype="numpy",
                                  frags=[3])
    assert water_psi == explicit

    # With the dtype left out
    implicit = Molecule.from_data(npwater, name="water dimer", frags=[3])
    assert water_psi == implicit

    # The formula is the same for the default and both named orderings.
    assert water_psi.get_molecular_formula() == "H4O2"
    for ordering in ("alphabetical", "hill"):
        assert water_psi.get_molecular_formula(order=ordering) == "H4O2"
Ejemplo n.º 17
0
def test_molecule_json_serialization():
    """Check the JSON text, the JSON-encoded dict and the JSON round trip."""
    json_text = water_dimer_minima.json()
    assert isinstance(json_text, str)

    # With encoding="json" the geometry is expected to come back as a list.
    encoded = water_dimer_minima.dict(encoding="json")
    assert isinstance(encoded["geometry"], list)

    rebuilt = Molecule.from_data(water_dimer_minima.json(), dtype="json")
    assert water_dimer_minima == rebuilt
def read_molecules(input_json):
    """ Load every conformer from the input json file under a unique index

    Parameters
    ----------
    input_json: str,
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive. For an
        archive, the member that is read is named after the archive path with
        ``.gz`` removed and ``.tar`` replaced by ``.json``.
        The data in the json file should be a list of {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    molecules_dict: dict
        Maps the unique index of each conformer to its Molecule object. e.g.
        {
            'smiles-0': Molecule1,
            'smiles-1': Molecule2,
        }

    molecule_attributes: dict
        Maps the same unique index to the cmiles_identifiers of its record, e.g.
        {
            'smiles-0': {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Note
    ----
    1. The mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is the base of the index.
    2. A running count per base, starting at 0, is appended as ``{base}-{count}``.
    """
    # Load the raw records, from inside the archive when one is given.
    if input_json.endswith((".tar", ".tar.gz")):
        member_name = input_json.replace(".gz", "").replace(".tar", ".json")
        with tarfile.open(input_json, 'r') as archive:
            records = json.load(archive.extractfile(member_name))
    else:
        with open(input_json) as handle:
            records = json.load(handle)

    molecules_dict = {}
    molecule_attributes = {}
    conformers_seen = Counter()  # conformers stored so far for each base index
    for record in records:
        conformers = record['initial_molecules']
        identifiers = record['cmiles_identifiers']
        base_index = identifiers['canonical_isomeric_smiles']
        for conformer in conformers:
            qcel_molecule = Molecule.from_data(conformer)
            this_index = f'{base_index}-{conformers_seen[base_index]}'
            conformers_seen[base_index] += 1
            assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {record}"
            molecules_dict[this_index] = qcel_molecule
            molecule_attributes[this_index] = identifiers
    return molecules_dict, molecule_attributes
Ejemplo n.º 19
0
    def generate_schema_input(self, driver):
        """Return an AtomicInput for *driver* built from this object's
        molecule, model and keywords."""
        return AtomicInput(molecule=Molecule(**self.molecule),
                           model=self.model,
                           keywords=self.keywords,
                           driver=driver)
Ejemplo n.º 20
0
def test_orient_nomasses():
    """
    Orienting a molecule built without masses must leave the stored masses unset
    """
    helium_pair = Molecule(
        symbols=["He", "He"],
        geometry=[0, 0, -2, 0, 0, 2],
        orient=True,
        validated=True,
    )

    # The private field stays empty, so masses must be generated on the fly.
    assert helium_pair.__dict__["masses_"] is None

    expected_geometry = [[2, 0, 0], [-2, 0, 0]]
    assert compare_values(expected_geometry, helium_pair.geometry)
Ejemplo n.º 21
0
def test_from_file_string(tmp_path):
    """Read back a molecule from a ``.psimol`` file holding its psi4 string."""
    psimol_path = tmp_path / "water.psimol"
    psimol_path.write_text(water_dimer_minima.to_string("psi4"))

    loaded = Molecule.from_file(psimol_path)

    # compare() is exercised with both a Molecule and its dict form.
    for reference in (water_dimer_minima, water_dimer_minima.dict()):
        assert loaded.compare(reference)
Ejemplo n.º 22
0
def test_from_data_kwargs():
    """Check charge/multiplicity keywords passed alongside a molecule string."""
    water_a = """
        O 0 0 0
        H 0 1.5 0
        H 0 0 1.5
        """
    water_b = """
            O 0 0 0
            H 0 1.5 0
            H 0 0 1.5
            """

    def assert_cation_doublet(molecule):
        assert molecule.molecular_charge == 1
        assert molecule.molecular_multiplicity == 2
        assert molecule.fragment_charges[0] == 1
        assert molecule.fragment_multiplicities[0] == 2

    # Molecular and fragment values all given explicitly
    mol = Molecule.from_data(
        water_a,
        molecular_charge=1,
        molecular_multiplicity=2,
        fragment_charges=[1],
        fragment_multiplicities=[2],
    )
    assert_cation_doublet(mol)

    # Only molecular values given; fragment values must still match
    mol = Molecule.from_data(
        water_b,
        molecular_charge=1,
        molecular_multiplicity=2,
    )
    assert_cation_doublet(mol)

    # A fragment charge that disagrees with the molecular charge is rejected
    with pytest.raises(qcel.ValidationError) as excinfo:
        Molecule.from_data(
            water_b,
            molecular_charge=1,
            molecular_multiplicity=2,
            fragment_charges=[2],
        )
    assert "Inconsistent or unspecified chg/mult" in str(excinfo.value)
Ejemplo n.º 23
0
def test_to_from_file_charge_spin(tmp_path, dtype, filext):
    """Round-trip a charge +1, multiplicity 2 water through a file."""
    cation_text = """
    1 2
    O 0 0 0
    H 0 1.5 0
    H 0 0 1.5
    """
    benchmol = Molecule.from_data(cation_text)

    # The same dtype is given explicitly for writing and for reading.
    target = tmp_path / ("water." + filext)
    benchmol.to_file(target, dtype=dtype)
    reloaded = Molecule.from_file(target, dtype=dtype)

    assert reloaded.molecular_charge == 1
    assert reloaded.molecular_multiplicity == 2
    assert reloaded.fragment_charges[0] == 1
    assert reloaded.fragment_multiplicities[0] == 2
    assert reloaded == benchmol
Ejemplo n.º 24
0
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energies and electron counts for a two-fragment
    system, then again after ``get_fragment([1], 0, group_fragments=False)``."""

    def check_nre(molecule, expected, label, **kwargs):
        value = molecule.nuclear_repulsion_energy(**kwargs)
        assert compare_values(expected, value, label, atol=1.0e-5)

    def check_nelec(molecule, expected, label, **kwargs):
        assert compare(expected, molecule.nelectrons(**kwargs), label)

    dimer = Molecule.from_data("""
    0 1
    --
    O          0.75119       -0.61395        0.00271
    H          1.70471       -0.34686        0.00009
    --
    1 1
    N         -2.77793        0.00179       -0.00054
    H         -2.10136        0.51768        0.60424
    H         -3.45559       -0.51904        0.60067
    H         -2.26004       -0.67356       -0.60592
    H         -3.29652        0.68076       -0.60124
    units ang
    """)

    # Full system, then each fragment selected with ifr
    check_nre(dimer, 34.60370459, "D")
    check_nre(dimer, 4.275210518, "M1", ifr=0)
    check_nre(dimer, 16.04859029, "M2", ifr=1)

    check_nelec(dimer, 20, "D")
    check_nelec(dimer, 10, "M1", ifr=0)
    check_nelec(dimer, 10, "M2", ifr=1)

    subset = dimer.get_fragment([1], 0, group_fragments=False)
    # Notice the 0th/1st fragments change if default group_fragments=True.
    ifr0 = 0
    ifr1 = 1
    check_nre(subset, 16.04859029, "D")
    check_nre(subset, 0.0, "M1", ifr=ifr0)
    check_nre(subset, 16.04859029, "M2", ifr=ifr1)

    check_nelec(subset, 10, "D")
    check_nelec(subset, 0, "M1", ifr=ifr0)
    check_nelec(subset, 10, "M2", ifr=ifr1)
Ejemplo n.º 25
0
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energies and electron counts for a two-fragment
    system, then again after ``get_fragment([1], 0)`` with default grouping."""

    def check_nre(molecule, expected, label, **kwargs):
        value = molecule.nuclear_repulsion_energy(**kwargs)
        assert compare_values(expected, value, label, atol=1.e-5)

    def check_nelec(molecule, expected, label, **kwargs):
        assert compare(expected, molecule.nelectrons(**kwargs), label)

    dimer = Molecule.from_data("""
    0 1
    --
    O          0.75119       -0.61395        0.00271
    H          1.70471       -0.34686        0.00009
    --
    1 1
    N         -2.77793        0.00179       -0.00054
    H         -2.10136        0.51768        0.60424
    H         -3.45559       -0.51904        0.60067
    H         -2.26004       -0.67356       -0.60592
    H         -3.29652        0.68076       -0.60124
    units ang
    """)

    # Full system, then each fragment selected with ifr
    check_nre(dimer, 34.60370459, 'D')
    check_nre(dimer, 4.275210518, 'M1', ifr=0)
    check_nre(dimer, 16.04859029, 'M2', ifr=1)

    check_nelec(dimer, 20, 'D')
    check_nelec(dimer, 10, 'M1', ifr=0)
    check_nelec(dimer, 10, 'M2', ifr=1)

    subset = dimer.get_fragment([1], 0)
    # Notice the 0th/1st fragments change. Got to stop get_fragment from reordering
    ifr0 = 1
    ifr1 = 0
    check_nre(subset, 16.04859029, 'D')
    check_nre(subset, 0.0, 'M1', ifr=ifr0)
    check_nre(subset, 16.04859029, 'M2', ifr=ifr1)

    check_nelec(subset, 10, 'D')
    check_nelec(subset, 0, 'M1', ifr=ifr0)
    check_nelec(subset, 10, 'M2', ifr=ifr1)
Ejemplo n.º 26
0
def test_from_file_numpy(tmp_path):
    """Load a molecule from a ``.npy`` file of atomic numbers and coordinates."""
    # One row per atom: atomic number followed by the stored geometry.
    numbers_column = np.array(water_molecule.atomic_numbers).reshape(-1, 1)
    npwater = np.hstack((numbers_column, water_molecule.geometry))

    # Try npy
    npy_path = tmp_path / "water.npy"
    np.save(npy_path, npwater)
    loaded = Molecule.from_file(npy_path)

    assert loaded.compare(water_molecule)
Ejemplo n.º 27
0
    def update_derived_properties(self, verbose: bool = True):
        """Refresh the values that are derived from this record's stored data

        Recomputes `xyz_hash` from `self.xyz` and then calls `update_thermochem`

        Args:
            verbose: Forwarded to `update_thermochem`
        """

        parsed = Molecule.from_data(self.xyz, 'xyz')
        self.xyz_hash = get_hash(parsed)
        self.update_thermochem(verbose=verbose)
def read_aggregate_molecules(input_json):
    """ Load the molecules from the input json file and aggregate the unique
    conformers that share an index into one list

    Parameters
    ----------
    input_json: str,
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive. For an
        archive, the member that is read is named after the archive path with
        ``.gz`` removed and ``.tar`` replaced by ``.json``.
        The data in the json file should be a list of {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    molecules_list_dict: defaultdict(list)
        Maps the index of a molecule to the list of its molecule JSON entries. e.g.
        {
            index1: [Molecule_json1a, Molecule_json1b, ..],
            index2: [Molecule_json2a, Molecule_json2b, ..],
        }

    molecule_attributes: dict
        Maps the index of a molecule to the attributes of the molecule, e.g.
        {
            index1: {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Note
    ----
    1. The mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is selected as the index.
    2. Entries whose Molecule hash was already stored under the same index are skipped.
    """
    # Load the raw records, from inside the archive when one is given.
    if input_json.endswith((".tar", ".tar.gz")):
        member_name = input_json.replace(".gz", "").replace(".tar", ".json")
        with tarfile.open(input_json, 'r') as archive:
            records = json.load(archive.extractfile(member_name))
    else:
        with open(input_json) as handle:
            records = json.load(handle)

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}
    seen_hashes = defaultdict(set)  # per-index hashes already kept
    for record in records:
        conformers = record['initial_molecules']
        identifiers = record['cmiles_identifiers']
        index = identifiers['canonical_isomeric_smiles']
        molecule_attributes[index] = identifiers
        for m_json in conformers:
            digest = Molecule.from_data(m_json).get_hash()
            if digest in seen_hashes[index]:
                continue  # duplicate of a conformer already stored
            seen_hashes[index].add(digest)
            molecules_list_dict[index].append(m_json)
    return molecules_list_dict, molecule_attributes
Ejemplo n.º 29
0
def test_sparse_molecule_connectivity():
    """
    An explicitly passed ``connectivity=None`` must show up in ``dict()``,
    while a molecule built without the keyword must not list the field at all.
    """
    helium_fields = dict(symbols=["He", "He"], geometry=[0, 0, -2, 0, 0, 2])

    # Field set explicitly, even though only to None
    explicit = Molecule(connectivity=None, **helium_fields)
    assert "connectivity" in explicit.dict()
    assert explicit.dict()["connectivity"] is None

    # Field never mentioned
    implicit = Molecule(**helium_fields)
    assert "connectivity" not in implicit.dict()
Ejemplo n.º 30
0
def test_pyscf_base():
    """Check the exception types ScfPyScf raises for invalid usage."""
    lithium = Molecule.from_data("""Li 0 0 0""")
    helium = Molecule.from_data("""He 0 0 0""")
    bad_basis = 0
    good_basis = 'sto-3g'
    adc_method = 'adc'
    hf_method = 'hf'
    dft_method = 'dft'

    # An integer in place of the basis name
    with pytest.raises(TypeError):
        hf = ScfPyScf(lithium, bad_basis, hf_method)

    # NOTE(review): in the next two blocks either statement may be the one
    # that raises -- cannot tell from here which is intended; both are kept.
    with pytest.raises(ValueError):
        hf = ScfPyScf(helium, good_basis, adc_method)
        hf.perturb_fock(bad_basis)
    with pytest.raises(TypeError):
        hf = ScfPyScf(helium, good_basis, hf_method)
        hf.perturb_fock(bad_basis)

    with pytest.raises(ValueError):
        ScfPyScf(helium, good_basis, dft_method)
    with pytest.raises(NotImplementedError):
        ScfPyScf(lithium, good_basis, hf_method)