def test_from_data_kwargs():
    """Charge/multiplicity keywords given to ``from_data`` show up on the molecule."""
    water_geometry = """
        O 0 0 0
        H 0 1.5 0
        H 0 0 1.5
        """

    keyword_sets = (
        # Molecular and per-fragment values all spelled out
        {
            "molecular_charge": 1,
            "molecular_multiplicity": 2,
            "fragment_charges": [1],
            "fragment_multiplicities": [2],
        },
        # Only molecular values supplied; fragment values are still checked
        {
            "molecular_charge": 1,
            "molecular_multiplicity": 2,
        },
    )

    for keywords in keyword_sets:
        mol = Molecule.from_data(water_geometry, **keywords)

        assert mol.molecular_charge == 1
        assert mol.molecular_multiplicity == 2
        assert mol.fragment_charges[0] == 1
        assert mol.fragment_multiplicities[0] == 2
def test_molecule_np_constructors():
    """
    Neon tetramer built from a psi4 string, a numpy array and JSON must all agree
    """
    ### Neon Tetramer
    from_psi = Molecule.from_data(
        """
        Ne 0.000000 0.000000 0.000000
        --
        Ne 3.100000 0.000000 0.000000
        --
        Ne 0.000000 3.200000 0.000000
        --
        Ne 0.000000 0.000000 3.300000
        units bohr""",
        dtype="psi4",
    )

    # One row per atom: atomic number followed by the coordinates
    atomic_numbers = np.full((4, 1), 10)
    neon_array = np.hstack((atomic_numbers, from_psi.geometry))
    from_np = Molecule.from_data(
        neon_array,
        name="neon tetramer",
        dtype="numpy",
        frags=[1, 2, 3],
        units="bohr",
    )
    assert from_psi == from_np

    # Check the JSON construct/deconstruct
    from_json = Molecule.from_data(from_psi.json(), dtype="json")
    assert from_psi == from_json
    assert from_json.get_molecular_formula() == "Ne4"
def relax_structure(smiles: str,
                    qc_config: QCInputSpecification,
                    compute_config: Optional[Union[TaskConfig, Dict]] = None,
                    compute_connectivity: bool = False,
                    code: str = _code) -> Tuple[str, float]:
    """Relax the geometry of a molecule given its SMILES string

    Args:
        smiles (str): SMILES of a molecule
        qc_config (QCInputSpecification): Quantum Chemistry configuration used for evaluating the energy
        compute_config (TaskConfig): Configuration for the quantum chemistry code
        compute_connectivity (bool): Whether we must compute connectivity before calling code
        code (str): Which QC code to use for the evaluation
    Returns:
        (str): Final structure of the molecule, in XYZ format
        (float): Last energy reported by the optimization
    """

    # Build a starting 3D structure from the SMILES string
    starting_xyz = generate_atomic_coordinates(smiles)
    mol = Molecule.from_data(starting_xyz, dtype='xyz')

    # Attach a guessed connectivity table, if requested
    if compute_connectivity:
        bonds = guess_connectivity(mol.symbols, mol.geometry, default_connectivity=1.0)
        mol = Molecule.from_data(dict(mol.dict(), connectivity=bonds))

    # Describe the optimization and hand it to the geomeTRIC procedure
    optimizer_keywords = {'program': code, 'convergence_set': 'GAU_VERYTIGHT'}
    opt_input = OptimizationInput(
        input_specification=qc_config,
        initial_molecule=mol,
        keywords=optimizer_keywords,
    )
    res: OptimizationResult = compute_procedure(
        opt_input, 'geometric', local_options=compute_config, raise_error=True
    )

    relaxed_xyz = res.final_molecule.to_string('xyz')
    final_energy = res.energies[-1]
    return relaxed_xyz, final_energy
def test_water_orient():
    """Compare hashes of oriented fragments taken from two water dimers."""
    # These are identical molecules, should find the correct results
    dimer = Molecule.from_data(
        """
        O -1.551007 -0.114520 0.000000
        H -1.934259 0.762503 0.000000
        H -0.599677 0.040712 0.000000
        --
        O -0.114520 -1.551007 10.000000
        H 0.762503 -1.934259 10.000000
        H 0.040712 -0.599677 10.000000
        """
    )

    first = dimer.get_fragment(0, orient=True)
    second = dimer.get_fragment(1, orient=True)

    # Make sure the fragments match
    assert first.get_hash() == second.get_hash()

    # Make sure the complexes match
    grouped_01 = dimer.get_fragment(0, 1, orient=True, group_fragments=True)
    grouped_10 = dimer.get_fragment(1, 0, orient=True, group_fragments=True)
    assert grouped_01.get_hash() == grouped_10.get_hash()

    # Fragments not reordered, should be different molecules.
    ungrouped_01 = dimer.get_fragment(0, 1, orient=True, group_fragments=False)
    ungrouped_10 = dimer.get_fragment(1, 0, orient=True, group_fragments=False)
    assert ungrouped_01.get_hash() != ungrouped_10.get_hash()

    # These are identical molecules, but should be different with ghost
    shifted_dimer = Molecule.from_data(
        """
        O -1.551007 -0.114520 0.000000
        H -1.934259 0.762503 0.000000
        H -0.599677 0.040712 0.000000
        --
        O -11.551007 -0.114520 0.000000
        H -11.934259 0.762503 0.000000
        H -10.599677 0.040712 0.000000
        """,
        dtype="psi4",
        orient=True,
    )

    first = shifted_dimer.get_fragment(0, orient=True)
    second = shifted_dimer.get_fragment(1, orient=True)

    # Make sure the fragments match
    assert first.molecular_multiplicity == 1
    assert first.get_hash() == second.get_hash()

    # Make sure the complexes match
    real_0_ghost_1 = shifted_dimer.get_fragment(0, 1, orient=True)
    real_1_ghost_0 = shifted_dimer.get_fragment(1, 0, orient=True)

    # Ghost fragments should prevent overlap
    assert real_0_ghost_1.molecular_multiplicity == 1
    assert real_0_ghost_1.get_hash() != real_1_ghost_0.get_hash()
def test_hash():
    """``get_hash`` must agree where ``Molecule.get_hash`` differs (orientation, charge)."""
    _, xyz = generate_inchi_and_xyz('O')

    # Re-orienting changes the QCElemental hash but not ours
    neutral = Molecule.from_data(xyz, 'xyz')
    neutral_oriented = neutral.orient_molecule()
    assert neutral.get_hash() != neutral_oriented.get_hash()
    assert get_hash(neutral) == get_hash(neutral_oriented)

    # Same geometry with a +1 charge
    oxidized = Molecule.from_data(xyz, 'xyz', molecular_charge=1)
    assert oxidized.molecular_multiplicity != neutral.molecular_multiplicity
    assert neutral.get_hash() != oxidized.get_hash()
    assert get_hash(neutral) == get_hash(oxidized.orient_molecule())
def test_molecule_data_constructor_numpy():
    """A water dimer rebuilt from a numpy array compares equal to the reference."""
    reference = water_dimer_minima.copy()

    # One row per atom: atomic number, then the geometry scaled by the Bohr -> angstrom factor
    bohr_to_angstrom = qcel.constants.conversion_factor("Bohr", "angstrom")
    atomic_numbers = np.array(reference.atomic_numbers).reshape(-1, 1)
    water_array = np.hstack((atomic_numbers, reference.geometry * bohr_to_angstrom))

    # With the dtype given explicitly
    explicit = Molecule.from_data(water_array, name="water dimer", dtype="numpy", frags=[3])
    assert reference.compare(explicit)

    # With the dtype left for from_data to work out
    inferred = Molecule.from_data(water_array, name="water dimer", frags=[3])
    assert reference.compare(inferred)

    assert reference.get_molecular_formula() == "H4O2"
def test_molecule_data_constructor_dict():
    """Round-trip the water dimer through dict, JSON and string representations."""
    reference = water_dimer_minima.copy()

    # Check the JSON construct/deconstruct
    # NOTE(review): compare() is called with two molecules here; confirm which
    # pair is actually compared by the QCElemental version this test targets.
    from_dict = Molecule.from_data(reference.dict())
    assert reference.compare(reference, from_dict)

    from_json = Molecule.from_data(reference.json(), "json")
    assert reference.compare(reference, from_json)

    from_string = Molecule.from_data(reference.to_string(), dtype="psi4")
    assert reference.compare(from_string)
def test_molecule_data_constructor_dict():
    """Round-trip the water dimer through dict, JSON and psi4 string; pin hash and schema."""
    reference = water_dimer_minima.copy()

    # Check the JSON construct/deconstruct
    from_dict = Molecule.from_data(reference.dict())
    assert reference == from_dict

    from_json = Molecule.from_data(reference.json(), "json")
    assert reference == from_json

    from_psi4_text = Molecule.from_data(reference.to_string("psi4"), dtype="psi4")
    assert reference == from_psi4_text

    expected_hash = "3c4b98f515d64d1adc1648fe1fe1d6789e978d34"  # copied from schema_version=1
    assert reference.get_hash() == expected_hash
    assert reference.schema_version == 2
    assert reference.schema_name == "qcschema_molecule"
def test_pyscf_wrap_dft_co_h2o_sto3g():
    """Test the DFT (LDA,VWN) embedding energy terms for the CO / H2O pair."""
    # Compared with QChem results
    co = Molecule.from_data(
        """C -3.6180905689 1.3768035675 -0.0207958979
        O -4.7356838533 1.5255563000 0.1150239130"""
    )
    h2o = Molecule.from_data(
        """O -7.9563726699 1.4854060709 0.1167920007
        H -6.9923165534 1.4211335985 0.1774706091
        H -8.1058463545 2.4422204631 0.1115993752"""
    )
    basis, method, xc_code = 'sto-3g', 'dft', 'LDA,VWN'
    args0 = {"mol": co, "basis": basis, "method": method, "xc_code": xc_code}
    args1 = {"mol": h2o, "basis": basis, "method": method, "xc_code": xc_code}
    # Embedding settings: same as the CO settings plus the kinetic functional
    embs = {**args0, "t_code": 'XC_LDA_K_TF'}

    wrap = PyScfWrap(args0, args1, embs)
    wrap.run_embedding()
    embdic = wrap.energy_dict

    # Read reference: (key in the energy dictionary, QChem value, tolerance)
    qchem_reference = (
        ('rho0_rho1', 20.9016932248, 1e-5),
        ('nuc0_rho1', -21.0856319395, 1e-5),
        ('nuc1_rho0', -20.8950212739, 1e-5),
        # DFT related terms
        ('et_nad', 0.0029633384, 1e-6),
        ('exc_nad', -0.0020907144, 1e-6),
        ('int_ref_t', 0.0022083882, 1e-6),
        ('int_ref_xc', -0.0011261095, 1e-6),
        ('int_emb_t', 0.0022122190, 1e-6),
        ('int_emb_xc', -0.0011281762, 1e-6),
        ('deltalin', 0.0000017641, 1e-7),
    )
    for key, qchem_value, tolerance in qchem_reference:
        assert abs(qchem_value - embdic[key]) < tolerance
def match_geometry(self, mol: Molecule, tolerance: float = 1e-4) -> Tuple[AccuracyLevel, OxidationState]:
    """Match a geometry to one in this record

    A hash comparison is tried against every stored geometry first; only if
    none matches are the stored geometries aligned to ``mol`` one by one.

    Args:
        mol: Molecule to look for
        tolerance: RMSD tolerance when matching by alignment
    Returns:
        - Accuracy level used to compute this structure
        - Oxidation state of this structure
    Raises:
        UnmatchedGeometry if structure not found
    """

    # Hash of the query molecule
    query_hash = get_hash(mol)

    # Every stored geometry, in the same order for both passes
    stored = [
        (level, state, geom)
        for level, geoms in self.data.items()
        for state, geom in geoms.items()
    ]

    # First pass: match on the stored hash
    for level, state, geom in stored:
        if geom.xyz_hash == query_hash:
            return level, state

    # Second pass: match on the RMSD after alignment
    for level, state, geom in stored:
        candidate = Molecule.from_data(geom.xyz, 'xyz')
        _, alignment = candidate.align(mol, atoms_map=True)
        if alignment['rmsd'] < tolerance:
            return level, state

    raise UnmatchedGeometry()
def submit_vertical_geometries(geom_dataset: GeometryDataset, vert_datasets: List[SinglePointDataset]):
    """Add each neutral geometry to the single-point datasets in three charge states

    Args:
        geom_dataset: Dataset that provides the geometries
        vert_datasets: Datasets that receive the charged copies and are then started
    """
    all_geoms = geom_dataset.get_geometries()
    print(f'Found {len(all_geoms)} molecules in {geom_dataset.coll.name}')

    # Label and charge of each state started from the neutral geometry
    charge_states = (('reduced', -1), ('neutral', 0), ('oxidized', 1))

    for inchi, geoms in all_geoms.items():
        # Molecules without a neutral geometry are skipped
        if 'neutral' not in geoms:
            continue
        neutral_xyz = geoms['neutral'].to_string('xyz')

        for postfix, charge in charge_states:
            # The uncharged copy carries no postfix in its name
            identifier = f'{inchi}_xtb_neutral' if charge == 0 else f'{inchi}_xtb_neutral_{postfix}'
            charged = Molecule.from_data(neutral_xyz, 'xyz', molecular_charge=charge, name=identifier)

            # Register with every level of accuracy; saving is deferred
            for dataset in vert_datasets:
                dataset.add_molecule(charged, inchi, save=False)

    # Save each collection once, then start the computations
    for dataset in vert_datasets:
        dataset.coll.save()
        n_started = dataset.start_computation()
        print(f'Started {n_started} computations for {dataset.coll.name}')
def relax_structure(xyz: str,
                    qc_config: QCInputSpecification,
                    charge: int = 0,
                    compute_config: Optional[Union[TaskConfig, Dict]] = None,
                    code: str = _code) -> OptimizationResult:
    """Relax the geometry of a molecule given its structure in XYZ format

    Args:
        xyz (str): Structure of a molecule in XYZ format
        qc_config (QCInputSpecification): Quantum Chemistry configuration used for evaluating the energy
        charge (int): Charge of the molecule
        compute_config (TaskConfig): Configuration for the quantum chemistry code, such as parallelization settings
        code (str): Which QC code to use for the evaluation
    Returns:
        (OptimizationResult): Full output from the calculation
    """

    # NWChem goes through its own driver; every other code goes through geomeTRIC
    use_nwchem_driver = code == "nwchem"
    if use_nwchem_driver:
        relax_code = "nwchemdriver"
        keywords = {"driver__maxiter": 100, "set__driver:linopt": 0}
    else:
        relax_code = "geometric"
        keywords = {"program": code}

    # Parse the structure and describe the optimization
    molecule = Molecule.from_data(xyz, dtype='xyz', molecular_charge=charge)
    opt_input = OptimizationInput(
        input_specification=qc_config,
        initial_molecule=molecule,
        keywords=keywords,
    )
    return compute_procedure(opt_input, relax_code, local_options=compute_config, raise_error=True)
def run_single_point(xyz: str,
                     driver: DriverEnum,
                     qc_config: QCInputSpecification,
                     charge: int = 0,
                     compute_config: Optional[Union[TaskConfig, Dict]] = None,
                     code: str = _code) -> AtomicResult:
    """Run a single point calculation

    Args:
        xyz: Structure in XYZ format
        driver: What type of property to compute: energy, gradient, hessian
        qc_config (QCInputSpecification): Quantum Chemistry configuration used for evaluating the energy
        charge (int): Charge of the molecule
        compute_config (TaskConfig): Configuration for the quantum chemistry code, such as parallelization settings
        code (str): Which QC code to use for the evaluation
    Returns:
        QCElemental-format result of the output
    """

    molecule = Molecule.from_data(xyz, dtype="xyz", molecular_charge=charge)

    # The driver stored in the specification is dropped in favour of the argument
    spec_fields = qc_config.dict(exclude={'driver'})
    input_spec = AtomicInput(molecule=molecule, driver=driver, **spec_fields)

    return compute(input_spec, code, local_options=compute_config, raise_error=True)
def compute_reference_energy(element: str, qc_config: QCInputSpecification,
                             n_open: int, code: str = _code) -> float:
    """Compute the energy of an isolated atom in vacuum

    Args:
        element (str): Symbol of the element
        qc_config (QCInputSpecification): Quantum Chemistry configuration used for evaluating the energy
        n_open (int): Number of open atomic orbitals. It is passed on unchanged
            as the molecular multiplicity (NOTE(review): confirm callers supply
            the multiplicity here)
        code (str): Which QC code to use for the evaluation
    Returns:
        (float): Energy of the isolated atom
    """

    # A one-atom XYZ file with the atom at the origin
    atom_xyz = f'1\n{element}\n{element} 0 0 0'
    atom = Molecule.from_data(
        atom_xyz,
        dtype='xyz',
        molecular_multiplicity=n_open,
        molecular_charge=0,
    )

    # Energy evaluation; the driver stored in the specification is overridden
    spec_fields = qc_config.dict(exclude={'driver'})
    input_spec = AtomicInput(molecule=atom, driver='energy', **spec_fields)
    outcome = compute(input_spec, code, raise_error=True)
    return outcome.return_result
def test_hf_co_sto3g():
    """Test functions of ScfPyScf class."""
    expected_scf_energy = -111.22516947

    co = Molecule.from_data(
        """C -3.6180905689 1.3768035675 -0.0207958979
        O -4.7356838533 1.5255563000 0.1150239130"""
    )
    hf = ScfPyScf(co, 'sto-3g', 'hf')
    hf.solve_scf(conv_tol=1e-12)

    # The converged density is checked against twice the cached reference matrix
    dm0 = hf.get_density()
    nao_co = len(dm0)
    cached = np.loadtxt(cache.files["co_h2o_sto3g_dma"])
    ref_dm0 = cached.reshape((nao_co, nao_co))
    np.testing.assert_allclose(ref_dm0 * 2, dm0, atol=1e-6)

    unperturbed_fock = hf.get_fock()
    assert 'scf' in hf.energy
    assert abs(hf.energy["scf"] - expected_scf_energy) < 1e-7

    # A zero embedding potential must leave density, energy and Fock matrix unchanged
    hf.perturb_fock(np.zeros_like(dm0))
    hf.solve_scf()
    dm0_again = hf.get_density()
    np.testing.assert_allclose(ref_dm0 * 2, dm0_again, atol=1e-6)
    assert abs(hf.energy["scf"] - expected_scf_energy) < 1e-7

    perturbed_fock = hf.get_fock()
    np.testing.assert_allclose(unperturbed_fock, perturbed_fock, atol=1e-9)
def read_aggregate_molecules(input_json):
    """
    Read molecule records from a JSON file and group the unique molecules by index

    Parameters
    ----------
    input_json: str,
        Path to a JSON file, or to a ``.tar`` / ``.tar.gz`` archive holding a JSON
        file of the same base name. The JSON content is a list of
        {'initial_molecules': [..], 'cmiles_identifiers': {..}} records.

    Returns
    -------
    molecules_list_dict: dict
        Maps each index to the list of molecule JSON entries that belong to it;
        entries whose Molecule hash was already seen for that index are skipped.

    molecule_attributes: dict
        Maps each index to the 'cmiles_identifiers' of its record.

    Note
    ----
    The record's cmiles_identifiers['canonical_isomeric_smiles'] is used as the index.
    """
    import os  # local import: only needed for the archive member-name fallback

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}

    # open json file
    if input_json.endswith((".tar", ".tar.gz")):
        # Strip only the trailing archive suffix, so a '.tar' or '.gz' elsewhere
        # in the path is left alone
        suffix = ".tar.gz" if input_json.endswith(".tar.gz") else ".tar"
        extract_file = input_json[:-len(suffix)] + ".json"
        with tarfile.open(input_json, 'r') as infile:
            try:
                member = infile.getmember(extract_file)
            except KeyError:
                # The archive may store the JSON without the directory part of
                # the path it is opened through; retry with the bare file name
                member = infile.getmember(os.path.basename(extract_file))
            molecule_data_list = json.load(infile.extractfile(member))
    else:
        with open(input_json) as infile:
            molecule_data_list = json.load(infile)

    # put molecules and attributes into molecules_list_dict
    molecule_hash = defaultdict(set)  # hashes already seen, per index
    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']
        molecule_attributes[index] = cmiles_ids
        seen = molecule_hash[index]
        for m_json in initial_molecules:
            m_hash = Molecule.from_data(m_json).get_hash()
            # find duplicated molecules using their hash and skip them
            if m_hash not in seen:
                seen.add(m_hash)
                molecules_list_dict[index].append(m_json)
    return molecules_list_dict, molecule_attributes
def test_hash_canary():
    """Pin the hashes of the water dimer, its oriented copy and its oriented fragments."""
    dimer = Molecule.from_data(
        """
        0 1
        O -1.551007 -0.114520 0.000000
        H -1.934259 0.762503 0.000000
        H -0.599677 0.040712 0.000000
        --
        O 1.350625 0.111469 0.000000
        H 1.680398 -0.373741 -0.758561
        H 1.680398 -0.373741 0.758561
        """,
        dtype="psi4",
    )
    assert dimer.get_hash() == "42f3ac52af52cf2105c252031334a2ad92aa911c"

    # Check orientation
    oriented = dimer.orient_molecule()
    assert oriented.get_hash() == "632490a0601500bfc677e9277275f82fbc45affe"

    # Each fragment of the oriented dimer, oriented on its own
    expected_fragment_hashes = (
        "d0b499739f763e8d3a5556b4ddaeded6a148e4d5",
        "bdc1f75bd1b7b999ff24783d7c1673452b91beb9",
    )
    fragments = [oriented.get_fragment(idx, orient=True) for idx in (0, 1)]
    for fragment, expected in zip(fragments, expected_fragment_hashes):
        assert fragment.get_hash() == expected
def test_pyscf_wrap0():
    """Test basic functionality of PyScfWrap."""
    helium = Molecule.from_data("""He 0 0 0""")
    basis = 'sto-3g'

    dict0 = {'mol': 0}
    args0 = {"mol": helium, "basis": basis, "method": 'adc'}
    args1 = {"mol": helium, "basis": basis, "method": 'dft'}
    embs0 = {"mol": helium, "basis": basis, "method": 'hf'}
    embs1 = {**embs0, "xc_code": 'LDA,VWN', "t_code": 'XC_LDA_K_TF'}

    # Each argument triple must be rejected with the paired exception type
    rejected = (
        (KeyError, (dict0, embs0, embs1)),
        (KeyError, (embs0, dict0, embs1)),
        (ValueError, (embs0, args1, embs1)),
        (KeyError, (embs0, embs0, embs0)),
        (ValueError, (args0, embs0, embs1)),
    )
    for expected_error, wrap_args in rejected:
        with pytest.raises(expected_error):
            PyScfWrap(*wrap_args)
def test_fragment_charge_configurations(f1c, f1m, f2c, f2m, tc, tm):
    """Fragment charge/multiplicity headers must give the expected totals and fragment values."""
    mol = Molecule.from_data(f"""
    {f1c} {f1m}
    Li 0 0 0
    --
    {f2c} {f2m}
    Li 0 0 5
    """)

    assert pytest.approx(mol.molecular_charge) == tc
    assert mol.molecular_multiplicity == tm

    # Test fragment1
    first_alone = mol.get_fragment(0)
    assert pytest.approx(first_alone.molecular_charge) == f1c
    assert first_alone.molecular_multiplicity == f1m

    first_with_second = mol.get_fragment(0, 1)
    assert pytest.approx(first_with_second.molecular_charge) == f1c
    assert first_with_second.molecular_multiplicity == f1m

    # Test fragment2
    second_alone = mol.get_fragment(1)
    assert pytest.approx(second_alone.molecular_charge) == f2c
    assert second_alone.molecular_multiplicity == f2m

    # The two-fragment request is spelled both ways: list first, then list second
    assert pytest.approx(mol.get_fragment([1], 0).molecular_charge) == f2c
    assert mol.get_fragment(1, [0]).molecular_multiplicity == f2m
def test_openmm_gaff_keywords(gaff_settings):
    """
    Test the different running settings with gaff.
    """
    keywords, error, expected_result = gaff_settings
    program = "openmm"

    # add water cmiles to the molecule
    water_dict = qcng.get_molecule("water").dict()
    mapped_smiles = "[H:2][O:1][H:3]"
    water_dict["extras"] = {
        "cmiles": {"canonical_isomeric_explicit_hydrogen_mapped_smiles": mapped_smiles}
    }
    molecule = Molecule.from_data(water_dict)

    inp = AtomicInput(
        molecule=molecule,
        driver="energy",
        model={"method": "gaff-2.1", "basis": "antechamber"},
        keywords=keywords,
    )

    # Settings that come with no error must produce the expected energy
    if error is None:
        ret = qcng.compute(inp, program, raise_error=False)
        assert ret.success is True
        assert ret.return_result == pytest.approx(expected_result, rel=1e-6)
    else:
        with pytest.raises(error):
            _ = qcng.compute(inp, program, raise_error=True)
def test_openmm_cmiles_gradient_nomatch():
    """A water molecule tagged with ethane cmiles must fail the gradient computation."""
    program = "openmm"

    # add ethane cmiles to the molecule
    ethane_mapped_smiles = "[H:3][C:1]([H:4])([H:5])[C:2]([H:6])([H:7])[H:8]"
    water_dict = qcng.get_molecule("water").dict()
    water_dict["extras"] = {
        "cmiles": {"canonical_isomeric_explicit_hydrogen_mapped_smiles": ethane_mapped_smiles}
    }
    molecule = Molecule.from_data(water_dict)

    inp = AtomicInput(
        molecule=molecule,
        driver="gradient",
        model={"method": "openff-1.0.0", "basis": "smirnoff"},
    )
    ret = qcng.compute(inp, program, raise_error=False)

    # if we correctly find the cmiles this should fail as the molecule and cmiles are different
    expected_message = "molecule.add_conformer given input of the wrong shape: Given (3, 3), expected (8, 3)"
    assert ret.success is False
    assert expected_message in ret.error.error_message
def _spawn_optimization(
    grid_point: str, job: List[float], input_model: "TorsionDriveInput", config: "TaskConfig"
) -> Union[FailedOperation, OptimizationResult]:
    """Runs a constrained optimization at one grid point and returns its result.

    Parameters
    ----------
    grid_point
        A string of the form 'dihedral_1_angle ... dihedral_n_angle' that encodes
        the dihedral angles to constrain during this optimization.
    job
        The flattened starting conformer, with length=(n_atoms * 3)
    input_model
        The input model containing the relevant settings for how to optimize the
        structure.
    config
        The configuration to launch the task using.

    Returns
    -------
        The result of the optimization if successful, otherwise an error
        containing object.
    """

    from qcengine import compute_procedure

    # Copy the first initial molecule and swap in the requested conformer
    molecule_fields = input_model.initial_molecule[0].copy(deep=True).dict()
    n_atoms = len(molecule_fields["symbols"])
    molecule_fields["geometry"] = np.array(job).reshape(n_atoms, 3)
    start_molecule = Molecule.from_data(molecule_fields)

    # One dihedral constraint per scanned dihedral, paired with its grid angle
    constraint_set = []
    for dihedral, angle in zip(input_model.keywords.dihedrals, grid_point.split()):
        constraint_set.append({"type": "dihedral", "indices": dihedral, "value": int(angle)})

    optimization_spec = input_model.optimization_spec
    keywords = {**optimization_spec.keywords, "constraints": {"set": constraint_set}}

    input_data = OptimizationInput(
        keywords=keywords,
        extras={},
        protocols=optimization_spec.protocols,
        input_specification=input_model.input_specification,
        initial_molecule=start_molecule,
    )

    return compute_procedure(
        input_data,
        procedure=optimization_spec.procedure,
        local_options=config.dict(),
    )
def test_molecule_json_serialization():
    """JSON output is a string, JSON-encoded geometry is a list, and JSON round-trips."""
    as_json = water_dimer_minima.json()
    assert isinstance(as_json, str)

    json_encoded_fields = water_dimer_minima.dict(encoding="json")
    assert isinstance(json_encoded_fields["geometry"], list)

    rebuilt = Molecule.from_data(water_dimer_minima.json(), dtype="json")
    assert water_dimer_minima == rebuilt
def read_molecules(input_json):
    """
    Extract the molecules and the index of them from the input json file

    Parameters
    ----------
    input_json: str,
        JSON file name to the output json of generate.py, or a ``.tar`` /
        ``.tar.gz`` archive holding a JSON file of the same base name.
        The data in the json file should be a list of
        {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    molecules_dict: dict
        The dictionary maps the index of a molecule to a Molecule object. e.g.
        {
            index1: Molecule1,
            index2: Molecule2,
        }

    molecule_attributes: dict
        The dictionary maps the index of a molecule to the attributes of the molecule, e.g.
        {
            index1: {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Note
    ----
    1. The mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is selected as the index.
    2. Every molecule gets a running suffix appended to that index, starting at
       0 (index-0, index-1, ..), so molecules sharing the same
       "canonical_isomeric_smiles" stay distinguishable.
    """
    import os  # local import: only needed for the archive member-name fallback

    molecules_dict = {}
    molecule_attributes = {}

    if input_json.endswith((".tar", ".tar.gz")):
        # Strip only the trailing archive suffix, so a '.tar' or '.gz' elsewhere
        # in the path is left alone
        suffix = ".tar.gz" if input_json.endswith(".tar.gz") else ".tar"
        extract_file = input_json[:-len(suffix)] + ".json"
        with tarfile.open(input_json, 'r') as infile:
            try:
                member = infile.getmember(extract_file)
            except KeyError:
                # The archive may store the JSON without the directory part of
                # the path it is opened through; retry with the bare file name
                member = infile.getmember(os.path.basename(extract_file))
            molecule_data_list = json.load(infile.extractfile(member))
    else:
        with open(input_json) as infile:
            molecule_data_list = json.load(infile)

    index_counter = Counter()
    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']
        for initial_molecule in initial_molecules:
            qcel_molecule = Molecule.from_data(initial_molecule)
            # use count to generate unique index
            index_count = index_counter[index]
            this_index = f'{index}-{index_count}'
            index_counter[index] += 1
            assert this_index not in molecules_dict, f"Multiple molecules have the same index, please check {mdata}"
            molecules_dict[this_index] = qcel_molecule
            molecule_attributes[this_index] = cmiles_ids
    return molecules_dict, molecule_attributes
def test_from_data_kwargs():
    """Charge/multiplicity keywords are applied, and inconsistent ones are rejected."""
    water_geometry = """
        O 0 0 0
        H 0 1.5 0
        H 0 0 1.5
        """

    consistent_keyword_sets = (
        # Molecular and per-fragment values all spelled out
        {
            "molecular_charge": 1,
            "molecular_multiplicity": 2,
            "fragment_charges": [1],
            "fragment_multiplicities": [2],
        },
        # Only molecular values supplied; fragment values are still checked
        {
            "molecular_charge": 1,
            "molecular_multiplicity": 2,
        },
    )
    for keywords in consistent_keyword_sets:
        mol = Molecule.from_data(water_geometry, **keywords)

        assert mol.molecular_charge == 1
        assert mol.molecular_multiplicity == 2
        assert mol.fragment_charges[0] == 1
        assert mol.fragment_multiplicities[0] == 2

    # A fragment charge that disagrees with the molecular charge must be refused
    with pytest.raises(qcel.ValidationError) as e:
        mol = Molecule.from_data(
            water_geometry,
            molecular_charge=1,
            molecular_multiplicity=2,
            fragment_charges=[2],
        )

    assert "Inconsistent or unspecified chg/mult" in str(e.value)
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energies and electron counts for a dimer and one of its fragments."""
    tol = 1.e-5
    nre_dimer = 34.60370459
    nre_first = 4.275210518
    nre_second = 16.04859029

    dimer = Molecule.from_data("""
    0 1
    --
    O 0.75119 -0.61395 0.00271
    H 1.70471 -0.34686 0.00009
    --
    1 1
    N -2.77793 0.00179 -0.00054
    H -2.10136 0.51768 0.60424
    H -3.45559 -0.51904 0.60067
    H -2.26004 -0.67356 -0.60592
    H -3.29652 0.68076 -0.60124
    units ang
    """)

    # Whole dimer (D) and each monomer (M1, M2)
    assert compare_values(nre_dimer, dimer.nuclear_repulsion_energy(), 'D', atol=tol)
    assert compare_values(nre_first, dimer.nuclear_repulsion_energy(ifr=0), 'M1', atol=tol)
    assert compare_values(nre_second, dimer.nuclear_repulsion_energy(ifr=1), 'M2', atol=tol)
    assert compare(20, dimer.nelectrons(), 'D')
    assert compare(10, dimer.nelectrons(ifr=0), 'M1')
    assert compare(10, dimer.nelectrons(ifr=1), 'M2')

    second_real = dimer.get_fragment([1], 0)
    # Notice the 0th/1st fragments change. Got to stop get_fragment from reordering
    ifr0 = 1
    ifr1 = 0
    assert compare_values(nre_second, second_real.nuclear_repulsion_energy(), 'D', atol=tol)
    assert compare_values(0.0, second_real.nuclear_repulsion_energy(ifr=ifr0), 'M1', atol=tol)
    assert compare_values(nre_second, second_real.nuclear_repulsion_energy(ifr=ifr1), 'M2', atol=tol)
    assert compare(10, second_real.nelectrons(), 'D')
    assert compare(0, second_real.nelectrons(ifr=ifr0), 'M1')
    assert compare(10, second_real.nelectrons(ifr=ifr1), 'M2')
def read_aggregate_molecules(input_json):
    """
    Extract the molecules and the index of them from the input json file,
    aggregating molecules with the same index into a list

    Parameters
    ----------
    input_json: str,
        JSON file name to the output json of generate.py, or a ``.tar`` /
        ``.tar.gz`` archive holding a JSON file of the same base name.
        The data in the json file should be a list of
        {'initial_molecules': [..], 'cmiles_identifiers':{}}.

    Returns
    -------
    molecules_list_dict: dict
        The dictionary maps the index of a molecule to the list of its molecule
        JSON entries, e.g.
        {
            index1: [Molecule_json1a, Molecule_json1b, ..],
            index2: [Molecule_json2a, Molecule_json2b, ..],
        }

    molecule_attributes: dict
        The dictionary maps the index of a molecule to the attributes of the molecule, e.g.
        {
            index1: {'canonical_explicit_hydrogen_smiles': .., 'canonical_isomeric_smiles': .., ..}
        }

    Note
    ----
    1. The mdata['cmiles_identifiers']['canonical_isomeric_smiles'] is selected as the index.
    2. Within one index, entries whose Molecule hash was already seen are skipped,
       so each list holds unique molecules only.
    """
    import os  # local import: only needed for the archive member-name fallback

    molecules_list_dict = defaultdict(list)
    molecule_attributes = {}

    # open json file
    if input_json.endswith((".tar", ".tar.gz")):
        # Strip only the trailing archive suffix, so a '.tar' or '.gz' elsewhere
        # in the path is left alone
        suffix = ".tar.gz" if input_json.endswith(".tar.gz") else ".tar"
        extract_file = input_json[:-len(suffix)] + ".json"
        with tarfile.open(input_json, 'r') as infile:
            try:
                member = infile.getmember(extract_file)
            except KeyError:
                # The archive may store the JSON without the directory part of
                # the path it is opened through; retry with the bare file name
                member = infile.getmember(os.path.basename(extract_file))
            molecule_data_list = json.load(infile.extractfile(member))
    else:
        with open(input_json) as infile:
            molecule_data_list = json.load(infile)

    # put molecules and attributes into molecules_list_dict
    molecule_hash = defaultdict(set)  # hashes already seen, per index
    for mdata in molecule_data_list:
        initial_molecules = mdata['initial_molecules']
        cmiles_ids = mdata['cmiles_identifiers']
        index = cmiles_ids['canonical_isomeric_smiles']
        molecule_attributes[index] = cmiles_ids
        seen = molecule_hash[index]
        for m_json in initial_molecules:
            m_hash = Molecule.from_data(m_json).get_hash()
            # find duplicated molecules using their hash and skip them
            if m_hash not in seen:
                seen.add(m_hash)
                molecules_list_dict[index].append(m_json)
    return molecules_list_dict, molecule_attributes
def update_derived_properties(self, verbose: bool = True):
    """Refresh the properties that are derived from the stored structure

    The lookup hash is recomputed from ``self.xyz`` and the thermochemistry
    is updated afterwards

    Args:
        verbose: Whether to print out log messages
    """
    parsed = Molecule.from_data(self.xyz, 'xyz')
    self.xyz_hash = get_hash(parsed)
    self.update_thermochem(verbose=verbose)
def test_nuclearrepulsionenergy_nelectrons():
    """Check nuclear repulsion energies and electron counts for a dimer and one of its fragments."""
    tol = 1.0e-5
    nre_dimer = 34.60370459
    nre_first = 4.275210518
    nre_second = 16.04859029

    dimer = Molecule.from_data("""
    0 1
    --
    O 0.75119 -0.61395 0.00271
    H 1.70471 -0.34686 0.00009
    --
    1 1
    N -2.77793 0.00179 -0.00054
    H -2.10136 0.51768 0.60424
    H -3.45559 -0.51904 0.60067
    H -2.26004 -0.67356 -0.60592
    H -3.29652 0.68076 -0.60124
    units ang
    """)

    # Whole dimer (D) and each monomer (M1, M2)
    assert compare_values(nre_dimer, dimer.nuclear_repulsion_energy(), "D", atol=tol)
    assert compare_values(nre_first, dimer.nuclear_repulsion_energy(ifr=0), "M1", atol=tol)
    assert compare_values(nre_second, dimer.nuclear_repulsion_energy(ifr=1), "M2", atol=tol)
    assert compare(20, dimer.nelectrons(), "D")
    assert compare(10, dimer.nelectrons(ifr=0), "M1")
    assert compare(10, dimer.nelectrons(ifr=1), "M2")

    second_real = dimer.get_fragment([1], 0, group_fragments=False)
    # Notice the 0th/1st fragments change if default group_fragments=True.
    ifr0 = 0
    ifr1 = 1
    assert compare_values(nre_second, second_real.nuclear_repulsion_energy(), "D", atol=tol)
    assert compare_values(0.0, second_real.nuclear_repulsion_energy(ifr=ifr0), "M1", atol=tol)
    assert compare_values(nre_second, second_real.nuclear_repulsion_energy(ifr=ifr1), "M2", atol=tol)
    assert compare(10, second_real.nelectrons(), "D")
    assert compare(0, second_real.nelectrons(ifr=ifr0), "M1")
    assert compare(10, second_real.nelectrons(ifr=ifr1), "M2")
def test_pyscf_base():
    """Test the errors raised by the ScfPyScf class."""
    lithium = Molecule.from_data("""Li 0 0 0""")
    helium = Molecule.from_data("""He 0 0 0""")
    bad_basis = 0
    good_basis = 'sto-3g'

    # A basis that is not a string
    with pytest.raises(TypeError):
        hf = ScfPyScf(lithium, bad_basis, 'hf')

    # 'adc' as the method
    # NOTE(review): if the constructor already raises here, the perturb_fock
    # call below it is never reached - confirm which call is meant to fail.
    with pytest.raises(ValueError):
        hf = ScfPyScf(helium, good_basis, 'adc')
        hf.perturb_fock(bad_basis)

    # An integer handed to perturb_fock
    with pytest.raises(TypeError):
        hf = ScfPyScf(helium, good_basis, 'hf')
        hf.perturb_fock(bad_basis)

    # 'dft' as the method, with no further settings
    with pytest.raises(ValueError):
        ScfPyScf(helium, good_basis, 'dft')

    # Lithium with 'hf'
    with pytest.raises(NotImplementedError):
        ScfPyScf(lithium, good_basis, 'hf')