def check_isomorphic(conformer):
    """
    Validate the geometry stored in log file ``f`` against ``conformer``.

    The molecule built from ``conformer.smiles`` (reduced to single bonds)
    is compared with a molecule rebuilt from the atomic numbers and
    positions read out of the log file. ``self``, ``scratch_dir``, ``f``
    and ``calc`` are not parameters; they are resolved from the
    surrounding scope.

    Returns:
        bool: ``True`` when the two molecules are isomorphic, ``False``
        otherwise. The outcome is also reported through ``logging.info``.
    """
    # Reference connectivity, derived from the SMILES only.
    reference = RMGMolecule(smiles=conformer.smiles).to_single_bonds()

    # Connectivity perceived from the geometry found in the log file.
    log_path = os.path.join(scratch_dir, f)
    atoms = self.read_log(log_path)
    numbers = atoms.arrays["numbers"]
    positions = atoms.arrays["positions"]
    from_log = RMGMolecule()
    from_log.from_xyz(numbers, positions)

    if reference.is_isomorphic(from_log):
        logging.info(
            "{} was successful and was validated!".format(calc.label))
        return True

    logging.info(
        "Output geometry of {} is not isomorphic with input geometry".format(calc.label))
    return False
def test_intra_r_add_exo_scission(self):
    """
    Check that applying the Intra_R_Add_Exo_scission recipe yields a single
    product whose atom labels match the expected structure.

    This family is its own reverse.
    """
    reactant_adjlist = """
multiplicity 2
1 *3 C u0 p0 c0 {2,S} {8,S} {11,S} {12,S}
2 *2 C u0 p0 c0 {1,S} {3,B} {4,B}
3 C u0 p0 c0 {2,B} {5,B} {13,S}
4 C u0 p0 c0 {2,B} {7,B} {17,S}
5 C u0 p0 c0 {3,B} {6,B} {14,S}
6 C u0 p0 c0 {5,B} {7,B} {15,S}
7 C u0 p0 c0 {4,B} {6,B} {16,S}
8 *1 C u1 p0 c0 {1,S} {9,S} {18,S}
9 C u0 p0 c0 {8,S} {10,T}
10 C u0 p0 c0 {9,T} {19,S}
11 H u0 p0 c0 {1,S}
12 H u0 p0 c0 {1,S}
13 H u0 p0 c0 {3,S}
14 H u0 p0 c0 {5,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {7,S}
17 H u0 p0 c0 {4,S}
18 H u0 p0 c0 {8,S}
19 H u0 p0 c0 {10,S}
"""
    product_adjlist = """
multiplicity 2
1 *3 C u0 p0 c0 {2,S} {8,S} {9,S} {11,S}
2 *2 C u0 p0 c0 {1,S} {3,B} {4,B}
3 C u0 p0 c0 {2,B} {5,B} {12,S}
4 C u0 p0 c0 {2,B} {7,B} {16,S}
5 C u0 p0 c0 {3,B} {6,B} {13,S}
6 C u0 p0 c0 {5,B} {7,B} {14,S}
7 C u0 p0 c0 {4,B} {6,B} {15,S}
8 *1 C u1 p0 c0 {1,S} {17,S} {18,S}
9 C u0 p0 c0 {1,S} {10,T}
10 C u0 p0 c0 {9,T} {19,S}
11 H u0 p0 c0 {1,S}
12 H u0 p0 c0 {3,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {6,S}
15 H u0 p0 c0 {7,S}
16 H u0 p0 c0 {4,S}
17 H u0 p0 c0 {8,S}
18 H u0 p0 c0 {8,S}
19 H u0 p0 c0 {10,S}
"""
    rxn_family = self.database.families['Intra_R_Add_Exo_scission']
    reactants = [Molecule().from_adjacency_list(reactant_adjlist)]
    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)
    self.assertEqual(len(products), 1)
    product = products[0]

    # Pair every labeled atom of the expected structure with the atom
    # carrying the same label in the generated product.
    labeled = expected_product.get_all_labeled_atoms()
    mapping = {
        atom: product.get_labeled_atoms(label)[0]
        for label, atom in labeled.items()
    }

    self.assertTrue(expected_product.is_isomorphic(product, mapping))
def test_12_shift_c(self):
    """
    Check that applying the 1,2_shiftC recipe yields a single product whose
    atom labels match the expected structure.

    This family is its own reverse.
    """
    reactant_adjlist = """
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {8,S} {9,S}
2 *1 C u0 p0 c0 {1,S} {10,S} {11,S} {12,S}
3 *3 C u1 p0 c0 {1,S} {4,S} {5,S}
4 C u0 p0 c0 {3,S} {6,D} {13,S}
5 C u0 p0 c0 {3,S} {7,D} {14,S}
6 C u0 p0 c0 {4,D} {7,S} {15,S}
7 C u0 p0 c0 {5,D} {6,S} {16,S}
8 H u0 p0 c0 {1,S}
9 H u0 p0 c0 {1,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {2,S}
12 H u0 p0 c0 {2,S}
13 H u0 p0 c0 {4,S}
14 H u0 p0 c0 {5,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {7,S}
"""
    product_adjlist = """
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {4,S} {7,S}
2 *1 C u0 p0 c0 {1,S} {8,S} {9,S} {10,S}
3 C u0 p0 c0 {1,S} {5,D} {11,S}
4 C u0 p0 c0 {1,S} {6,D} {12,S}
5 C u0 p0 c0 {3,D} {6,S} {13,S}
6 C u0 p0 c0 {4,D} {5,S} {14,S}
7 *3 C u1 p0 c0 {1,S} {15,S} {16,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {6,S}
15 H u0 p0 c0 {7,S}
16 H u0 p0 c0 {7,S}
"""
    rxn_family = self.database.families['1,2_shiftC']
    reactants = [Molecule().from_adjacency_list(reactant_adjlist)]
    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)
    self.assertEqual(len(products), 1)
    product = products[0]

    # Pair every labeled atom of the expected structure with the atom
    # carrying the same label in the generated product.
    labeled = expected_product.get_all_labeled_atoms()
    mapping = {
        atom: product.get_labeled_atoms(label)[0]
        for label, atom in labeled.items()
    }

    self.assertTrue(expected_product.is_isomorphic(product, mapping))
def test_intra_ene_reaction(self):
    """
    Check that applying the Intra_ene_reaction recipe yields a single
    product whose atom labels match the expected structure.

    This family is its own reverse.
    """
    reactant_adjlist = """
1 *1 C u0 p0 c0 {2,S} {3,S} {4,S} {10,S}
2 *5 C u0 p0 c0 {1,S} {5,D} {6,S}
3 *2 C u0 p0 c0 {1,S} {7,D} {11,S}
4 C u0 p0 c0 {1,S} {8,D} {12,S}
5 *4 C u0 p0 c0 {2,D} {7,S} {13,S}
6 C u0 p0 c0 {2,S} {9,D} {15,S}
7 *3 C u0 p0 c0 {3,D} {5,S} {14,S}
8 C u0 p0 c0 {4,D} {9,S} {17,S}
9 C u0 p0 c0 {6,D} {8,S} {16,S}
10 *6 H u0 p0 c0 {1,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {7,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {9,S}
17 H u0 p0 c0 {8,S}
"""
    product_adjlist = """
1 *2 C u0 p0 c0 {2,D} {3,S} {4,S}
2 *3 C u0 p0 c0 {1,D} {5,S} {6,S}
3 *1 C u0 p0 c0 {1,S} {7,S} {11,S} {10,S}
4 C u0 p0 c0 {1,S} {8,D} {12,S}
5 *4 C u0 p0 c0 {2,S} {7,D} {13,S}
6 C u0 p0 c0 {2,S} {9,D} {15,S}
7 *5 C u0 p0 c0 {3,S} {5,D} {14,S}
8 C u0 p0 c0 {4,D} {9,S} {17,S}
9 C u0 p0 c0 {6,D} {8,S} {16,S}
10 *6 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {3,S}
12 H u0 p0 c0 {4,S}
13 H u0 p0 c0 {5,S}
14 H u0 p0 c0 {7,S}
15 H u0 p0 c0 {6,S}
16 H u0 p0 c0 {9,S}
17 H u0 p0 c0 {8,S}
"""
    rxn_family = self.database.families['Intra_ene_reaction']
    reactants = [Molecule().from_adjacency_list(reactant_adjlist)]
    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)
    self.assertEqual(len(products), 1)
    product = products[0]

    # Pair every labeled atom of the expected structure with the atom
    # carrying the same label in the generated product.
    labeled = expected_product.get_all_labeled_atoms()
    mapping = {
        atom: product.get_labeled_atoms(label)[0]
        for label, atom in labeled.items()
    }

    self.assertTrue(expected_product.is_isomorphic(product, mapping))
def load(self, paths=None, names=None, ignore_incomplete=True):
    """
    Load one or more sets of reference species and append them to the database.

    Each set is stored in ``self.reference_sets`` under the name of its
    folder. A species is skipped when it is isomorphic to one that has
    already been loaded (from this set or from a previously loaded one).

    Args:
        paths (list): A single path string, or a list of path strings pointing to a set of reference species to
            be loaded into the database. The string should point to the folder that has the name of the reference
            set. The name of sub-folders in a reference set directory should be indices starting from 0 and should
            contain a YAML file that defines the ReferenceSpecies object of that index, named {index}.yml
        names (list): Same functionality as `paths` but using names of the folders in the database.
        ignore_incomplete (bool): If ``True`` only species with both reference and calculated data will be added.
    """
    paths = self.get_database_paths(paths=paths, names=names)

    molecule_list = []
    for path in paths:
        # normpath drops a trailing separator; without it basename('/db/main/')
        # is '' and every such set would be stored under the same empty key.
        set_name = os.path.basename(os.path.normpath(path))
        logging.info(
            f'Loading in reference set `{set_name}` from {path} ...')

        reference_set = []
        # os.listdir returns entries in arbitrary order; sorting makes the
        # choice of which duplicate species is kept reproducible.
        for spcs in sorted(os.listdir(path)):
            # Match on the extension, not on a substring, so that files such
            # as '0.yml.bak' are not parsed as species definitions.
            if not spcs.endswith('.yml'):
                continue

            spcs_path = os.path.join(path, spcs)
            ref_spcs = ReferenceSpecies.__new__(ReferenceSpecies)
            ref_spcs.load_yaml(spcs_path)

            # Reject incomplete entries before paying for molecule parsing.
            if ignore_incomplete and (
                    len(ref_spcs.calculated_data) == 0 or len(ref_spcs.reference_data) == 0):
                logging.warning(
                    f'Molecule {ref_spcs.smiles} from reference set `{set_name}` does not have any '
                    f'reference data and/or calculated data. This entry will not be added'
                )
                continue

            molecule = Molecule().from_adjacency_list(
                ref_spcs.adjacency_list,
                raise_atomtype_exception=False,
                raise_charge_exception=False)

            # perform isomorphism checks to prevent duplicate species
            if any(molecule.is_isomorphic(mol) for mol in molecule_list):
                logging.warning(
                    f'Molecule {ref_spcs.smiles} from reference set `{set_name}` already exists in '
                    f'the reference database. The entry from this reference set will not be added. \n'
                    f'The path for this species is {spcs_path}')
                continue

            molecule_list.append(molecule)
            reference_set.append(ref_spcs)

        self.reference_sets[set_name] = reference_set
def isomorphic_smiles(
    smiles_1: str,
    smiles_2: str,
) -> bool:
    """
    Tell whether two SMILES strings describe isomorphic molecules.

    A ``Molecule`` is built from each SMILES string and the first is
    compared with the second via ``is_isomorphic``.

    Args:
        smiles_1: A SMILES string.
        smiles_2: A SMILES string.

    Returns:
        bool: Whether the two SMILES strings represent isomorphic molecules.
    """
    return Molecule(smiles=smiles_1).is_isomorphic(Molecule(smiles=smiles_2))
def test_intra_substitution_s_isomerization(self):
    """
    Check that applying the intra_substitutionS_isomerization recipe yields
    a single product whose atom labels match the expected structure.

    This family is its own reverse.
    """
    reactant_adjlist = """
multiplicity 2
1 *2 C u0 p0 c0 {3,S} {4,S} {5,S} {6,S}
2 C u0 p0 c0 {3,S} {7,S} {8,S} {9,S}
3 *3 C u1 p0 c0 {1,S} {2,S} {10,S}
4 *1 S u0 p2 c0 {1,S} {11,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {1,S}
7 H u0 p0 c0 {2,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
"""
    product_adjlist = """
multiplicity 2
1 *2 C u0 p0 c0 {2,S} {3,S} {4,S} {5,S}
2 C u0 p0 c0 {1,S} {6,S} {7,S} {8,S}
3 *3 C u1 p0 c0 {1,S} {9,S} {10,S}
4 *1 S u0 p2 c0 {1,S} {11,S}
5 H u0 p0 c0 {1,S}
6 H u0 p0 c0 {2,S}
7 H u0 p0 c0 {2,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
"""
    rxn_family = self.database.families['intra_substitutionS_isomerization']
    reactants = [Molecule().from_adjacency_list(reactant_adjlist)]
    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)
    self.assertEqual(len(products), 1)
    product = products[0]

    # Pair every labeled atom of the expected structure with the atom
    # carrying the same label in the generated product.
    labeled = expected_product.get_all_labeled_atoms()
    mapping = {
        atom: product.get_labeled_atoms(label)[0]
        for label, atom in labeled.items()
    }

    self.assertTrue(expected_product.is_isomorphic(product, mapping))
def test_6_membered_central_cc_shift(self):
    """
    Check that applying the 6_membered_central_C-C_shift recipe yields a
    single product whose atom labels match the expected structure.

    This family is its own reverse.
    """
    reactant_adjlist = """
1 *3 C u0 p0 c0 {2,S} {3,S} {7,S} {8,S}
2 *4 C u0 p0 c0 {1,S} {4,S} {9,S} {10,S}
3 *2 C u0 p0 c0 {1,S} {5,T}
4 *5 C u0 p0 c0 {2,S} {6,T}
5 *1 C u0 p0 c0 {3,T} {11,S}
6 *6 C u0 p0 c0 {4,T} {12,S}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {1,S}
9 H u0 p0 c0 {2,S}
10 H u0 p0 c0 {2,S}
11 H u0 p0 c0 {5,S}
12 H u0 p0 c0 {6,S}
"""
    product_adjlist = """
1 *3 C u0 p0 c0 {2,S} {5,D} {7,S}
2 *4 C u0 p0 c0 {1,S} {6,D} {8,S}
3 *1 C u0 p0 c0 {5,D} {9,S} {10,S}
4 *6 C u0 p0 c0 {6,D} {11,S} {12,S}
5 *2 C u0 p0 c0 {1,D} {3,D}
6 *5 C u0 p0 c0 {2,D} {4,D}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {3,S}
11 H u0 p0 c0 {4,S}
12 H u0 p0 c0 {4,S}
"""
    rxn_family = self.database.families['6_membered_central_C-C_shift']
    reactants = [Molecule().from_adjacency_list(reactant_adjlist)]
    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)
    self.assertEqual(len(products), 1)
    product = products[0]

    # Pair every labeled atom of the expected structure with the atom
    # carrying the same label in the generated product.
    labeled = expected_product.get_all_labeled_atoms()
    mapping = {
        atom: product.get_labeled_atoms(label)[0]
        for label, atom in labeled.items()
    }

    self.assertTrue(expected_product.is_isomorphic(product, mapping))
def test_react_benzene_bond(self):
    """
    Check that adding a hydrogen atom to benzene (written with benzene
    bonds) gives a single, kekulized product.
    """
    benzene_adjlist = """
1 *1 C u0 p0 c0 {2,B} {6,B} {7,S}
2 *2 C u0 p0 c0 {1,B} {3,B} {8,S}
3 C u0 p0 c0 {2,B} {4,B} {9,S}
4 C u0 p0 c0 {3,B} {5,B} {10,S}
5 C u0 p0 c0 {4,B} {6,B} {11,S}
6 C u0 p0 c0 {1,B} {5,B} {12,S}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {4,S}
11 H u0 p0 c0 {5,S}
12 H u0 p0 c0 {6,S}
"""
    product_adjlist = """
multiplicity 2
1 C u0 p0 c0 {2,S} {6,S} {7,S} {13,S}
2 C u1 p0 c0 {1,S} {3,S} {8,S}
3 C u0 p0 c0 {2,S} {4,D} {9,S}
4 C u0 p0 c0 {3,D} {5,S} {10,S}
5 C u0 p0 c0 {4,S} {6,D} {11,S}
6 C u0 p0 c0 {1,S} {5,D} {12,S}
7 H u0 p0 c0 {1,S}
8 H u0 p0 c0 {2,S}
9 H u0 p0 c0 {3,S}
10 H u0 p0 c0 {4,S}
11 H u0 p0 c0 {5,S}
12 H u0 p0 c0 {6,S}
13 H u0 p0 c0 {1,S}
"""
    rxn_family = self.database.families['R_Addition_MultipleBond']

    aromatic = Molecule().from_adjacency_list(benzene_adjlist)
    h_radical = Molecule().from_adjacency_list("1 *3 H u1 p0 c0")
    reactants = [aromatic, h_radical]

    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)

    self.assertEqual(len(products), 1)
    product = products[0]
    self.assertTrue(expected_product.is_isomorphic(product))
def test_react_benzene_bond2(self):
    """
    Check that adding a hydrogen atom to phenanthrene (written with benzene
    bonds) gives a single, kekulized product.
    """
    aromatic_adjlist = """
1 *1 C u0 p0 c0 {2,B} {3,B} {6,B}
2 *2 C u0 p0 c0 {1,B} {4,B} {9,B}
3 C u0 p0 c0 {1,B} {5,B} {7,B}
4 C u0 p0 c0 {2,B} {8,B} {10,B}
5 C u0 p0 c0 {3,B} {11,B} {17,S}
6 C u0 p0 c0 {1,B} {12,B} {18,S}
7 C u0 p0 c0 {3,B} {8,B} {19,S}
8 C u0 p0 c0 {4,B} {7,B} {20,S}
9 C u0 p0 c0 {2,B} {13,B} {21,S}
10 C u0 p0 c0 {4,B} {14,B} {23,S}
11 C u0 p0 c0 {5,B} {12,B} {15,S}
12 C u0 p0 c0 {6,B} {11,B} {16,S}
13 C u0 p0 c0 {9,B} {14,B} {22,S}
14 C u0 p0 c0 {10,B} {13,B} {24,S}
15 H u0 p0 c0 {11,S}
16 H u0 p0 c0 {12,S}
17 H u0 p0 c0 {5,S}
18 H u0 p0 c0 {6,S}
19 H u0 p0 c0 {7,S}
20 H u0 p0 c0 {8,S}
21 H u0 p0 c0 {9,S}
22 H u0 p0 c0 {13,S}
23 H u0 p0 c0 {10,S}
24 H u0 p0 c0 {14,S}
"""
    product_adjlist = """
multiplicity 2
1 *1 C u0 p0 c0 {2,S} {3,S} {5,S} {15,S}
2 *2 C u1 p0 c0 {1,S} {4,S} {8,S}
3 C u0 p0 c0 {1,S} {6,S} {7,D}
4 C u0 p0 c0 {2,S} {9,D} {10,S}
5 C u0 p0 c0 {1,S} {11,D} {16,S}
6 C u0 p0 c0 {3,S} {12,D} {19,S}
7 C u0 p0 c0 {3,D} {9,S} {20,S}
8 C u0 p0 c0 {2,S} {13,D} {22,S}
9 C u0 p0 c0 {4,D} {7,S} {21,S}
10 C u0 p0 c0 {4,S} {14,D} {24,S}
11 C u0 p0 c0 {5,D} {12,S} {18,S}
12 C u0 p0 c0 {6,D} {11,S} {17,S}
13 C u0 p0 c0 {8,D} {14,S} {23,S}
14 C u0 p0 c0 {10,D} {13,S} {25,S}
15 *3 H u0 p0 c0 {1,S}
16 H u0 p0 c0 {5,S}
17 H u0 p0 c0 {12,S}
18 H u0 p0 c0 {11,S}
19 H u0 p0 c0 {6,S}
20 H u0 p0 c0 {7,S}
21 H u0 p0 c0 {9,S}
22 H u0 p0 c0 {8,S}
23 H u0 p0 c0 {13,S}
24 H u0 p0 c0 {10,S}
25 H u0 p0 c0 {14,S}
"""
    rxn_family = self.database.families['R_Addition_MultipleBond']

    aromatic = Molecule().from_adjacency_list(aromatic_adjlist)
    h_radical = Molecule().from_adjacency_list("1 *3 H u1 p0 c0")
    reactants = [aromatic, h_radical]

    expected_product = Molecule().from_adjacency_list(product_adjlist)

    products = rxn_family.apply_recipe(reactants)

    self.assertEqual(len(products), 1)
    product = products[0]
    self.assertTrue(expected_product.is_isomorphic(product))
def opt_conf(i, rmsd_cutoff):
    """
    A helper function to optimize the geometry of a conformer.
    Only for use within this parent function

    The names ``conformers``, ``calc``, ``reference_mol``, ``return_dict``
    and ``get_energy`` are not defined here; they are read from the
    surrounding scope.

    Args:
        i: Key used to look the conformer up in ``conformers``. The value
            ``'ref'`` (compared via ``str(i)``) triggers the isomorphism
            check and is never written to ``return_dict``; bond-length
            constraints on a species are only applied when ``i`` is an int.
        rmsd_cutoff: Threshold compared against the best RMSD between the
            optimized conformer and each geometry already in ``return_dict``.

    Returns:
        bool: ``False`` when the 'ref' species geometry is not isomorphic
        with ``reference_mol`` or raises ``AtomTypeError`` while being
        converted; ``True`` in every other case, including when the
        geometry is within ``rmsd_cutoff`` of a stored one (in which case
        it is not stored again).

    NOTE(review): the branch nesting below was reconstructed from
    whitespace-collapsed text -- confirm against the original layout.
    """
    conformer = conformers[i]

    calculator = conformer.ase_molecule.get_calculator()

    # Atom-index pairs handed to FixBondLengths further down.
    labels = []
    for bond in conformer.get_bonds():
        labels.append(bond.atom_indices)

    if isinstance(conformer, TS):
        label = conformer.reaction_label
        # For a TS the pair of atoms labeled *1 and *3 is constrained too.
        ind1 = conformer.rmg_molecule.get_labeled_atoms("*1")[0].sorting_label
        ind2 = conformer.rmg_molecule.get_labeled_atoms("*3")[0].sorting_label
        labels.append([ind1, ind2])
        # NOTE(review): `type` shadows the builtin for the rest of this function.
        type = 'ts'
    else:
        label = conformer.smiles
        type = 'species'

    # File-based calculators get a per-conformer label and working directory.
    if isinstance(calc, FileIOCalculator):
        if calculator.directory:
            directory = calculator.directory
        else:
            directory = 'conformer_logs'
        calculator.label = "{}_{}".format(conformer.smiles, i)
        calculator.directory = os.path.join(directory, label,'{}_{}'.format(conformer.smiles, i))
        if not os.path.exists(calculator.directory):
            try:
                os.makedirs(calculator.directory)
            except OSError:
                # Best effort: the failure is only logged and execution continues.
                logging.info("An error occured when creating {}".format(calculator.directory))

        calculator.atoms = conformer.ase_molecule

    conformer.ase_molecule.set_calculator(calculator)
    opt = BFGS(conformer.ase_molecule, logfile=None)
    if type == 'species':
        # Bond lengths are frozen only for integer keys, so the 'ref'
        # conformer is optimized without constraints.
        if isinstance(i,int):
            c = FixBondLengths(labels)
            conformer.ase_molecule.set_constraint(c)
        try:
            # NOTE(review): `steps` is passed as the float 1e6 -- confirm the optimizer accepts it.
            opt.run(steps=1e6)
        except RuntimeError:
            logging.info("Optimization failed...we will use the unconverged geometry")
            pass
        if str(i) == 'ref':
            conformer.update_coords_from("ase")
            try:
                # Rebuild a molecule from the optimized coordinates and
                # compare it with the reference connectivity.
                rmg_mol = Molecule()
                rmg_mol.from_xyz(
                    conformer.ase_molecule.arrays["numbers"],
                    conformer.ase_molecule.arrays["positions"]
                )
                if not rmg_mol.is_isomorphic(reference_mol):
                    logging.info("{}_{} is not isomorphic with reference mol".format(conformer,str(i)))
                    return False
            except AtomTypeError:
                logging.info("Could not create a RMG Molecule from optimized conformer coordinates...assuming not isomorphic")
                return False

    if type == 'ts':
        # A TS is always constrained and optimized with an explicit fmax of 0.20.
        c = FixBondLengths(labels)
        conformer.ase_molecule.set_constraint(c)
        try:
            opt.run(fmax=0.20, steps=1e6)
        except RuntimeError:
            logging.info("Optimization failed...we will use the unconverged geometry")
            pass

    conformer.update_coords_from("ase")
    energy = get_energy(conformer)
    conformer.energy = energy
    # Compare against every geometry collected so far; a match within the
    # cutoff returns early without storing this one.
    if len(return_dict)>0:
        conformer_copy = conformer.copy()
        # NOTE(review): `index` is not used inside the loop.
        for index,post in return_dict.items():
            conf_copy = conformer.copy()
            conf_copy.ase_molecule.positions = post
            conf_copy.update_coords_from("ase")
            rmsd = rdMolAlign.GetBestRMS(conformer_copy.rdkit_molecule,conf_copy.rdkit_molecule)
            if rmsd <= rmsd_cutoff:
                return True
    # The reference conformer is never recorded in the shared results.
    if str(i) != 'ref':
        return_dict[i] = conformer.ase_molecule.get_positions()
    return True