def test_load_xyzfile(xyz_path):
    """Load ``1_opt.xyz`` into a Molecule and check its derived representations.

    Parameters
    ----------
    xyz_path : str
        Fixture: directory that contains the ``1_opt.xyz`` file.
    """
    xyz_file = os.path.join(xyz_path, '1_opt.xyz')
    mol = Molecule(xyz_file, 'xyz')

    # Bookkeeping set up by the constructor for an xyz input.
    assert mol.creator == ('XYZ', xyz_file)
    assert isinstance(mol.pybel_molecule, pybel.Molecule)
    # One row of Cartesian coordinates per atom (C12H16 -> 28 atoms).
    assert mol.xyz.geometry.shape == (28, 3)

    # Each to_* call populates the attribute of the same name.
    mol.to_smiles()
    assert mol.smiles == 'c1ccc(CC2CCCC2)cc1'

    mol.to_smarts()
    expected_smarts = (
        '[#6]1-[#6]-[#6]-[#6](-[#6]-1)-[#6]-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1'
    )
    assert mol.smarts == expected_smarts

    mol.to_inchi()
    expected_inchi = (
        'InChI=1S/C12H16/c1-2-6-11(7-3-1)10-12-8-4-5-9-12/'
        'h1-3,6-7,12H,4-5,8-10H2'
    )
    assert mol.inchi == expected_inchi
def load_xyz_polarizability():
    """Load and return the organic xyz files and polarizabilities (Bohr^3).

    The xyz coordinates of small organic molecules are optimized at the
    BP86/def2svp level of theory. The polarizabilities of the molecules are
    calculated at the same level of theory.

    =================   ======================
    rows                50
    Columns             1
    header              polarizability
    molecules rep.      xyz
    Features            0
    Returns             1 list and 1 dataframe
    =================   ======================

    Returns
    -------
    molecules : list
        The list of molecule objects with xyz coordinates.

    pol : pandas dataframe
        The polarizability of each molecule as a column of dataframe.

    Examples
    --------
    >>> from chemml.datasets import load_xyz_polarizability
    >>> molecules, polarizabilities = load_xyz_polarizability()
    >>> print(len(molecules))
    50
    >>> print(polarizabilities.shape)
    (50, 1)
    """
    data_dir = pkg_resources.resource_filename(
        'chemml', os.path.join('datasets', 'data', 'organic_xyz'))

    def _read_molecule(index):
        # Build one Molecule from "<index>_opt.xyz" and generate its SMILES.
        mol = Molecule(os.path.join(data_dir, "%i_opt.xyz" % index), "xyz")
        mol.to_smiles()
        # The pybel object is cleared once the SMILES string exists
        # (presumably to keep the returned molecules lightweight -- TODO confirm).
        mol.pybel_molecule = None
        return mol

    # Files are read one at a time (1_opt.xyz ... 50_opt.xyz) rather than
    # through chemml.initialization.XYZreader.
    molecules = [_read_molecule(index) for index in range(1, 51)]

    polarizabilities = pd.read_csv(os.path.join(data_dir, 'pol.csv'))
    return molecules, polarizabilities
def test_hydrogens(caffeine_smiles, caffeine_canonical, caffeine_inchi):
    """Check ``Molecule.hydrogens('add')`` for SMILES- and InChI-built molecules.

    Parameters
    ----------
    caffeine_smiles : str
        Fixture: input SMILES used to build the molecule.
    caffeine_canonical : str
        Fixture: SMILES expected from ``to_smiles()`` with default arguments.
    caffeine_inchi : str
        Fixture: InChI string used both as input and as expected output.
    """
    # --- molecule created from SMILES ---
    smiles_mol = Molecule(caffeine_smiles, 'smiles')
    smiles_mol.to_smiles()
    assert isinstance(smiles_mol.rdkit_molecule, Chem.Mol)
    assert smiles_mol.smiles == caffeine_canonical

    # add hydrogens
    smiles_mol.hydrogens('add', explicitOnly=False)

    # canonical smiles with hydrogens
    smiles_mol.to_smiles()
    expected_with_h = (
        "[H]c1nc2c(c(=O)n(C([H])([H])[H])c(=O)n2C([H])([H])[H])n1C([H])([H])[H]"
    )
    assert smiles_mol.smiles == expected_with_h

    # --- molecule created from InChI ---
    inchi_mol = Molecule(caffeine_inchi, 'inchi')
    inchi_mol.to_inchi()
    assert inchi_mol.inchi == caffeine_inchi

    # The InChI string is expected to be the same after adding hydrogens.
    inchi_mol.hydrogens('add')
    inchi_mol.to_inchi()
    assert inchi_mol.inchi == caffeine_inchi
def test_mol2smiles(caffeine_smiles, caffeine_canonical, caffeine_kekulize):
    """Check ``Molecule.to_smiles`` with canonical and kekulized output.

    Parameters
    ----------
    caffeine_smiles : str
        Fixture: input SMILES used to build the molecule.
    caffeine_canonical : str
        Fixture: SMILES expected from the default / ``canonical=True`` call.
    caffeine_kekulize : str
        Fixture: SMILES expected when ``kekuleSmiles=True`` is requested.
    """
    m = Molecule(caffeine_smiles, 'smiles')

    # Default arguments give the canonical form.
    m.to_smiles()
    assert isinstance(m.rdkit_molecule, Chem.Mol)
    assert m.smiles == caffeine_canonical

    m.to_smiles(kekuleSmiles=True)
    assert m.smiles == caffeine_kekulize

    # kekulize takes priority over canonical: request both explicitly so this
    # step exercises the combination instead of repeating the previous call.
    m.to_smiles(canonical=True, kekuleSmiles=True)
    assert m.smiles == caffeine_kekulize

    # if kekule is False, everything is OK
    m.to_smiles(canonical=True)
    assert m.smiles == caffeine_canonical
def test_mol2smiles_defaultargs(caffeine_smiles):
    """Check that ``to_smiles`` accepts its full default-argument set.

    The defaults are passed back unchanged first, then again with
    ``canonical`` overridden to ``False``; the override must show up in
    ``smiles_args``.

    Parameters
    ----------
    caffeine_smiles : str
        Fixture: input SMILES used to build the molecule.
    """
    m = Molecule(caffeine_smiles, 'smiles')
    m.to_smiles()
    m.to_smiles(**m._default_rdkit_smiles_args)

    # all the args & canonical=False
    # Work on a copy: assigning into the original mapping would modify the
    # molecule's own defaults, so the assertion below could pass because of
    # the mutated defaults rather than because of the keyword passed in.
    args = dict(m._default_rdkit_smiles_args)
    args['canonical'] = False
    m.to_smiles(**args)
    assert m.smiles_args['canonical'] is False