Example #1
0
def test_load_xyzfile(xyz_path):
    """Build a Molecule from ``1_opt.xyz`` and check its derived representations.

    Parameters
    ----------
    xyz_path : str
        Directory that contains ``1_opt.xyz`` (supplied as a fixture).
    """
    xyz_file = os.path.join(xyz_path, '1_opt.xyz')
    mol = Molecule(xyz_file, 'xyz')

    # State expected right after construction from the xyz file.
    assert mol.creator == ('XYZ', xyz_file)
    assert isinstance(mol.pybel_molecule, pybel.Molecule)
    assert mol.xyz.geometry.shape == (28, 3)

    # (converter method, attribute it fills, expected value), checked in order:
    # each converter is called first and the attribute is compared afterwards.
    expected_representations = (
        ('to_smiles', 'smiles', 'c1ccc(CC2CCCC2)cc1'),
        ('to_smarts', 'smarts',
         '[#6]1-[#6]-[#6]-[#6](-[#6]-1)-[#6]-[#6]1:[#6]:[#6]:[#6]:[#6]:[#6]:1'),
        ('to_inchi', 'inchi',
         'InChI=1S/C12H16/c1-2-6-11(7-3-1)10-12-8-4-5-9-12/h1-3,6-7,12H,4-5,8-10H2'),
    )
    for converter, attribute, expected in expected_representations:
        getattr(mol, converter)()
        assert getattr(mol, attribute) == expected
Example #2
0
def load_xyz_polarizability():
    """Load the bundled organic xyz files and their polarizabilities (Bohr^3).

    The xyz coordinates of small organic molecules are optimized at the
    BP86/def2svp level of theory. The polarizabilities of the molecules are
    calculated at the same level of theory.

    =================   ======================
    rows                                    50
    Columns                                  1
    header                      polarizability
    molecules rep.                         xyz
    Features                                 0
    Returns             1 list and 1 dataframe
    =================   ======================

    Returns
    -------
    molecules : list
        The list of molecule objects with xyz coordinates, in file order
        (``1_opt.xyz`` through ``50_opt.xyz``). ``to_smiles()`` has been
        called on each object and its ``pybel_molecule`` attribute is reset
        to ``None`` before it is returned.

    pol : pandas dataframe
        The content of ``pol.csv``: the polarizability of each molecule as a
        column of the dataframe.

    Examples
    --------
    >>> from chemml.datasets import load_xyz_polarizability
    >>> molecules, polarizabilities = load_xyz_polarizability()
    >>> print(len(molecules))
    50
    >>> print(polarizabilities.shape)
    (50, 1)
    """
    data_dir = pkg_resources.resource_filename(
        'chemml', os.path.join('datasets', 'data', 'organic_xyz'))

    def _read_molecule(index):
        # Build one Molecule from "<index>_opt.xyz" and generate its SMILES.
        mol = Molecule(os.path.join(data_dir, "%i_opt.xyz" % index), "xyz")
        mol.to_smiles()
        # NOTE(review): presumably cleared so the returned objects do not keep
        # the pybel handle around (e.g. for pickling) -- confirm the intent.
        mol.pybel_molecule = None
        return mol

    # The data set files are numbered 1..50 inclusive.
    molecules = [_read_molecule(index) for index in range(1, 51)]

    polarizabilities = pd.read_csv(os.path.join(data_dir, 'pol.csv'))
    return molecules, polarizabilities
Example #3
0
def test_hydrogens(caffeine_smiles, caffeine_canonical, caffeine_inchi):
    """Check caffeine's SMILES and InChI before and after adding hydrogens.

    Parameters
    ----------
    caffeine_smiles : str
        SMILES used to build the first molecule.
    caffeine_canonical : str
        SMILES expected from ``to_smiles()`` before hydrogens are added.
    caffeine_inchi : str
        InChI used to build the second molecule; also the expected InChI
        both before and after hydrogens are added.
    """
    smiles_with_hydrogens = (
        "[H]c1nc2c(c(=O)n(C([H])([H])[H])c(=O)n2C([H])([H])[H])n1C([H])([H])[H]"
    )

    # --- molecule built from SMILES ---
    from_smiles = Molecule(caffeine_smiles, 'smiles')
    from_smiles.to_smiles()
    assert isinstance(from_smiles.rdkit_molecule, Chem.Mol)
    assert from_smiles.smiles == caffeine_canonical

    # After adding hydrogens the regenerated SMILES spells them out.
    from_smiles.hydrogens('add', explicitOnly=False)
    from_smiles.to_smiles()
    assert from_smiles.smiles == smiles_with_hydrogens

    # --- molecule built from InChI ---
    from_inchi = Molecule(caffeine_inchi, 'inchi')
    from_inchi.to_inchi()
    assert from_inchi.inchi == caffeine_inchi

    # The InChI is expected to be unchanged by adding hydrogens.
    from_inchi.hydrogens('add')
    from_inchi.to_inchi()
    assert from_inchi.inchi == caffeine_inchi
Example #4
0
def test_mol2smiles(caffeine_smiles, caffeine_canonical, caffeine_kekulize):
    """Check ``to_smiles`` output for default, kekulized and canonical requests.

    Parameters
    ----------
    caffeine_smiles : str
        SMILES used to build the molecule.
    caffeine_canonical : str
        Expected SMILES for the default and ``canonical=True`` calls.
    caffeine_kekulize : str
        Expected SMILES for the ``kekuleSmiles=True`` calls.
    """
    mol = Molecule(caffeine_smiles, 'smiles')

    # Default arguments.
    mol.to_smiles()
    assert isinstance(mol.rdkit_molecule, Chem.Mol)
    assert mol.smiles == caffeine_canonical

    # Follow-up calls on the same object, run in this exact order.
    followup_calls = (
        ({'kekuleSmiles': True}, caffeine_kekulize),
        # kekulize takes priority over canonical
        # NOTE(review): this call repeats the previous one; presumably
        # canonical is already on by default -- confirm.
        ({'kekuleSmiles': True}, caffeine_kekulize),
        # if kekule is False, everything is OK
        ({'canonical': True}, caffeine_canonical),
    )
    for smiles_kwargs, expected in followup_calls:
        mol.to_smiles(**smiles_kwargs)
        assert mol.smiles == expected
Example #5
0
def test_mol2smiles_defaultargs(caffeine_smiles):
    """Check that ``to_smiles`` accepts its own defaults and a ``canonical`` override.

    Parameters
    ----------
    caffeine_smiles : str
        SMILES used to build the molecule.
    """
    m = Molecule(caffeine_smiles, 'smiles')
    m.to_smiles()
    # Passing the default arguments explicitly must be accepted.
    m.to_smiles(**m._default_rdkit_smiles_args)
    # all the args & canonical=False
    # Build a copy instead of writing into the mapping owned by the molecule,
    # so the override cannot leak into the defaults used by later calls.
    args = dict(m._default_rdkit_smiles_args, canonical=False)
    m.to_smiles(**args)
    assert m.smiles_args['canonical'] is False