Ejemplo n.º 1
0
def test_failing_files():
    """
    Both path helpers must raise IOError for a file name that does not exist.
    """
    missing = 'zzz.zzz'
    # get_full_path is exercised first, then get_data_path
    for resolver in (chemper_utils.get_full_path, chemper_utils.get_data_path):
        with pytest.raises(IOError):
            resolver(missing)
Ejemplo n.º 2
0
def check_mol_file(file_name):
    """
    Resolve a molecule file name to a usable path.

    The name is first tried exactly as given. If nothing exists there,
    it is looked up under 'molecules' through get_data_path.

    Parameters
    ----------
    file_name - str
        path to a molecule file

    Returns
    -------
    path - str
        absolute path of the local file when it exists, otherwise the
        path returned by get_data_path for the packaged molecule file

    Raises
    ------
    IOError
        if the file exists neither locally nor at the packaged location
    """
    if not os.path.exists(file_name):
        # not a local file, fall back to the packaged molecules directory
        packaged = get_data_path(os.path.join('molecules', file_name))
        if os.path.exists(packaged):
            return packaged
        raise IOError(
            "Molecule file (%s) was not found locally or in chemper/data/molecules"
            % file_name)

    return os.path.abspath(file_name)
Ejemplo n.º 3
0
def _iter_mol2_blocks(lines, delimiter="@<TRIPOS>MOLECULE"):
    """Yield the text of each molecule record found in an iterable of mol2 lines."""
    block = []
    for line in lines:
        # a delimiter line opens a new record, so emit the one collected so far
        if line.startswith(delimiter) and block:
            yield "".join(block)
            block = []
        block.append(line)
    # whatever is left after the last delimiter is the final record
    if block:
        yield "".join(block)


def mols_from_mol2(mol2_file):
    """
    Parses a mol2 file into chemper molecules using RDKit

    This is a hack for separating mol2 files, taken from a mailing-list
    discussion archived on www.mail-archive.com (message msg01510).
    It splits up a mol2 file into blocks and then uses RDKit to parse those blocks

    Parameters
    ----------
    mol2_file: str
               relative or absolute path to a mol2 file you want to parse,
               accessible from the current directory; if it is not found
               there it is looked up under 'molecules' via get_data_path

    Returns
    -------
    mols: list of chemper Mols
          list of molecules in the mol2 file as chemper molecules;
          blocks that RDKit fails to parse (returns None for) are skipped

    Raises
    ------
    IOError
        if the file cannot be found locally or in the chemper data
        directory, or if its name does not end in the 'mol2' extension
    """
    # TODO: check that this works with mol2 files with a single molecule
    # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
    import os

    if not os.path.exists(mol2_file):
        from chemper.chemper_utils import get_data_path
        mol_path = get_data_path(os.path.join('molecules', mol2_file))

        if not os.path.exists(mol_path):
            # report the name the caller passed in
            raise IOError("File '%s' not found locally or in chemper/data/molecules." % mol2_file)
        mol2_file = mol_path

    if mol2_file.split('.')[-1] != "mol2":
        raise IOError("File '%s' is not a mol2 file" % mol2_file)

    molecules = []
    # the context manager closes the file even if parsing a block raises
    with open(mol2_file, 'r') as mol2_stream:
        for block in _iter_mol2_blocks(mol2_stream):
            rdmol = Chem.MolFromMol2Block(block)
            if rdmol is not None:
                molecules.append(Mol(rdmol))

    return molecules
Ejemplo n.º 4
0
def test_valid_files(fn):
    """
    A packaged file must resolve to <chemper package dir>/data/<fn>
    through both get_data_path and get_full_path.
    """
    import os
    import chemper

    # expected location inside the installed chemper package
    expected = os.path.join(os.path.join(chemper.__path__[0], 'data'), fn)

    from_data = chemper_utils.get_data_path(fn)
    assert from_data == expected

    from_full = chemper_utils.get_full_path(fn)
    assert from_full == from_data
    assert from_full == expected
Ejemplo n.º 5
0
def mols_from_file(mol_file):
    """
    Reads every molecule in a file with the OpenEye toolkits and wraps
    each one in a chemper Mol

    Parameters
    ----------
    mol_file: str
              relative or full path to the molecule file; if it does not
              exist as given it is looked up under 'molecules' via
              get_data_path

    Returns
    -------
    mols: list of chemper Mols
          molecules read from the file, in file order

    Raises
    ------
    IOError
        if the file is found neither locally nor at the packaged location
    """
    import os

    if not os.path.exists(mol_file):
        from chemper.chemper_utils import get_data_path
        packaged = get_data_path(os.path.join('molecules', mol_file))

        if os.path.exists(packaged):
            mol_file = packaged
        else:
            raise IOError(
                "File '%s' not found locally or in chemper/data/molecules." %
                mol_file)

    # OpenEye input stream plus a single molecule object reused for every read
    stream = oechem.oemolistream(mol_file)
    scratch = oechem.OECreateOEGraphMol()

    parsed = []
    while True:
        if not oechem.OEReadMolecule(stream, scratch):
            break
        # for SD files the molecule name may only be present in the SD tags
        if scratch.GetTitle() == '':
            scratch.SetTitle(oechem.OEGetSDData(scratch, 'name').strip())
        # NOTE(review): OEMol(scratch) presumably copies the molecule so the
        # reused read buffer can be overwritten by the next read - confirm
        parsed.append(Mol(oechem.OEMol(scratch)))
    stream.close()

    return parsed
Ejemplo n.º 6
0
def test_matching_smirks(smirks1, smirks2, checks):
    """
    check_smirks_agree on the MiniDrugBank molecules must return `checks`
    for the given pair of SMIRKS patterns.
    """
    mol2_name = os.path.join('molecules', 'MiniDrugBank_tripos.mol2')
    molecules = mol_toolkit.mols_from_mol2(chemper_utils.get_data_path(mol2_name))
    assert chemper_utils.check_smirks_agree(smirks1, smirks2, molecules) == checks
Ejemplo n.º 7
0
def test_bond_exception(toolkit):
    """Constructing the toolkit's Bond from None must raise TypeError."""
    bond_cls = toolkit.Bond
    with pytest.raises(TypeError):
        bond_cls(None)


@pytest.mark.parametrize('toolkit', mts)
def test_mol_exception(toolkit):
    """Constructing the toolkit's Mol from None must raise TypeError."""
    mol_cls = toolkit.Mol
    with pytest.raises(TypeError):
        mol_cls(None)


# -------------------------------
# check molecule file parsers
# -------------------------------

# bare file name, exercised by the parser tests below alongside the resolved path
mol2_rel_path = 'MiniDrugBank_tripos.mol2'
# the same file resolved through get_data_path
mol2_abs_file = chemper_utils.get_data_path('molecules/MiniDrugBank_tripos.mol2')
# both spellings are fed to the parametrized parser tests
paths = [mol2_abs_file, mol2_rel_path]


# For the following functions, we will test default behavior and
# look for exceptions based on the available mol toolkit

@pytest.mark.parametrize('toolkit,path', itertools.product(mts, paths))
def test_file_parsing(toolkit, path):
    """Every toolkit must read 363 molecules from the MiniDrugBank mol2 file."""
    parsed = toolkit.mols_from_mol2(path)
    n_parsed = len(parsed)
    assert n_parsed == 363


@pytest.mark.parametrize('path', paths)
def test_mols_specified_toolkit(path):