def test_failing_files():
    """
    Both path helpers in chemper_utils must raise IOError
    when asked for a file name that does not exist.
    """
    missing_name = 'zzz.zzz'
    # same order as before: full path lookup first, data path lookup second
    for lookup in (chemper_utils.get_full_path, chemper_utils.get_data_path):
        with pytest.raises(IOError):
            lookup(missing_name)
def check_mol_file(file_name):
    """
    Resolve a molecule file name to a usable path.

    Parameters
    ----------
    file_name - str
        path to a molecule file, either reachable from the current
        directory or relative to the 'molecules' data directory

    Returns
    -------
    path - str
        absolute path when the file exists locally, otherwise the path
        returned by get_data_path for 'molecules/<file_name>'

    Raises
    ------
    IOError
        if the file is found neither locally nor at the data path
    """
    # a file that exists as given always wins
    if os.path.exists(file_name):
        return os.path.abspath(file_name)

    # otherwise look it up inside the packaged molecules directory
    packaged = get_data_path(os.path.join('molecules', file_name))
    if os.path.exists(packaged):
        return packaged

    raise IOError(
        "Molecule file (%s) was not found locally or in chemper/data/molecules"
        % file_name)
def mols_from_mol2(mol2_file):
    """
    Parses a mol2 file into chemper molecules using RDKit

    This is a hack for separating mol2 files taken from a Source Forge discussion here:
    https://www.mail-archive.com/[email protected]/msg01510.html
    It splits up a mol2 file into blocks and then uses RDKit to parse those blocks

    Parameters
    ----------
    mol2_file: str
        relative or absolute path to a mol2 file you want to parse,
        accessible from the current directory. If no file exists at that
        path, it is looked up under 'molecules' via get_data_path.

    Returns
    -------
    mols: list of chemper Mols
        list of molecules in the mol2 file as chemper molecules;
        blocks for which RDKit returns None are skipped

    Raises
    ------
    IOError
        if the file is found neither locally nor at the data path,
        or if the resolved file name does not end in '.mol2'
    """
    # TODO: check that this works with mol2 files with a single molecule
    # TODO: figure out if @<TRIPOS>MOLECULE is the only delimiter acceptable in this file type
    import os

    if not os.path.exists(mol2_file):
        from chemper.chemper_utils import get_data_path
        mol_path = get_data_path(os.path.join('molecules', mol2_file))
        if not os.path.exists(mol_path):
            # the message must use mol2_file; the previous code referenced an
            # undefined name here and raised NameError instead of IOError
            raise IOError(
                "File '%s' not found locally or in chemper/data/molecules."
                % mol2_file)
        mol2_file = mol_path

    if mol2_file.split('.')[-1] != "mol2":
        raise IOError("File '%s' is not a mol2 file" % mol2_file)

    delimiter = "@<TRIPOS>MOLECULE"
    molecules = []
    mol2_block = []

    def _append_block(block_lines):
        # hand one molecule's worth of text to RDKit; skip unparsable blocks
        rdmol = Chem.MolFromMol2Block("".join(block_lines))
        if rdmol is not None:
            molecules.append(Mol(rdmol))

    # the context manager closes the file even if parsing raises;
    # a file that vanished since the check above still raises IOError from open()
    with open(mol2_file, 'r') as mol2_lines:
        for line in mol2_lines:
            # a new delimiter ends the block collected so far
            if line.startswith(delimiter) and mol2_block:
                _append_block(mol2_block)
                mol2_block = []
            mol2_block.append(line)

    # the last block has no trailing delimiter to trigger parsing
    if mol2_block:
        _append_block(mol2_block)

    return molecules
def test_valid_files(fn):
    """
    For a file shipped in chemper's data directory, get_data_path and
    get_full_path must both return <chemper package dir>/data/<fn>.
    """
    import chemper
    import os

    expected = os.path.join(chemper.__path__[0], 'data', fn)

    found_in_data = chemper_utils.get_data_path(fn)
    assert found_in_data == expected

    found_full = chemper_utils.get_full_path(fn)
    assert found_full == found_in_data
    assert found_full == expected
def mols_from_file(mol_file):
    """
    Reads every molecule from a molecule file with the OpenEye toolkits
    and wraps each one as a chemper Mol

    Parameters
    ----------
    mol_file: str
        relative or full path to a molecule file accessible from the
        current working directory; if nothing exists at that path the
        name is looked up under 'molecules' via get_data_path

    Returns
    -------
    mols: list of chemper Mols
        molecules read from the file, in file order

    Raises
    ------
    IOError
        if the file is found neither locally nor at the data path
    """
    import os

    if not os.path.exists(mol_file):
        from chemper.chemper_utils import get_data_path
        candidate = get_data_path(os.path.join('molecules', mol_file))
        if not os.path.exists(candidate):
            raise IOError(
                "File '%s' not found locally or in chemper/data/molecules."
                % mol_file)
        mol_file = candidate

    parsed = []

    # OpenEye input stream plus one scratch molecule reused for every read
    stream = oechem.oemolistream(mol_file)
    scratch = oechem.OECreateOEGraphMol()
    while oechem.OEReadMolecule(stream, scratch):
        # untitled molecules fall back to the 'name' SD tag
        if scratch.GetTitle() == '':
            scratch.SetTitle(oechem.OEGetSDData(scratch, 'name').strip())
        # copy the scratch molecule before it is overwritten by the next read
        parsed.append(Mol(oechem.OEMol(scratch)))
    stream.close()

    return parsed
def test_matching_smirks(smirks1, smirks2, checks):
    """
    check_smirks_agree on the MiniDrugBank molecules must return
    `checks` for the pair (smirks1, smirks2).
    """
    relative_name = os.path.join('molecules', 'MiniDrugBank_tripos.mol2')
    mol2_path = chemper_utils.get_data_path(relative_name)
    molecules = mol_toolkit.mols_from_mol2(mol2_path)
    agreement = chemper_utils.check_smirks_agree(smirks1, smirks2, molecules)
    assert agreement == checks
def test_bond_exception(toolkit):
    """Building a toolkit Bond from None is expected to raise TypeError."""
    with pytest.raises(TypeError):
        toolkit.Bond(None)

@pytest.mark.parametrize('toolkit', mts)
def test_mol_exception(toolkit):
    """Building a toolkit Mol from None is expected to raise TypeError."""
    with pytest.raises(TypeError):
        toolkit.Mol(None)

# -------------------------------
# check molecule file parsers
# -------------------------------
# The same mol2 file is addressed two ways: by the path get_data_path
# returns, and by bare file name.
mol2_abs_file = chemper_utils.get_data_path('molecules/MiniDrugBank_tripos.mol2')
mol2_rel_path = 'MiniDrugBank_tripos.mol2'
paths = [mol2_abs_file, mol2_rel_path]

# For the following functions, we will test default behavior and
# look for exceptions based on the available mol toolkit
@pytest.mark.parametrize('toolkit,path', itertools.product(mts, paths))
def test_file_parsing(toolkit,path):
    """Every toolkit in mts must read 363 molecules from either path."""
    mols = toolkit.mols_from_mol2(path)
    assert len(mols) == 363

@pytest.mark.parametrize('path', paths)
def test_mols_specified_toolkit(path):