def test_to_sdf(datadir, tmp_path):
    """Write a dataframe to SDF and check it reads back as an equal dataframe."""
    source_file = datadir / "TUBB3-observations.sdf.gz"
    output_file = tmp_path / "mols.sdf"

    original = dm.read_sdf(source_file, as_df=True)

    # Round trip: dataframe -> SDF on disk -> dataframe.
    dm.to_sdf(original, output_file, smiles_column="smiles")
    reloaded = dm.read_sdf(output_file, as_df=True)

    assert original.equals(reloaded)
def test_to_sdf_mols(datadir, tmp_path):
    """Write a list of molecules to SDF and compare SMILES after reading back."""
    source_file = datadir / "TUBB3-observations.sdf.gz"
    output_file = tmp_path / "mols.sdf"

    original = dm.read_sdf(source_file, as_df=False)

    # Round trip: molecules -> SDF on disk -> molecules.
    dm.to_sdf(original, output_file)
    reloaded = dm.read_sdf(output_file, as_df=False)

    # Molecules are compared through their SMILES, in order.
    smiles_before = [dm.to_smiles(m) for m in original]
    smiles_after = [dm.to_smiles(m) for m in reloaded]
    assert smiles_before == smiles_after
def test_read_sdf_gz(datadir):
    """Read the gzipped SDF fixture and check the count and type of the results."""
    loaded = dm.read_sdf(datadir / "TUBB3-observations.sdf.gz")

    assert len(loaded) == 10
    assert all(isinstance(entry, Chem.rdchem.Mol) for entry in loaded)
def test_sdf_props_and_conformer_preserved(tmp_path):
    """Check that molecule properties and a conformer survive an SDF round trip."""
    sdf_path = tmp_path / "test.sdf"

    # Build a molecule carrying custom properties and one generated conformer.
    expected_props = {"test_int": 588, "test_str": "hello"}
    smiles = "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"
    written = dm.to_mol(smiles)
    written = dm.set_mol_props(written, expected_props)
    written = dm.conformers.generate(written, n_confs=1)
    expected_positions = written.GetConformer().GetPositions()

    # Write it out, then load the first molecule back from the file.
    dm.to_sdf(written, sdf_path)
    loaded = dm.read_sdf(sdf_path)[0]

    # Properties must come back unchanged.
    assert loaded.GetPropsAsDict() == expected_props

    # Exactly one 3D conformer, with coordinates matching to 4 decimals.
    conformer = loaded.GetConformer()
    assert loaded.GetNumConformers() == 1
    assert conformer.Is3D()
    np.testing.assert_almost_equal(
        conformer.GetPositions(), expected_positions, decimal=4
    )
def test_to_sdf_single_mol(tmp_path):
    """Pass a single molecule (not a list) to `dm.to_sdf` and read it back."""
    output_file = tmp_path / "test.sdf"
    smiles = "CC1(C2C(C3C(C(=O)C(=C(C3(C(=O)C2=C(C4=C1C=CC=C4O)O)O)O)C(=O)N)N(C)C)O)O"
    original = dm.to_mol(smiles)

    dm.to_sdf(original, output_file)
    reloaded = dm.read_sdf(output_file)

    # The first molecule read back must have the same SMILES as the one written.
    smiles_before = dm.to_smiles(original)
    smiles_after = dm.to_smiles(reloaded[0])
    assert smiles_before == smiles_after
def test_read_sdf_as_df(datadir):
    """Read the SDF fixture as a dataframe and check its shape and column names."""
    expected_columns = {
        "smiles",
        "zinc_id",
        "ortholog_name",
        "gene_name",
        "affinity",
        "chembldocid",
        "title",
        "reference.pubmed_id",
        "reference.doi",
        "reference.chembl_id",
        "reference.journal",
        "reference.year",
    }

    frame = dm.read_sdf(datadir / "TUBB3-observations.sdf", as_df=True)

    assert frame.shape == (10, 12)
    # Column order is not checked here, only membership.
    assert set(frame.columns) == expected_columns
def test_to_df(datadir):
    """Convert molecules read from SDF to a dataframe and check shape and column order."""
    expected_columns = [
        "smiles",
        "zinc_id",
        "ortholog_name",
        "gene_name",
        "affinity",
        "chembldocid",
        "title",
        "reference.pubmed_id",
        "reference.doi",
        "reference.chembl_id",
        "reference.journal",
        "reference.year",
    ]

    molecules = dm.read_sdf(datadir / "TUBB3-observations.sdf")
    frame = dm.to_df(molecules)

    assert frame.shape == (10, 12)
    # Compared as a list, so the column order matters.
    assert list(frame.columns) == expected_columns
def test_from_df(datadir):
    """Convert a dataframe to molecules and check count, type and property names."""
    expected_prop_names = {
        "zinc_id",
        "ortholog_name",
        "gene_name",
        "affinity",
        "chembldocid",
        "title",
        "reference.pubmed_id",
        "reference.doi",
        "reference.chembl_id",
        "reference.journal",
        "reference.year",
    }

    frame = dm.read_sdf(datadir / "TUBB3-observations.sdf", as_df=True)
    molecules = dm.from_df(frame)

    assert len(molecules) == 10

    first = molecules[0]
    assert isinstance(first, Chem.rdchem.Mol)
    assert set(first.GetPropsAsDict().keys()) == expected_prop_names

    # An empty dataframe yields an empty list.
    assert dm.from_df(pd.DataFrame()) == []
def test_read_sdf_as_df_mol_col(datadir):
    """Read the SDF fixture as a dataframe with `mol_column` set and check that column."""
    frame = dm.read_sdf(
        datadir / "TUBB3-observations.sdf",
        as_df=True,
        mol_column="mol",
    )

    assert "mol" in frame.columns

    # The first row's "mol" entry must be an RDKit molecule object.
    first_row = frame.iloc[0]
    assert isinstance(first_row["mol"], Chem.rdchem.Mol)