def test_ligand_set():
    """Exercise the accessors of ``LigandSet`` on the ``mcl1_sample`` set.

    Covers ligand lookup (including the ``ValueError`` raised for an unknown
    name), the full and the column-restricted dataframe, the molecule
    dictionary, and a smoke call of the HTML export.
    """
    ligand_set = ligands.LigandSet("mcl1_sample")

    # Every listed ligand must be a key of the set and resolve to a Ligand.
    for key in ligand_set.get_list():
        assert key in ligand_set.keys()
        assert isinstance(ligand_set.get_ligand(key), ligands.Ligand)

    with pytest.raises(ValueError, match="Ligand xxx is not part of set."):
        ligand_set.get_ligand("xxx")

    # Full dataframe: a row restricted to the ligand's own index must equal
    # the data stored on the ligand itself.
    df = ligand_set.get_dataframe()
    for _, row in df.iterrows():
        ligand = ligand_set[row.loc["name"][0]]
        test_data = row.loc[ligand._data.index]
        pd.testing.assert_series_equal(ligand._data, test_data, check_names=False)

    # Column-restricted dataframe: name and smiles must agree with the ligand.
    df = ligand_set.get_dataframe(columns=["name", "smiles"])
    for _, row in df.iterrows():
        ligand = ligand_set[row.loc["name"][0]]
        assert row["name"][0] == ligand.get_name()
        assert row["smiles"][0] == ligand._data["smiles"][0]

    molecules = ligand_set.get_molecules()
    for name, lig in ligand_set.items():
        result = Molecule.are_isomorphic(lig.get_molecule(), molecules[name])
        # NOTE(review): if ``Molecule`` is the OpenFF toolkit class,
        # ``are_isomorphic`` returns a ``(bool, atom_map)`` tuple, and asserting
        # that tuple directly can never fail. Unwrap the flag when a tuple is
        # returned; a plain boolean result is still accepted unchanged.
        is_isomorphic = result[0] if isinstance(result, tuple) else result
        assert is_isomorphic

    # ToDo: proper test for get_html()
    ligand_set.get_html()
    ligand_set.get_html(columns=["name", "smiles"])
def test_target_class():
    """Check ``Target`` against stand-alone ``LigandSet`` and ``EdgeSet`` objects.

    A freshly constructed target must start with its ``ligand_data``,
    ``html_data``, ``_ligands`` and ``_edges`` attributes unset. Its ligand
    data, ligand set and edge set are then compared with sets built directly
    for the same ``mcl1_sample`` target.
    """
    target = targets.target_dict["mcl1_sample"]
    tgt = targets.Target(target["name"])
    assert tgt.get_name() == target["name"]

    # Identity checks: ``== None`` would go through ``__eq__`` and can give an
    # element-wise result for pandas objects.
    assert tgt.ligand_data is None
    assert tgt.html_data is None
    assert tgt._ligands is None
    assert tgt._edges is None

    ligand_set = ligands.LigandSet("mcl1_sample")
    assert tgt.get_ligand_set().keys() == ligand_set.keys()
    # After get_ligand_set() the ligand set must be stored on the target.
    assert tgt._ligands is not None

    tgt.add_ligand_data()
    assert isinstance(tgt.ligand_data, pd.Series)
    pd.testing.assert_series_equal(tgt.ligand_data, tgt.get_ligand_data())

    # Reset the attribute; get_ligand_data() must populate it again.
    tgt.ligand_data = None
    ligand_data = tgt.get_ligand_data()
    assert isinstance(tgt.ligand_data, pd.Series)
    assert ligand_data["numLigands"] == 15

    # cannot compare ROMol column (SVG image), that's why we only compare these columns
    columns = ["name", "smiles", "measurement", "DerivedMeasurement"]
    df1 = tgt.get_ligand_set_dataframe(columns=columns)
    df2 = ligand_set.get_dataframe(columns=columns)
    pd.testing.assert_frame_equal(df1, df2)

    assert tgt.get_ligand_set_html() == ligand_set.get_html()

    edge_set = edges.EdgeSet("mcl1_sample")
    columns = [
        "ligand_a",
        "ligand_b",
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    ]
    pd.testing.assert_frame_equal(
        tgt.get_edge_set().get_dataframe(columns=columns),
        edge_set.get_dataframe(columns=columns),
    )

    dict1 = tgt.get_edge_set().get_dict()
    dict2 = edge_set.get_dict()
    assert dict1.keys() == dict2.keys()
    for key, item in dict1.items():
        assert item.keys() == dict2[key].keys()
        for kk, ii in item.items():
            # The "Mol1"/"Mol2" entries are excluded from the value comparison.
            if kk not in ("Mol1", "Mol2"):
                assert ii == dict2[key][kk]

    assert tgt.get_edge_set_html() == edge_set.get_html()

    # TODO: this actually does not test anything, only checks if it works
    tgt.find_links()
    tgt.get_html_data()
    tgt.get_graph()
def test_ligand_class():
    """Check the basic accessors of every ``Ligand`` in every known target.

    For each ligand, the reported name must match its key in the ligand set,
    both for ``get_name()`` and for the ``name`` column of its dataframe
    (full and column-restricted). The remaining calls are smoke tests only.
    """
    for target_name in targets.target_dict.keys():
        for lig_name, ligand in ligands.LigandSet(target_name).items():
            assert ligand.get_name() == lig_name

            full_frame = ligand.get_dataframe()
            assert full_frame["name"][0] == lig_name

            name_frame = ligand.get_dataframe(columns=["name"])
            assert name_frame["name"][0] == lig_name

            # ToDo: make proper tests (?)
            ligand.find_links()
            ligand.get_image()
            ligand.get_html()
            ligand.get_html(columns=["name", "smiles"])
def test_derive_observables():
    """Check ``Ligand.derive_observables`` for every ligand of every target.

    For each ligand the observable is derived as ``dg``, ``ki``, ``ic50`` and
    ``pic50`` and the stored result is compared with ``utils.convert_value``
    applied to the ligand's measurement. Two error paths follow: an unknown
    derived type must raise ``NotImplementedError``, and a ligand whose
    measured observable label was renamed to ``"xxx"`` must raise
    ``ValueError``.

    Note:
        The last step renames index labels of ``lig._data`` in place
        (``inplace=True``), so the ligand objects are modified by this test.
    """
    for target in targets.target_dict.keys():
        ligand_set = ligands.LigandSet(target)
        for name, lig in ligand_set.items():
            for i, t in enumerate(["dg", "ki", "ic50", "pic50"]):
                # A separate destination per derived type keeps the four
                # results side by side in ``lig._data``.
                lig.derive_observables(derived_type=t, destination=f"DerivedMeasurement{i}")
                for original_type in lig._observables:
                    # Only observables actually present in the ligand's data
                    # are compared.
                    if ("measurement", original_type) in list(lig._data.index):
                        assert lig._data[(f"DerivedMeasurement{i}", t)] == utils.convert_value(
                            lig._data[("measurement", original_type)],
                            original_type=original_type,
                            final_type=t,
                        )
            # Test expected exception when trying to convert to unknown observable
            # NOTE(review): pytest treats ``match`` as a regular expression, so
            # the "." characters in the message match any character.
            with pytest.raises(
                NotImplementedError,
                match=f"Conversion to observable xxx not possible. "
                f"Observable must be any of: dg, ki, ic50 or pic50.",
            ):
                lig.derive_observables(derived_type="xxx", destination=f"DerivedMeasurement")
            # Test expected exception when trying to convert from unknown observable
            for original_type in lig._observables:
                if ("measurement", original_type) in list(lig._data.index):
                    # Relabel the measured observable (second index level) so
                    # that it is no longer one of the known types.
                    lig._data.rename({original_type: "xxx"}, inplace=True, level=1)
                    # ``i`` still holds the value left over from the enumerate
                    # loop above.
                    with pytest.raises(
                        ValueError,
                        match=f"No known measured observable found. "
                        f"Measured observable should be any of: dg, ki, ic50 or pic50.",
                    ):
                        lig.derive_observables(
                            derived_type="pic50", destination=f"DerivedMeasurement{i}")
# "lig_65": -8.41, # "lig_66": -8.43, # "lig_67": -7.58, # "lig_68": -7.69, } eps = 0.01 df.index = df.name for key, item in jacs_data.items(): assert (pytest.approx( item, eps) == df.loc[key, ("DerivedMeasurement", "value")].to( utils.unit_registry("kcal / mole")).magnitude) test_set = [] ligand_set = ligands.LigandSet("mcl1_sample") for name, lig in ligand_set.items(): test_set.append(("mcl1_sample", name, lig)) @pytest.mark.parametrize("target, ligand_name, lig", test_set) def test_ligand_data(target, ligand_name, lig): m1 = Chem.MolFromSmiles(lig._data["smiles"][0]) m1 = Chem.AddHs(m1) m2 = Chem.SDMolSupplier( os.path.join( targets.data_path, targets.get_target_dir(target), "02_ligands", ligand_name, "crd",
def _assert_edge_matches_ligands(record, lig_set, eps):
    """Check one edge record against the ligands it refers to.

    Args:
        record: Object supporting ``record[key]`` for ``"ligand_a"``,
            ``"ligand_b"``, ``"exp. DeltaG [kcal/mol]"`` and
            ``"exp. Error [kcal/mol]"`` (the test passes both ``Edge._data``
            and dataframe rows).
        lig_set: Ligand set the edge refers to.
        eps: Tolerance passed to ``pytest.approx`` for the DeltaG comparison.
    """
    name_a = f"{record['ligand_a']}"
    name_b = f"{record['ligand_b']}"
    assert name_a in lig_set.keys()
    assert name_b in lig_set.keys()

    data_a = lig_set[name_a]._data
    data_b = lig_set[name_b]._data

    # The edge value is the difference "ligand_b minus ligand_a".
    ddg = (data_b[("DerivedMeasurement", "value")] -
           data_a[("DerivedMeasurement", "value")])
    assert (pytest.approx(record["exp. DeltaG [kcal/mol]"].magnitude,
                          eps) == ddg.magnitude)

    # The two ligand errors are combined as the square root of the sum of
    # their squares; the error check uses a fixed tolerance of 0.5.
    e_ddg = np.sqrt(data_a[("DerivedMeasurement", "error")]**2 +
                    data_b[("DerivedMeasurement", "error")]**2)
    assert (pytest.approx(record["exp. Error [kcal/mol]"].magnitude,
                          0.5) == e_ddg.magnitude)


def test_edge_set():
    """Check ``EdgeSet`` of ``mcl1_sample`` against the matching ``LigandSet``.

    The same consistency check (ligands exist, DeltaG and its error agree with
    the ligand data) is applied to the edge objects, to the rows of the full
    dataframe and to the rows of a column-restricted dataframe. The HTML
    export is only smoke-tested, and the dictionary export is checked for
    consistent edge names.
    """
    eps = 0.01
    lig_set = ligands.LigandSet("mcl1_sample")
    edg_set = edges.EdgeSet("mcl1_sample")
    edge_columns = [
        "ligand_a",
        "ligand_b",
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    ]

    # Edge objects themselves.
    for _, edg in edg_set.items():
        _assert_edge_matches_ligands(edg._data, lig_set, eps)

    # Rows of the full dataframe.
    df = edg_set.get_dataframe()
    for _, row in df.iterrows():
        _assert_edge_matches_ligands(row, lig_set, eps)

    # Rows of the column-restricted dataframe.
    df2 = edg_set.get_dataframe(columns=list(edge_columns))
    for _, row in df2.iterrows():
        _assert_edge_matches_ligands(row, lig_set, eps)

    # Smoke test only: the HTML output is not inspected.
    edg_set.get_html()
    edg_set.get_html(columns=list(edge_columns))

    # Edge names must follow "edge_<a>_<b>" with the "lig_" prefix removed.
    d = edg_set.get_dict()
    for edg, ligs in d.items():
        assert (
            edg ==
            f'edge_{ligs["ligand_a"].replace("lig_", "")}_{ligs["ligand_b"].replace("lig_", "")}'
        )
        assert ligs["ligand_a"] in lig_set.keys()
        assert ligs["ligand_b"] in lig_set.keys()
def test_edge():
    """Check construction, export and ligand-data enrichment of ``Edge``.

    An edge is first built from placeholder names, then from two ligands of
    ``mcl1_sample``. Before ``add_ligand_data`` the exported series must equal
    the three input fields; afterwards it must differ, and the experimental
    DeltaG and its error must agree with the values stored on the two ligands.
    """
    eps = 0.0001

    def edge_fields(name, lig_a, lig_b):
        # A fresh dict on every call, so the Edge under test never shares an
        # object with the expectations it is compared against.
        return {"name": name, "ligand_a": lig_a, "ligand_b": lig_b}

    # Placeholder edge with names that belong to no ligand set.
    placeholder = edges.Edge(edge_fields("edge_xxx_yyy", "xxx", "yyy"))
    assert placeholder.get_name() == "edge_xxx_yyy"
    assert placeholder.get_dict() == edge_fields("edge_xxx_yyy", "xxx", "yyy")

    ligand_set = ligands.LigandSet("mcl1_sample")
    edg = edges.Edge(edge_fields("edge_30_27", "lig_30", "lig_27"))
    assert edg.get_name() == "edge_30_27"
    assert edg.get_dict() == edge_fields("edge_30_27", "lig_30", "lig_27")

    # Before enrichment the exported series holds exactly the input fields.
    pd.testing.assert_series_equal(
        edg.get_dataframe(),
        pd.Series(edge_fields("edge_30_27", "lig_30", "lig_27")),
    )

    # After enrichment the same comparison is expected to fail.
    edg.add_ligand_data(ligand_set)
    with pytest.raises(AssertionError):
        pd.testing.assert_series_equal(
            edg.get_dataframe(),
            pd.Series(edge_fields("edge_30_27", "lig_30", "lig_27")),
        )

    df = edg.get_dataframe(columns=[
        "ligand_a",
        "ligand_b",
        "Smiles1",
        "Smiles2",
        "exp. DeltaG [kcal/mol]",
        "exp. Error [kcal/mol]",
    ])

    data_b = ligand_set[f"{edg._data['ligand_b']}"]._data
    data_a = ligand_set[f"{edg._data['ligand_a']}"]._data

    # Reference values computed directly from the two ligands.
    value_key = ("DerivedMeasurement", "value")
    error_key = ("DerivedMeasurement", "error")
    ddg = data_b[value_key] - data_a[value_key]
    e_ddg = np.sqrt(data_b[error_key]**2 + data_a[error_key]**2)

    assert pytest.approx(df["exp. DeltaG [kcal/mol]"].magnitude,
                         eps) == ddg.magnitude
    assert pytest.approx(df["exp. Error [kcal/mol]"].magnitude,
                         0.5) == e_ddg.magnitude