def test_get_physicochemical_or_spatial_features_dict(self, structure_klifs_id):
    """
    Test if the physicochemical and spatial features dictionaries have the correct keys.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to load the pocket from the local KLIFS session.
    """

    # The two feature types are computed from two different pocket representations
    biopython_pocket = PocketBioPython.from_structure_klifs_id(structure_klifs_id, LOCAL)
    dataframe_pocket = PocketDataFrame.from_structure_klifs_id(structure_klifs_id, LOCAL)

    fingerprint = Fingerprint()

    # Physicochemical features
    physicochemical = fingerprint._get_physicochemical_features_dict(biopython_pocket)
    assert isinstance(physicochemical, dict)
    assert list(physicochemical) == FEATURE_NAMES_PHYSICOCHEMICAL_DICT

    # Spatial features: check the top-level keys first, then the nested ones
    spatial = fingerprint._get_spatial_features_dict(dataframe_pocket)
    assert isinstance(spatial, dict)
    assert list(spatial) == FEATURE_NAMES_SPATIAL_DICT
    for feature_type in ("distances", "moments"):
        assert list(spatial[feature_type].keys()) == FEATURE_NAMES_DISTANCES_AND_MOMENTS
def test_to_from_json(self, structure_klifs_id):
    """
    Test if a fingerprint survives a save/load round trip to/from a json file.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    """

    original = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
    json_filepath = Path("fingerprint.json")

    # The relative path is resolved inside a temporary working directory
    with enter_temp_directory():

        # Write the fingerprint to disk...
        original.to_json(json_filepath)
        assert json_filepath.exists()

        # ...and read it back
        reloaded = Fingerprint.from_json(json_filepath)

    # The reloaded fingerprint must carry the same attributes as the original one
    assert original.structure_klifs_id == reloaded.structure_klifs_id

    # rtol=0 and atol=0 demand exact equality; NaN positions must match as well
    values_original = original.values_array(True, True, True)
    values_reloaded = reloaded.values_array(True, True, True)
    assert np.allclose(values_original, values_reloaded, rtol=0, atol=0, equal_nan=True)

    assert original.residue_ids == reloaded.residue_ids
    assert original.residue_ixs == reloaded.residue_ixs
def test_from_structure_klifs_id(self, structure_klifs_id, fingerprint):
    """
    Test if a Fingerprint can be generated from a local and from a remote KLIFS session.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID.
    fingerprint : object or None
        Expected outcome. Only checked for being None, in which case both sessions are
        expected to return None instead of a fingerprint.
    """

    local_fp = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
    remote_fp = Fingerprint.from_structure_klifs_id(structure_klifs_id, REMOTE)

    # No fingerprint expected: both sessions must agree, and there is nothing more to check
    if fingerprint is None:
        assert local_fp is None
        assert remote_fp is None
        return

    for generated in (local_fp, remote_fp):
        assert isinstance(generated, Fingerprint)

    # Locally and remotely obtained fingerprints must hold exactly the same values
    # (rtol=0 and atol=0 allow no deviation; NaN positions must match as well)
    values_local = local_fp.values_array(True, True, True)
    values_remote = remote_fp.values_array(True, True, True)
    assert np.allclose(values_local, values_remote, rtol=0, atol=0, equal_nan=True)

    # Attribute structure_klifs_id
    for generated in (local_fp, remote_fp):
        assert generated.structure_klifs_id == structure_klifs_id

    # Attribute values_dict: top-level keys, then the nested dictionaries
    values_dict = local_fp.values_dict
    assert list(values_dict.keys()) == FEATURE_NAMES
    assert list(values_dict["physicochemical"].keys()) == FEATURE_NAMES_PHYSICOCHEMICAL_DICT
    spatial = values_dict["spatial"]
    assert list(spatial.keys()) == FEATURE_NAMES_SPATIAL_DICT
    for spatial_key in ("distances", "moments", "subpocket_centers"):
        assert list(spatial[spatial_key].keys()) == FEATURE_NAMES_DISTANCES_AND_MOMENTS

    # Attributes residue_ids and residue_ixs
    assert local_fp.residue_ids == remote_fp.residue_ids
    assert local_fp.residue_ixs == remote_fp.residue_ixs

    # Attribute subpocket_centers
    centers = local_fp.subpocket_centers
    assert isinstance(centers, pd.DataFrame)
    assert centers.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS
    assert centers.index.to_list() == ["x", "y", "z"]
def from_json(cls, filepath, normalize=False):
    """
    Initialize a FingerprintGenerator object from a json file.

    Parameters
    ----------
    filepath : str or pathlib.Path
        Path to json file. The file must hold a json array whose entries can each be passed
        to Fingerprint._from_dict.
    normalize : bool
        Add normalization (default: False). This will store the unnormalized features
        alongside the normalized features.

    Returns
    -------
    cls
        New instance with the attributes data (fingerprints keyed by their structure KLIFS
        ID) and structure_klifs_ids set; data_normalized is only set if normalize is True.
    """

    filepath = Path(filepath)

    # JSON text is UTF-8 by specification; do not depend on the platform default encoding
    with open(filepath, "r", encoding="utf-8") as f:
        fingerprints_list = json.load(f)

    # Entries sharing a structure KLIFS ID overwrite each other; the last one wins
    data = {}
    for fingerprint_dict in fingerprints_list:
        fingerprint = Fingerprint._from_dict(fingerprint_dict)
        data[fingerprint.structure_klifs_id] = fingerprint

    fingerprint_generator = cls()
    fingerprint_generator.data = data
    if normalize:
        fingerprint_generator.data_normalized = fingerprint_generator._normalize_fingerprints()
    fingerprint_generator.structure_klifs_ids = list(fingerprint_generator.data.keys())

    return fingerprint_generator
def test_fingerprint(self, structure_klifs_id):
    """
    Test if a normalized fingerprint can be generated from a fingerprint.

    No assertion is made on the result: the test only fails if the generation raises.
    NOTE(review): an earlier note on this test says it also covers the method _normalize()
    used in the class __init__ method - confirm against FingerprintNormalized.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    """

    unnormalized = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)

    # Smoke test: the returned object is intentionally not inspected
    FingerprintNormalized.from_fingerprint(unnormalized)
def test_moments(self, structure_klifs_id):
    """
    Test the column and index names of the fingerprint's moments DataFrame.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    """

    fingerprint = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
    moments = fingerprint.moments

    assert moments.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS

    # Rows are the moments, numbered 1 to 3
    moments_index = moments.index
    assert moments_index.to_list() == [1, 2, 3]
    assert moments_index.name == "moments"
def test_distances(self, structure_klifs_id):
    """
    Test the column and index names of the fingerprint's distances DataFrame.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    """

    fingerprint = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
    distances = fingerprint.distances

    assert distances.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS

    # Rows are the residue indices, numbered 1 to 85
    distances_index = distances.index
    assert distances_index.to_list() == [*range(1, 86)]
    assert distances_index.name == "residue.ix"
def test_physicochemical(self, structure_klifs_id):
    """
    Test the column and index names of the fingerprint's physicochemical DataFrame.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    """

    fingerprint = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
    physicochemical = fingerprint.physicochemical

    assert physicochemical.columns.to_list() == FEATURE_NAMES_PHYSICOCHEMICAL

    # Rows are the residue indices, numbered 1 to 85
    physicochemical_index = physicochemical.index
    assert physicochemical_index.to_list() == [*range(1, 86)]
    assert physicochemical_index.name == "residue.ix"
def _get_fingerprint(self, structure_klifs_id, klifs_session):
    """
    Generate the fingerprint for a single structure and log that the generation started.

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID.
    klifs_session : opencadd.databases.klifs.session.Session
        Local or remote KLIFS session.

    Returns
    -------
    kissim.encoding.Fingerprint
        Result of Fingerprint.from_structure_klifs_id, passed through unchanged.
    """

    logger.info(f"{structure_klifs_id}: Generate fingerprint...")
    return Fingerprint.from_structure_klifs_id(structure_klifs_id, klifs_session)
def test_values_array(self, structure_klifs_id, values_array_mean):
    """
    Test the fingerprint values array (mean value and size per feature selection).

    Parameters
    ----------
    structure_klifs_id : int
        Structure KLIFS ID used to generate the fingerprint from the local KLIFS session.
    values_array_mean : float
        Expected mean (NaN values ignored) of the full values array.
    """

    fingerprint = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)

    # Mean over the full fingerprint, ignoring NaN values
    full_values = fingerprint.values_array(True, True, True)
    mean_calculated = np.nanmean(full_values)
    assert pytest.approx(mean_calculated, abs=1e-4) == values_array_mean

    # Expected length of the final fingerprint based on the selection of
    # (physicochemical, distances, moments) features; the sizes of combined
    # selections are the sums of the single-feature sizes.
    expected_sizes = [
        ((False, False, False), 0),
        ((True, False, False), 680),
        ((False, True, False), 340),
        ((False, False, True), 12),
        ((True, True, False), 1020),
        ((True, False, True), 692),
        ((False, True, True), 352),
        ((True, True, True), 1032),
    ]
    for selection, expected_size in expected_sizes:
        assert fingerprint.values_array(*selection).size == expected_size
class TestsFeatureDistancesGenerator:
    """
    Test FeatureDistancesGenerator class methods.
    """

    @pytest.mark.parametrize(
        "feature_weights, structure_ids, kinase_ids",
        [
            (
                None,
                ["HUMAN/ABL1_2g2i_chainA", "HUMAN/AAK1_4wsq_altA_chainB"],
                ["AAK1", "ABL1"],
            )
        ],
    )
    def test_from_fingerprints(
        self,
        fingerprint_generator,
        feature_weights,
        structure_ids,
        kinase_ids,
    ):
        """
        Test FeatureDistancesGenerator class attributes.

        NOTE(review): feature_weights, structure_ids and kinase_ids are parametrized but not
        used in the test body - confirm whether checks against them are missing.
        """

        # Test FeatureDistancesGenerator class attributes
        feature_distances_generator = FeatureDistancesGenerator.from_fingerprint_generator(
            fingerprint_generator
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)

        # Test attributes
        assert isinstance(feature_distances_generator.data, pd.DataFrame)
        assert (
            feature_distances_generator.data.columns.to_list()
            == ["structure.1", "structure.2", "kinase.1", "kinase.2"]
            + [f"distance.{i}" for i in range(1, 16)]
            + [f"bit_coverage.{i}" for i in range(1, 16)]
        )
        assert isinstance(feature_distances_generator.structure_kinase_ids, list)

    @pytest.mark.parametrize(
        "structure_klifs_ids, klifs_session, n_cores",
        [
            ([110, 118], REMOTE, 1),
            ([110, 118], REMOTE, 2),
            ([110, 118], LOCAL, 1),
            ([110, 118], LOCAL, 2),
            ([110, 118], None, None),
        ],
    )
    def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, n_cores):
        """
        Test if the generator can be set up from structure KLIFS IDs for different KLIFS
        sessions and numbers of cores.
        """

        # Test FeatureDistancesGenerator class attributes
        feature_distances_generator = FeatureDistancesGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, n_cores
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)

        # Test attributes
        assert isinstance(feature_distances_generator.data, pd.DataFrame)
        assert isinstance(feature_distances_generator.structure_kinase_ids, list)

    @pytest.mark.parametrize(
        "structure_kinase_ids",
        [[["pdbA", "kinaseA"], ["pdbB", "kinaseA"], ["pdbC", "kinaseB"]]],
    )
    def test_structure_kinase_ids(self, feature_distances_generator, structure_kinase_ids):
        """
        Test the structure-kinase ID pairs held by the generator fixture.
        """

        assert feature_distances_generator._structure_kinase_ids == structure_kinase_ids

    @pytest.mark.parametrize(
        "structure_pair_ids", [[["pdbA", "pdbB"], ["pdbA", "pdbC"], ["pdbB", "pdbC"]]]
    )
    def test_structure_pair_ids(self, feature_distances_generator, structure_pair_ids):
        """
        Test the structure ID pairs held by the generator fixture.
        """

        assert feature_distances_generator.structure_pair_ids == structure_pair_ids

    @pytest.mark.parametrize(
        "kinase_pair_ids",
        [[["kinaseA", "kinaseA"], ["kinaseA", "kinaseB"], ["kinaseA", "kinaseB"]]],
    )
    def test_kinase_pair_ids(self, feature_distances_generator, kinase_pair_ids):
        """
        Test the kinase ID pairs held by the generator fixture.
        """

        assert feature_distances_generator.kinase_pair_ids == kinase_pair_ids

    @pytest.mark.parametrize("structure_ids", [["pdbA", "pdbB", "pdbC"]])
    def test_structure_ids(self, feature_distances_generator, structure_ids):
        """
        Test the structure IDs held by the generator fixture.
        """

        assert feature_distances_generator.structure_ids == structure_ids

    @pytest.mark.parametrize("kinase_ids", [["kinaseA", "kinaseB"]])
    def test_kinase_ids(self, feature_distances_generator, kinase_ids):
        """
        Test the kinase IDs held by the generator fixture.
        """

        assert feature_distances_generator.kinase_ids == kinase_ids

    def test_to_from_csv(self, feature_distances_generator):
        """
        Test if the generator can be saved to and loaded from a csv file.
        """

        # The relative path is resolved inside a temporary working directory
        with enter_temp_directory():
            filepath = Path("test.csv")

            feature_distances_generator.to_csv(filepath)
            assert filepath.exists()

            feature_distances_generator_from_csv = FeatureDistancesGenerator.from_csv(filepath)
            assert isinstance(feature_distances_generator_from_csv, FeatureDistancesGenerator)

    @pytest.mark.parametrize(
        "fingerprints, pairs",
        [
            (
                {"a": Fingerprint(), "b": Fingerprint(), "c": Fingerprint()},
                [("a", "b"), ("a", "c"), ("b", "c")],
            )
        ],
    )
    def test_fingerprint_pairs(self, fingerprints, pairs):
        """
        Test calculation of all fingerprint pair combinations from fingerprints dictionary.

        Parameters
        ----------
        fingerprints : dict of kissim.encoding.Fingerprint
            Dictionary of fingerprints: Keys are molecule codes and values are fingerprint
            data.
        pairs : list of tuple of str
            Expected molecule code pairs, in the expected order.
        """

        generator = FeatureDistancesGenerator()
        pairs_calculated = list(generator._fingerprint_pairs(fingerprints))

        # zip() stops at the shorter input, so missing or surplus pairs would go unnoticed
        # (an empty result would even pass vacuously) without this explicit length check.
        assert len(pairs_calculated) == len(pairs)

        for pair_calculated, pair in zip(pairs_calculated, pairs):
            assert pair_calculated == pair

    def test_get_feature_distances(self, fingerprint_generator):
        """
        Test if return type is instance of FeatureDistances class.

        Parameters
        ----------
        fingerprint_generator : FingerprintGenerator
            Multiple fingerprints.
        """

        # Get fingerprint pair from FingerprintGenerator (first two keys)
        pair = list(fingerprint_generator.data.keys())[:2]
        fingerprints = fingerprint_generator.data

        # Test feature distance calculation
        feature_distances_generator = FeatureDistancesGenerator()
        feature_distances_calculated = feature_distances_generator._get_feature_distances(
            pair, fingerprints
        )

        assert isinstance(feature_distances_calculated, FeatureDistances)

    def test_get_feature_distances_from_list(self, fingerprint_generator):
        """
        Test if return type is instance of list of FeatureDistances class.

        Parameters
        ----------
        fingerprint_generator : FingerprintGenerator
            Multiple fingerprints.
        """

        # Test bulk feature distance calculation
        generator = FeatureDistancesGenerator()
        feature_distances_list = generator._get_feature_distances_from_list(
            generator._get_feature_distances, fingerprint_generator.data, 1
        )

        assert isinstance(feature_distances_list, list)

        for i in feature_distances_list:
            assert isinstance(i, FeatureDistances)