def test_get_feature_distances_and_bit_coverages(self, feature1, feature2, distance, bit_coverage):
    """
    Check the distance and bit coverage calculated for one feature pair
    against the expected values.

    Parameters
    ----------
    feature1 : pd.Series
        Feature bits for a given feature in fingerprint 1.
    feature2 : pd.Series
        Feature bits for a given feature in fingerprint 2.
    distance : float
        Expected distance value for the feature pair.
    bit_coverage : float
        Expected bit coverage value for the feature pair.
    """

    calculator = FeatureDistances()
    result = calculator._get_feature_distances_and_bit_coverages(feature1, feature2)

    # The result is unpacked into exactly two values: distance, then bit coverage
    observed_distance, observed_bit_coverage = result

    assert np.isclose(observed_distance, distance, rtol=1e-04)
    assert np.isclose(observed_bit_coverage, bit_coverage, rtol=1e-04)
def test_from_fingerprints(self, fingerprint_generator):
    """
    Check attribute types and dimensions of the feature distances computed
    between two fingerprints.

    Parameters
    ----------
    fingerprint_generator : FingerprintGenerator
        Multiple fingerprints.
    """

    # Use the first two fingerprints held by the generator
    all_fingerprints = list(fingerprint_generator.data.values())
    first, second = all_fingerprints[0], all_fingerprints[1]

    result = FeatureDistances.from_fingerprints(fingerprint1=first, fingerprint2=second)

    # Pair identifier: a 2-tuple
    assert isinstance(result.structure_pair_ids, tuple)
    assert len(result.structure_pair_ids) == 2

    # Distances and bit coverages: arrays with 15 entries each
    for values in (result.distances, result.bit_coverages):
        assert isinstance(values, np.ndarray)
        assert len(values) == 15

    # The data property is a DataFrame; count its rows per feature type
    # (in order of first appearance, hence sort=False)
    assert isinstance(result.data, pd.DataFrame)
    observed_sizes = result.data.groupby(by="feature_type", sort=False).size()
    expected_sizes = pd.Series(
        [8, 4, 3], index="physicochemical distances moments".split()
    )
    assert (observed_sizes == expected_sizes).all()
def test_calculate_feature_distance_valueerror(self, feature_pair, distance_measure):
    """
    Test ValueError exceptions in distance calculation for two value (feature) lists.

    Parameters
    ----------
    feature_pair : np.ndarray
        Pairwise bits of one feature extracted from two fingerprints
        (only bit positions without any NaN value).
    distance_measure : str
        Type of distance measure, defaults to Euclidean distance.
    """

    # Build the object outside the `raises` block so that only the distance
    # calculation itself can satisfy the expected ValueError; an error raised
    # by the constructor must not make this test pass.
    feature_distances = FeatureDistances()

    with pytest.raises(ValueError):
        feature_distances._calculate_feature_distance(feature_pair, distance_measure)
def test_get_feature_distances_and_bit_coverages_valueerror(self, feature1, feature2):
    """
    Check that the feature distance calculation raises a ValueError for the
    given feature bits.

    Parameters
    ----------
    feature1 : np.ndarray
        Feature bits for a given feature in fingerprint 1.
    feature2 : np.ndarray
        Feature bits for a given feature in fingerprint 2.
    """

    calculator = FeatureDistances()

    # Only the calculation itself is expected to raise
    with pytest.raises(ValueError):
        calculator._get_feature_distances_and_bit_coverages(feature1, feature2)
def test_from_dict(self, feature_distances_dict):
    """
    Check the types of a FeatureDistances object created with `_from_dict`
    and of its attributes.

    Parameters
    ----------
    feature_distances_dict
        Input passed to `FeatureDistances._from_dict` (presumably a dictionary
        representation of feature distances; confirm against the fixture).
    """

    restored = FeatureDistances._from_dict(feature_distances_dict)
    assert isinstance(restored, FeatureDistances)

    # Structure and kinase pair identifiers are tuples
    for pair_ids in (restored.structure_pair_ids, restored.kinase_pair_ids):
        assert isinstance(pair_ids, tuple)

    # Distances and bit coverages are numpy arrays
    for values in (restored.distances, restored.bit_coverages):
        assert isinstance(values, np.ndarray)
def feature_distances():
    """
    Build a FeatureDistances instance filled with dummy data, i.e. distances
    and bit coverages between two fingerprints for each of their features.

    Returns
    -------
    kissim.similarity.FeatureDistances
        Distances and bit coverages between two fingerprints for each of their features.
    """

    # Create an empty object and set its attributes manually
    dummy = FeatureDistances()
    dummy.structure_pair_ids = ("molecule1", "molecule2")
    dummy.kinase_pair_ids = ("kinaseA", "kinaseB")
    # 15 values each; only positions 6 and 7 deviate from the rest
    dummy.distances = np.array(
        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
    )
    dummy.bit_coverages = np.array(
        [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    )

    return dummy
def test_calculate_feature_distance(self, feature_pair, distance_measure, distance):
    """
    Check the distance calculated for two value (feature) lists against the
    expected distance.

    Parameters
    ----------
    feature_pair : np.ndarray
        Pairwise bits of one feature extracted from two fingerprints
        (only bit positions without any NaN value).
    distance_measure : str
        Type of distance measure, defaults to Euclidean distance.
    distance : float
        Expected distance between the two value lists (may be NaN).
    """

    calculator = FeatureDistances()
    observed = calculator._calculate_feature_distance(feature_pair, distance_measure)

    # NaN never compares close to anything, so an expected NaN is checked separately
    if np.isnan(distance):
        assert np.isnan(observed)
        return

    assert np.isclose(observed, distance, rtol=1e-04)
def _get_feature_distances(pair, fingerprints):
    """
    Calculate the feature distances for one fingerprint pair.

    Parameters
    ----------
    pair : indexable with (at least) two elements
        Keys of the two fingerprints to compare; `pair[0]` and `pair[1]` are
        looked up in `fingerprints`.
    fingerprints : dict of tuple of str: kissim.encoding.Fingerprint
        Dictionary of fingerprints: Keys are molecule codes and values are fingerprint data.

    Returns
    -------
    kissim.similarity.FeatureDistances
        Distances and bit coverages between two fingerprints for each of their features.
    """

    # Look up both fingerprints by key and compare them in the given order
    return FeatureDistances.from_fingerprints(fingerprints[pair[0]], fingerprints[pair[1]])
def feature_distances_generator():
    """
    Build a FeatureDistancesGenerator instance filled with dummy data.

    Returns
    -------
    kissim.similarity.FeatureDistancesGenerator
        Feature distances for multiple fingerprint pairs.
    """

    # One entry per fingerprint pair:
    # (structure pair IDs, kinase pair IDs, distance value, bit coverage value)
    pair_specs = [
        (("pdbA", "pdbB"), ("kinaseA", "kinaseA"), 1.0, 1.0),
        (("pdbA", "pdbC"), ("kinaseA", "kinaseB"), 0.0, 1.0),
        (("pdbB", "pdbC"), ("kinaseA", "kinaseB"), 0.0, 0.0),
    ]

    # FeatureDistances: every pair gets 15 identical distance / bit coverage values
    feature_distances_list = []
    for structure_pair, kinase_pair, distance, bit_coverage in pair_specs:
        entry = FeatureDistances()
        entry.structure_pair_ids = structure_pair
        entry.kinase_pair_ids = kinase_pair
        entry.distances = np.full(15, distance)
        entry.bit_coverages = np.full(15, bit_coverage)
        feature_distances_list.append(entry)

    # FeatureDistancesGenerator: attributes are set manually
    generator = FeatureDistancesGenerator()
    generator.data = generator._feature_distances_list_to_df(feature_distances_list)
    generator.structure_kinase_ids = [
        ("pdbA", "kinaseA"),
        ("pdbB", "kinaseA"),
        ("pdbC", "kinaseB"),
    ]

    return generator