예제 #1
0
    def test_get_feature_distances_and_bit_coverages(self, feature1, feature2,
                                                     distance, bit_coverage):
        """
        Check the distance and bit coverage computed for one pair of feature bit vectors.

        Parameters
        ----------
        feature1 : pd.Series
            Feature bits for a given feature in fingerprint 1.
        feature2 : pd.Series
            Feature bits for a given feature in fingerprint 2.
        distance : float
            Expected distance value for the feature pair.
        bit_coverage : float
            Expected bit coverage value for the feature pair.
        """

        # Both comparisons use the same relative tolerance.
        relative_tolerance = 1e-04

        calculator = FeatureDistances()
        result = calculator._get_feature_distances_and_bit_coverages(
            feature1, feature2)
        # The method under test is expected to yield exactly two values.
        distance_calculated, bit_coverage_calculated = result

        assert np.isclose(distance_calculated, distance,
                          rtol=relative_tolerance)
        assert np.isclose(bit_coverage_calculated, bit_coverage,
                          rtol=relative_tolerance)
예제 #2
0
    def test_from_fingerprints(self, fingerprint_generator):
        """
        Check types and sizes of the feature distances built from two fingerprints.

        The first two values of ``fingerprint_generator.data`` are used as the fingerprint pair.

        Parameters
        ----------
        fingerprint_generator : FingerprintGenerator
            Multiple fingerprints (accessed through its ``data`` mapping).
        """

        # Pick the pair of fingerprints to compare
        all_fingerprints = list(fingerprint_generator.data.values())
        first = all_fingerprints[0]
        second = all_fingerprints[1]

        feature_distances = FeatureDistances.from_fingerprints(
            fingerprint1=first, fingerprint2=second)

        # Attribute checks: the pair IDs form a 2-tuple ...
        assert isinstance(feature_distances.structure_pair_ids, tuple)
        assert len(feature_distances.structure_pair_ids) == 2

        # ... and distances as well as bit coverages are arrays with 15 entries each.
        assert isinstance(feature_distances.distances, np.ndarray)
        assert len(feature_distances.distances) == 15
        assert isinstance(feature_distances.bit_coverages, np.ndarray)
        assert len(feature_distances.bit_coverages) == 15

        # Property check: the tabular view is a DataFrame ...
        assert isinstance(feature_distances.data, pd.DataFrame)

        # ... whose rows split into 8 / 4 / 3 entries per feature type (in order of appearance).
        observed_sizes = feature_distances.data.groupby(
            by="feature_type", sort=False).size()
        expected_sizes = pd.Series(
            [8, 4, 3], index="physicochemical distances moments".split())
        assert all(observed_sizes == expected_sizes)
예제 #3
0
    def test_calculate_feature_distance_valueerror(self, feature_pair,
                                                   distance_measure):
        """
        Test ValueError exceptions in distance calculation for two value (feature) lists.

        Parameters
        ----------
        feature_pair : np.ndarray
            Pairwise bits of one feature extracted from two fingerprints (only bit positions
            without any NaN value).
        distance_measure : str
            Type of distance measure passed to the method under test.
        """

        # Construct the object outside the `raises` context: only the call under test may
        # satisfy the expected ValueError, not an error raised during construction.
        feature_distances = FeatureDistances()

        with pytest.raises(ValueError):
            feature_distances._calculate_feature_distance(
                feature_pair, distance_measure)
예제 #4
0
    def test_get_feature_distances_and_bit_coverages_valueerror(
            self, feature1, feature2):
        """
        Check that the feature distance / bit coverage calculation raises a ValueError.

        Parameters
        ----------
        feature1 : np.ndarray
            Feature bits for a given feature in fingerprint 1.
        feature2 : np.ndarray
            Feature bits for a given feature in fingerprint 2.
        """

        calculator = FeatureDistances()
        method_under_test = calculator._get_feature_distances_and_bit_coverages

        # Only the call itself is expected to raise.
        with pytest.raises(ValueError):
            method_under_test(feature1, feature2)
예제 #5
0
    def test_from_dict(self, feature_distances_dict):
        """
        Check the types of a FeatureDistances object created via ``_from_dict``.

        Parameters
        ----------
        feature_distances_dict : dict
            Input handed to ``FeatureDistances._from_dict`` (presumably the dictionary
            representation of feature distances; confirm against the fixture).
        """

        restored = FeatureDistances._from_dict(feature_distances_dict)
        assert isinstance(restored, FeatureDistances)

        # Attribute name -> type the attribute must have after loading.
        expected_types = (
            ("structure_pair_ids", tuple),
            ("kinase_pair_ids", tuple),
            ("distances", np.ndarray),
            ("bit_coverages", np.ndarray),
        )
        for attribute_name, expected_type in expected_types:
            assert isinstance(getattr(restored, attribute_name), expected_type)
예제 #6
0
def feature_distances():
    """
    Build a FeatureDistances instance filled with dummy data, i.e. distances and bit coverages
    between two fingerprints for each of their features.

    Returns
    -------
    kissim.similarity.FeatureDistances
        Distances and bit coverages between two fingerprints for each of their features.
    """

    # 15 values each; only positions 6 and 7 deviate from the default value.
    dummy_distances = np.array([0.0] * 6 + [1.0] * 2 + [0.0] * 7)
    dummy_bit_coverages = np.array([1.0] * 6 + [0.5] * 2 + [1.0] * 7)

    # FeatureDistances (set class attributes manually)
    instance = FeatureDistances()
    instance.structure_pair_ids = ("molecule1", "molecule2")
    instance.kinase_pair_ids = ("kinaseA", "kinaseB")
    instance.distances = dummy_distances
    instance.bit_coverages = dummy_bit_coverages

    return instance
예제 #7
0
    def test_calculate_feature_distance(self, feature_pair, distance_measure,
                                        distance):
        """
        Check the distance calculated for two value (feature) lists.

        Parameters
        ----------
        feature_pair : np.ndarray
            Pairwise bits of one feature extracted from two fingerprints (only bit positions
            without any NaN value).
        distance_measure : str
            Type of distance measure passed to the method under test.
        distance : float
            Expected distance between the two value lists (may be NaN).
        """

        calculator = FeatureDistances()
        observed = calculator._calculate_feature_distance(
            feature_pair, distance_measure)

        # NaN never compares close to anything, so an expected NaN is checked separately.
        if np.isnan(distance):
            assert np.isnan(observed)
            return

        assert np.isclose(observed, distance, rtol=1e-04)
예제 #8
0
    def _get_feature_distances(pair, fingerprints):
        """
        Compute the feature distances for a single pair of fingerprints.

        Parameters
        ----------
        pair : sequence
            The two keys (positions 0 and 1) used to look up the fingerprints to compare.
        fingerprints : dict of tuple of str: kissim.encoding.Fingerprint
            Dictionary of fingerprints: Keys are molecule codes and values are fingerprint data.

        Returns
        -------
        kissim.similarity.FeatureDistances
            Distances and bit coverages between two fingerprints for each of their features.
        """

        # Look up both fingerprints by the keys stored in the pair.
        first, second = fingerprints[pair[0]], fingerprints[pair[1]]

        return FeatureDistances.from_fingerprints(first, second)
예제 #9
0
def feature_distances_generator():
    """
    Build a FeatureDistancesGenerator instance filled with dummy data.

    Returns
    -------
    kissim.similarity.FeatureDistancesGenerator
        Feature distances for multiple fingerprint pairs.
    """

    # One entry per fingerprint pair:
    # (structure pair IDs, kinase pair IDs, value of all distances, value of all bit coverages)
    pair_specifications = [
        (("pdbA", "pdbB"), ("kinaseA", "kinaseA"), 1.0, 1.0),
        (("pdbA", "pdbC"), ("kinaseA", "kinaseB"), 0.0, 1.0),
        (("pdbB", "pdbC"), ("kinaseA", "kinaseB"), 0.0, 0.0),
    ]

    # FeatureDistances: one object per pair, each with 15 constant values
    feature_distances_list = []
    for structure_pair, kinase_pair, distance_value, coverage_value in pair_specifications:
        entry = FeatureDistances()
        entry.structure_pair_ids = structure_pair
        entry.kinase_pair_ids = kinase_pair
        entry.distances = np.array([distance_value] * 15)
        entry.bit_coverages = np.array([coverage_value] * 15)
        feature_distances_list.append(entry)

    # FeatureDistancesGenerator: convert the list with the generator's own helper
    generator = FeatureDistancesGenerator()
    generator.data = generator._feature_distances_list_to_df(
        feature_distances_list)
    generator.structure_kinase_ids = [
        ("pdbA", "kinaseA"),
        ("pdbB", "kinaseA"),
        ("pdbC", "kinaseB"),
    ]

    return generator