예제 #1
0
    def test_get_physicochemical_or_spatial_features_dict(
            self, structure_klifs_id):
        """
        Test if the physicochemical and spatial feature dictionaries have the correct keys.
        """

        # Both pocket representations are loaded from the local KLIFS session
        bp_pocket = PocketBioPython.from_structure_klifs_id(structure_klifs_id, LOCAL)
        df_pocket = PocketDataFrame.from_structure_klifs_id(structure_klifs_id, LOCAL)

        fp = Fingerprint()

        # Physicochemical features (computed from the BioPython-based pocket)
        physicochemical = fp._get_physicochemical_features_dict(bp_pocket)
        assert isinstance(physicochemical, dict)
        physicochemical_keys = list(physicochemical.keys())
        assert physicochemical_keys == FEATURE_NAMES_PHYSICOCHEMICAL_DICT

        # Spatial features (computed from the DataFrame-based pocket)
        spatial = fp._get_spatial_features_dict(df_pocket)
        assert isinstance(spatial, dict)
        spatial_keys = list(spatial.keys())
        assert spatial_keys == FEATURE_NAMES_SPATIAL_DICT

        # Distances and moments are expected to share the same key names
        distances_keys = list(spatial["distances"].keys())
        assert distances_keys == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        moments_keys = list(spatial["moments"].keys())
        assert moments_keys == FEATURE_NAMES_DISTANCES_AND_MOMENTS
예제 #2
0
    def test_to_from_json(self, structure_klifs_id):
        """
        Test that a fingerprint survives a save/load round trip through a json file.
        """

        original = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
        json_filepath = Path("fingerprint.json")

        # The relative path is resolved while the temporary directory context is active
        with enter_temp_directory():

            # Write the fingerprint to disk
            original.to_json(json_filepath)
            assert json_filepath.exists()

            # Read it back
            reloaded = Fingerprint.from_json(json_filepath)

            # Attributes from ID and from json must be the same
            assert original.structure_klifs_id == reloaded.structure_klifs_id

            # Zero tolerances: values must match exactly; NaNs compare as equal
            values_original = original.values_array(True, True, True)
            values_reloaded = reloaded.values_array(True, True, True)
            assert np.allclose(
                values_original, values_reloaded, rtol=0, atol=0, equal_nan=True
            )

            assert original.residue_ids == reloaded.residue_ids
            assert original.residue_ixs == reloaded.residue_ixs
예제 #3
0
    def test_from_structure_klifs_id(self, structure_klifs_id, fingerprint):
        """
        Test if Fingerprint can be set locally and remotely.

        When the ``fingerprint`` argument is None, both the locally and the remotely
        generated fingerprints are expected to be None as well.
        """

        local_fp = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)
        remote_fp = Fingerprint.from_structure_klifs_id(structure_klifs_id, REMOTE)

        # Guard clause: nothing more to check if no fingerprint is expected
        if fingerprint is None:
            assert local_fp is None
            assert remote_fp is None
            return

        for candidate in (local_fp, remote_fp):
            assert isinstance(candidate, Fingerprint)

        # Locally and remotely obtained fingerprints must hold identical values
        # (zero tolerances; NaNs compare as equal)
        local_values = local_fp.values_array(True, True, True)
        remote_values = remote_fp.values_array(True, True, True)
        assert np.allclose(
            local_values, remote_values, rtol=0, atol=0, equal_nan=True
        )

        # Attribute structure_klifs_id
        for candidate in (local_fp, remote_fp):
            assert candidate.structure_klifs_id == structure_klifs_id

        # Attribute values_dict (checked on the local fingerprint only)
        values = local_fp.values_dict
        assert list(values.keys()) == FEATURE_NAMES
        assert list(values["physicochemical"].keys()) == FEATURE_NAMES_PHYSICOCHEMICAL_DICT
        spatial = values["spatial"]
        assert list(spatial.keys()) == FEATURE_NAMES_SPATIAL_DICT
        assert list(spatial["distances"].keys()) == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        assert list(spatial["moments"].keys()) == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        assert list(spatial["subpocket_centers"].keys()) == FEATURE_NAMES_DISTANCES_AND_MOMENTS

        # Attribute residue_ids
        assert local_fp.residue_ids == remote_fp.residue_ids

        # Attribute residue_ixs
        assert local_fp.residue_ixs == remote_fp.residue_ixs

        # Attribute subpocket_centers
        centers = local_fp.subpocket_centers
        assert isinstance(centers, pd.DataFrame)
        assert centers.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        assert centers.index.to_list() == ["x", "y", "z"]
예제 #4
0
    def from_json(cls, filepath, normalize=False):
        """
        Initialize a FingerprintGenerator object from a json file.

        The json file must contain a list of fingerprint dictionaries; each one is
        converted with ``Fingerprint._from_dict`` and stored under its
        ``structure_klifs_id``.

        Parameters
        ----------
        filepath : str or pathlib.Path
            Path to json file.
        normalize : bool
            Add normalization (default: False). This will store the unnormalized features alongside
            the normalized features.

        Returns
        -------
        Instance of ``cls``
            Object with ``data`` and ``structure_klifs_ids`` set (and ``data_normalized`` if
            ``normalize`` is True).
        """

        with Path(filepath).open("r") as handle:
            fingerprint_dicts = json.load(handle)

        # Deserialize lazily, then key each fingerprint by its structure KLIFS ID
        fingerprints = (Fingerprint._from_dict(entry) for entry in fingerprint_dicts)
        fingerprints_by_id = {
            fingerprint.structure_klifs_id: fingerprint for fingerprint in fingerprints
        }

        generator = cls()
        generator.data = fingerprints_by_id
        if normalize:
            generator.data_normalized = generator._normalize_fingerprints()
        generator.structure_klifs_ids = list(generator.data.keys())

        return generator
예제 #5
0
    def test_fingerprint(self, structure_klifs_id):
        """
        Test if a normalized fingerprint can be generated from a fingerprint.

        As written, the test makes no assertions: it passes as long as
        ``FingerprintNormalized.from_fingerprint`` does not raise.
        """

        raw_fingerprint = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)

        # The returned object is not inspected further
        FingerprintNormalized.from_fingerprint(raw_fingerprint)
예제 #6
0
    def test_moments(self, structure_klifs_id):
        """
        Test the column names, index values and index name of the moments DataFrame.
        """

        moments = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL).moments

        assert moments.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        # The index is expected to hold the values 1, 2 and 3
        assert moments.index.to_list() == [1, 2, 3]
        assert moments.index.name == "moments"
예제 #7
0
    def test_distances(self, structure_klifs_id):
        """
        Test the column names, index values and index name of the distances DataFrame.
        """

        distances = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL).distances

        assert distances.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS
        # The index is expected to run from 1 to 85 (inclusive)
        expected_index = list(range(1, 86))
        assert distances.index.to_list() == expected_index
        assert distances.index.name == "residue.ix"
예제 #8
0
    def test_physicochemical(self, structure_klifs_id):
        """
        Test the column names, index values and index name of the physicochemical DataFrame.
        """

        physicochemical = Fingerprint.from_structure_klifs_id(
            structure_klifs_id, LOCAL
        ).physicochemical

        assert physicochemical.columns.to_list() == FEATURE_NAMES_PHYSICOCHEMICAL
        # The index is expected to run from 1 to 85 (inclusive)
        expected_index = list(range(1, 86))
        assert physicochemical.index.to_list() == expected_index
        assert physicochemical.index.name == "residue.ix"
예제 #9
0
    def _get_fingerprint(self, structure_klifs_id, klifs_session):
        """
        Log the request and generate the fingerprint for one structure.

        Parameters
        ----------
        structure_klifs_id : int
            Structure KLIFS ID.
        klifs_session : opencadd.databases.klifs.session.Session
            Local or remote KLIFS session.

        Returns
        -------
        kissim.encoding.Fingerprint
            Whatever ``Fingerprint.from_structure_klifs_id`` returns for the given structure.
        """

        logger.info(f"{structure_klifs_id}: Generate fingerprint...")
        return Fingerprint.from_structure_klifs_id(structure_klifs_id, klifs_session)
예제 #10
0
    def test_values_array(self, structure_klifs_id, values_array_mean):
        """
        Test fingerprint values array.
        """

        fp = Fingerprint.from_structure_klifs_id(structure_klifs_id, LOCAL)

        # Mean over all values, ignoring NaNs, compared with an absolute tolerance
        full_values = fp.values_array(True, True, True)
        mean_calculated = np.nanmean(full_values)
        assert pytest.approx(mean_calculated, abs=1e-4) == values_array_mean

        # Expected length of the final fingerprint for each selection of
        # (physicochemical, distances, moments) features.
        expected_sizes = [
            ((False, False, False), 0),
            ((True, False, False), 680),
            ((False, True, False), 340),
            ((False, False, True), 12),
            ((True, True, False), 1020),
            ((True, False, True), 692),
            ((False, True, True), 352),
            ((True, True, True), 1032),
        ]
        for selection, expected_size in expected_sizes:
            assert fp.values_array(*selection).size == expected_size
예제 #11
0
class TestsFeatureDistancesGenerator:
    """
    Test FeatureDistancesGenerator class methods.
    """

    @pytest.mark.parametrize(
        "feature_weights, structure_ids, kinase_ids",
        [
            (
                None,
                ["HUMAN/ABL1_2g2i_chainA", "HUMAN/AAK1_4wsq_altA_chainB"],
                ["AAK1", "ABL1"],
            )
        ],
    )
    def test_from_fingerprints(
        self,
        fingerprint_generator,
        feature_weights,
        structure_ids,
        kinase_ids,
    ):
        """
        Test FeatureDistancesGenerator class attributes when built from a fingerprint generator.

        NOTE(review): the parametrized values (feature_weights, structure_ids, kinase_ids)
        are accepted but not used by any assertion below.
        """

        # Test FeatureDistancesGenerator class attributes
        feature_distances_generator = FeatureDistancesGenerator.from_fingerprint_generator(
            fingerprint_generator
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)

        # Test attributes
        assert isinstance(feature_distances_generator.data, pd.DataFrame)
        # Four ID columns followed by 15 distance and 15 bit coverage columns
        expected_columns = (
            ["structure.1", "structure.2", "kinase.1", "kinase.2"]
            + [f"distance.{i}" for i in range(1, 16)]
            + [f"bit_coverage.{i}" for i in range(1, 16)]
        )
        assert feature_distances_generator.data.columns.to_list() == expected_columns
        assert isinstance(feature_distances_generator.structure_kinase_ids, list)

    @pytest.mark.parametrize(
        "structure_klifs_ids, klifs_session, n_cores",
        [
            ([110, 118], REMOTE, 1),
            ([110, 118], REMOTE, 2),
            ([110, 118], LOCAL, 1),
            ([110, 118], LOCAL, 2),
            ([110, 118], None, None),
        ],
    )
    def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, n_cores):
        """
        Test FeatureDistancesGenerator class attributes when built from structure KLIFS IDs,
        for different session / core count combinations.
        """

        # Test FeatureDistancesGenerator class attributes
        feature_distances_generator = FeatureDistancesGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, n_cores
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)

        # Test attributes
        assert isinstance(feature_distances_generator.data, pd.DataFrame)
        assert isinstance(feature_distances_generator.structure_kinase_ids, list)

    @pytest.mark.parametrize(
        "structure_kinase_ids",
        [[["pdbA", "kinaseA"], ["pdbB", "kinaseA"], ["pdbC", "kinaseB"]]],
    )
    def test_structure_kinase_ids(self, feature_distances_generator, structure_kinase_ids):
        """
        Compare the `_structure_kinase_ids` attribute with the expected structure-kinase pairs.
        """

        assert feature_distances_generator._structure_kinase_ids == structure_kinase_ids

    @pytest.mark.parametrize(
        "structure_pair_ids",
        [[["pdbA", "pdbB"], ["pdbA", "pdbC"], ["pdbB", "pdbC"]]],
    )
    def test_structure_pair_ids(self, feature_distances_generator, structure_pair_ids):
        """
        Compare the `structure_pair_ids` attribute with the expected structure pairs.
        """

        assert feature_distances_generator.structure_pair_ids == structure_pair_ids

    @pytest.mark.parametrize(
        "kinase_pair_ids",
        [[["kinaseA", "kinaseA"], ["kinaseA", "kinaseB"], ["kinaseA", "kinaseB"]]],
    )
    def test_kinase_pair_ids(self, feature_distances_generator, kinase_pair_ids):
        """
        Compare the `kinase_pair_ids` attribute with the expected kinase pairs.
        """

        assert feature_distances_generator.kinase_pair_ids == kinase_pair_ids

    @pytest.mark.parametrize("structure_ids", [["pdbA", "pdbB", "pdbC"]])
    def test_structure_ids(self, feature_distances_generator, structure_ids):
        """
        Compare the `structure_ids` attribute with the expected structure IDs.
        """

        assert feature_distances_generator.structure_ids == structure_ids

    @pytest.mark.parametrize("kinase_ids", [["kinaseA", "kinaseB"]])
    def test_kinase_ids(self, feature_distances_generator, kinase_ids):
        """
        Compare the `kinase_ids` attribute with the expected kinase IDs.
        """

        assert feature_distances_generator.kinase_ids == kinase_ids

    def test_to_from_csv(self, feature_distances_generator):
        """
        Test that a generator can be written to a csv file and loaded back from it.
        """

        with enter_temp_directory():

            filepath = Path("test.csv")

            feature_distances_generator.to_csv(filepath)
            assert filepath.exists()

            feature_distances_generator_from_csv = FeatureDistancesGenerator.from_csv(filepath)
            assert isinstance(feature_distances_generator_from_csv, FeatureDistancesGenerator)

    @pytest.mark.parametrize(
        "fingerprints, pairs",
        [
            (
                {"a": Fingerprint(), "b": Fingerprint(), "c": Fingerprint()},
                [("a", "b"), ("a", "c"), ("b", "c")],
            )
        ],
    )
    def test_fingerprint_pairs(self, fingerprints, pairs):
        """
        Test calculation of all fingerprint pair combinations from fingerprints dictionary.

        Parameters
        ----------
        fingerprints : dict of kissim.encoding.Fingerprint
            Dictionary of fingerprints: Keys are molecule codes and values are fingerprint data.
        pairs : list of tuple of str
            Expected molecule code pairs.
        """

        generator = FeatureDistancesGenerator()
        # Materialize the result so that its length can be checked before iterating
        pairs_calculated = list(generator._fingerprint_pairs(fingerprints))

        # zip() stops at the shorter input, so without this check missing or surplus
        # pairs (including an empty result) would go unnoticed.
        assert len(pairs_calculated) == len(pairs)

        for pair_calculated, pair in zip(pairs_calculated, pairs):
            assert pair_calculated == pair

    def test_get_feature_distances(self, fingerprint_generator):
        """
        Test if return type is instance of FeatureDistance class.

        Parameters
        ----------
        fingerprint_generator : FingerprintGenerator
            Multiple fingerprints.
        """

        # Get fingerprint pair from FingerprintGenerator (first two keys of its data)
        fingerprints = fingerprint_generator.data
        pair = list(fingerprints.keys())[:2]

        # Test feature distance calculation
        feature_distances_generator = FeatureDistancesGenerator()
        feature_distances_calculated = feature_distances_generator._get_feature_distances(
            pair, fingerprints
        )

        assert isinstance(feature_distances_calculated, FeatureDistances)

    def test_get_feature_distances_from_list(self, fingerprint_generator):
        """
        Test if return type is instance of list of FeatureDistance class.

        Parameters
        ----------
        fingerprint_generator : FingerprintGenerator
            Multiple fingerprints.
        """

        # Test bulk feature distance calculation
        generator = FeatureDistancesGenerator()

        feature_distances_list = generator._get_feature_distances_from_list(
            generator._get_feature_distances, fingerprint_generator.data, 1
        )

        assert isinstance(feature_distances_list, list)

        for feature_distances in feature_distances_list:
            assert isinstance(feature_distances, FeatureDistances)