def test_physicochemical_distances_moments(self, structure_klifs_ids,
                                           normalized):
        """
        Check the tables returned by the per-structure feature group getters
        (`physicochemical`, `distances`, `moments`).

        For each table the test verifies that the index equals the input structure KLIFS IDs,
        that the columns carry the expected feature names, and that the first cell holds a list.

        Notes
        -----
        The input structure KLIFS IDs must all yield a valid fingerprint; otherwise the index
        comparison fails.
        """

        fingerprints = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids)

        def _check_spatial_table(table):
            # Distances and moments share the same expected column names.
            assert table.index.to_list() == structure_klifs_ids
            assert table.columns.to_list() == FEATURE_NAMES_DISTANCES_AND_MOMENTS
            assert isinstance(table.iloc[0, 0], list)

        # Physicochemical features: expected column names depend on the `normalized` flag
        physicochemical = fingerprints.physicochemical(normalized)
        assert physicochemical.index.to_list() == structure_klifs_ids
        expected_columns = (FEATURE_NAMES_PHYSICOCHEMICAL
                            if normalized else
                            FEATURE_NAMES_PHYSICOCHEMICAL_DICT)
        assert physicochemical.columns.to_list() == expected_columns
        assert isinstance(physicochemical.iloc[0, 0], list)

        # Spatial features
        _check_spatial_table(fingerprints.distances(normalized))
        _check_spatial_table(fingerprints.moments(normalized))
Beispiel #2
0
    def from_structure_klifs_ids(cls,
                                 structure_klifs_ids,
                                 klifs_session=None,
                                 n_cores=1):
        """
        Build a feature distances generator starting from structure KLIFS IDs.

        Fingerprints are generated first (via `FingerprintGenerator.from_structure_klifs_ids`)
        and then handed to `cls.from_fingerprint_generator`.

        Parameters
        ----------
        structure_klifs_ids : list of int
            Input structure KLIFS IDs (output fingerprints may contain less IDs because some
            structures could not be encoded).
        klifs_session : opencadd.databases.klifs.session.Session or None
            Local or remote KLIFS session; forwarded to the fingerprint generator.
        n_cores : int or None
            Number of cores as defined by the user; forwarded both to the fingerprint generation
            and to the feature distances generation.

        Returns
        -------
        kissim.comparison.FeatureDistancesGenerator
            Feature distances generator.
        """

        fingerprints = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, n_cores)
        return cls.from_fingerprint_generator(fingerprints, n_cores)
    def test_physicochemical_distances_moments_exploded(
            self, structure_klifs_ids, normalized):
        """
        Check the tables returned by the "exploded" feature group getters
        (`physicochemical_exploded`, `distances_exploded`, `moments_exploded`).

        Each table is expected to have a multi-level index whose "structure_klifs_id" level
        repeats every input ID once per row of that structure (85 rows for the residue-wise
        tables, 3 rows for the moments table), a second level ("residue_ix" or "moment")
        counting from 1 within each structure, the expected feature names as columns, and a
        single column dtype that compares equal to "float64".

        Notes
        -----
        The input structure KLIFS IDs must all yield a valid fingerprint; otherwise the index
        comparisons fail.
        """

        fingerprints = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids)

        n_structures = len(structure_klifs_ids)
        expected_residue_ix = list(range(1, 86)) * n_structures
        expected_moment = list(range(1, 4)) * n_structures

        def _repeated_ids(n_repeats):
            # Each structure ID repeated `n_repeats` times, in input order.
            return [
                structure_klifs_id for structure_klifs_id in structure_klifs_ids
                for _ in range(n_repeats)
            ]

        def _check_table(table, n_repeats, level_name, level_values, columns):
            assert table.index.get_level_values(
                "structure_klifs_id").to_list() == _repeated_ids(n_repeats)
            assert table.index.get_level_values(
                level_name).to_list() == level_values
            assert table.columns.to_list() == columns
            assert table.dtypes.unique() == "float64"

        # Physicochemical features: expected column names depend on the `normalized` flag
        physicochemical_columns = (FEATURE_NAMES_PHYSICOCHEMICAL
                                   if normalized else
                                   FEATURE_NAMES_PHYSICOCHEMICAL_DICT)
        _check_table(
            fingerprints.physicochemical_exploded(normalized),
            85,
            "residue_ix",
            expected_residue_ix,
            physicochemical_columns,
        )

        # Distances: one row per residue
        _check_table(
            fingerprints.distances_exploded(normalized),
            85,
            "residue_ix",
            expected_residue_ix,
            FEATURE_NAMES_DISTANCES_AND_MOMENTS,
        )

        # Moments: one row per moment
        _check_table(
            fingerprints.moments_exploded(normalized),
            3,
            "moment",
            expected_moment,
            FEATURE_NAMES_DISTANCES_AND_MOMENTS,
        )
Beispiel #4
0
def compare_from_cli(args):
    """
    Run the fingerprint comparison from parsed command-line arguments.

    Sets up logging to `distances.log` inside the output location, loads the fingerprints from
    the input JSON file, and calls `compare`.

    Parameters
    ----------
    args : argparse.Namespace
        CLI arguments; the attributes `input`, `output`, `weights` and `ncores` are read.
    """

    log_path = Path(args.output) / "distances.log"
    configure_logger(log_path)

    compare(
        FingerprintGenerator.from_json(args.input),
        args.output,
        args.weights,
        args.ncores,
    )
Beispiel #5
0
def outliers(fingerprints_path,
             distance_cutoff,
             fingerprints_wo_outliers_path=None):
    """
    Drop fingerprints whose spatial distances exceed a cutoff.

    A fingerprint counts as an outlier if any value in its `distances` table is strictly greater
    than `distance_cutoff`. Outliers are deleted from the loaded generator's `data` in place.

    Parameters
    ----------
    fingerprints_path : str or pathlib.Path
        Path to fingerprints JSON file.
    distance_cutoff : float
        Tolerated distance maximum; fingerprints with any distance greater than this cutoff are
        removed.
    fingerprints_wo_outliers_path : None or str or pathlib.Path
        If not None, the cleaned fingerprints are written to this JSON file.

    Returns
    -------
    kissim.encoding.FingerprintGenerator
        Fingerprints without outliers.
    """

    # Load fingerprints
    logger.info("Read fingerprints...")
    fingerprint_generator = FingerprintGenerator.from_json(
        Path(fingerprints_path))
    logger.info(f"Number of fingerprints: {len(fingerprint_generator.data)}")

    # Collect the IDs first; deleting while iterating over the dict is not allowed
    logger.info("Use the following distance minimum/maximum cutoffs"
                f" to identify outlier structures: {distance_cutoff}")
    remove_structure_ids = [
        structure_id
        for structure_id, fingerprint in fingerprint_generator.data.items()
        if (fingerprint.distances > distance_cutoff).any().any()
    ]
    logger.info(f"Structure IDs to be removed: {remove_structure_ids}")

    # Remove fingerprints
    logger.info("Remove fingerprints with distance outliers...")
    for outlier_id in remove_structure_ids:
        del fingerprint_generator.data[outlier_id]
    logger.info(f"Number of fingerprints: {len(fingerprint_generator.data)}")

    # Nothing to save: done
    if fingerprints_wo_outliers_path is None:
        return fingerprint_generator

    # Save to file (the path is logged as passed in, before conversion)
    logger.info(
        f"Save cleaned fingerprints to {fingerprints_wo_outliers_path}...")
    fingerprint_generator.to_json(Path(fingerprints_wo_outliers_path))
    return fingerprint_generator
    def test_to_from_json(self, structure_klifs_ids, normalize,
                          values_array_sum):
        """
        Round-trip fingerprints through a JSON file and compare them with the originals.

        The fingerprints are written to `fingerprints.json` inside a temporary directory and
        loaded again with the given `normalize` flag. The test then checks the type of the
        reloaded object, the keys of `data` (and of `data_normalized` if `normalize` is set,
        which is otherwise expected to be None), and the NaN-ignoring sum over all reloaded
        fingerprint values against `values_array_sum`.
        """

        json_filepath = Path("fingerprints.json")
        fingerprints = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids, LOCAL, 1)

        with enter_temp_directory():

            # Write ...
            fingerprints.to_json(json_filepath)
            assert json_filepath.exists()

            # ... and read back
            fingerprints_reloaded = FingerprintGenerator.from_json(
                json_filepath, normalize)

        assert isinstance(fingerprints_reloaded, FingerprintGenerator)

        # Attribute data
        assert list(fingerprints.data) == list(fingerprints_reloaded.data)

        # Attribute data_normalized
        if not normalize:
            assert fingerprints_reloaded.data_normalized is None
        else:
            assert list(fingerprints.data_normalized) == list(
                fingerprints_reloaded.data_normalized)

        # Sum over all fingerprint values, ignoring NaNs
        values_array_sum_calculated = sum(
            np.nansum(fingerprint.values_array(True, True, True))
            for fingerprint in fingerprints_reloaded.data.values())
        assert pytest.approx(values_array_sum_calculated,
                             abs=1e-4) == values_array_sum
    def test_from_structure_klifs_id(self, structure_klifs_ids, klifs_session,
                                     n_cores, fingerprints_values_array_sum):
        """
        Generate fingerprints for the given session / core count and check the result.

        Verified are the attributes `structure_klifs_ids`, `klifs_session`, `data` and
        `data_normalized`, the NaN-ignoring sum over all fingerprint values, and the layout
        (type, index, two column levels) of the `subpocket_centers` property.
        """

        fingerprints = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, n_cores)

        def _check_mapping(mapping, value_type):
            # Expect a dict mapping int keys to instances of `value_type`.
            assert isinstance(mapping, dict)
            for structure_klifs_id, entry in mapping.items():
                assert isinstance(structure_klifs_id, int)
                assert isinstance(entry, value_type)

        # Attribute: structure_klifs_ids
        assert fingerprints.structure_klifs_ids == structure_klifs_ids

        # Attribute: klifs_session
        if klifs_session is None:
            # Without a user-provided session, the generator's session must have a client
            assert fingerprints.klifs_session._client is not None
        else:
            assert fingerprints.klifs_session == klifs_session

        # Attribute: data
        _check_mapping(fingerprints.data, Fingerprint)
        fingerprints_values_array_sum_calculated = sum(
            np.nansum(fingerprint.values_array(True, True, True))
            for fingerprint in fingerprints.data.values())
        assert (pytest.approx(fingerprints_values_array_sum_calculated,
                              abs=1e-4) == fingerprints_values_array_sum)

        # Attribute: data_normalized
        _check_mapping(fingerprints.data_normalized, FingerprintNormalized)

        # Property: subpocket_centers
        subpocket_centers = fingerprints.subpocket_centers
        assert isinstance(subpocket_centers, pd.DataFrame)
        assert subpocket_centers.index.to_list() == structure_klifs_ids

        # Column level 0: each feature name three times in a row; level 1: x, y, z per name
        expected_level_0 = np.repeat(FEATURE_NAMES_DISTANCES_AND_MOMENTS,
                                     3).tolist()
        expected_level_1 = ["x", "y", "z"
                            ] * len(FEATURE_NAMES_DISTANCES_AND_MOMENTS)
        columns = subpocket_centers.columns
        assert columns.get_level_values(0).to_list() == expected_level_0
        assert columns.get_level_values(1).to_list() == expected_level_1
Beispiel #8
0
def test_main_encode(args):
    """
    Run the encoding CLI in a subprocess and inspect the files it leaves behind.

    `args` is the full command line as a single whitespace-separated string. The command is run
    inside a temporary directory; afterwards `fingerprints.json` and `fingerprints.log` must
    exist there, and the JSON file must load as a `FingerprintGenerator` whose first data entry
    is a `Fingerprint`.
    """

    command = args.split()
    output = Path("fingerprints.json")

    with enter_temp_directory():
        # A non-zero exit code raises and fails the test
        subprocess.run(command, check=True)

        # Json file there?
        assert output.exists()
        # Log file there?
        log_path = Path(f"{output.stem}.log")
        assert log_path.exists()

        # Json file can be loaded as FingerprintGenerator object?
        fingerprint_generator = FingerprintGenerator.from_json(output)
        assert isinstance(fingerprint_generator, FingerprintGenerator)
        first_fingerprint = list(fingerprint_generator.data.values())[0]
        assert isinstance(first_fingerprint, Fingerprint)
Beispiel #9
0
def encode(structure_klifs_ids,
           fingerprints_filepath=None,
           local_klifs_download_path=None,
           n_cores=1):
    """
    Generate fingerprints for a set of structures and optionally save them.

    Parameters
    ----------
    structure_klifs_ids : list of int
        Structure KLIFS IDs.
    fingerprints_filepath : str or pathlib.Path or None
        Path to output json file. If None (default) or otherwise falsy, nothing is written.
    local_klifs_download_path : str or None
        Forwarded to `_setup_klifs_session`: a path to a local KLIFS download sets up a local
        KLIFS session, None sets up a remote KLIFS session.
    n_cores : int
        Number of cores used to generate fingerprints.

    Returns
    -------
    kissim.encoding.FingerprintGenerator
        Fingerprints.
    """

    # Set up the KLIFS session and generate the fingerprints with it
    fingerprints = FingerprintGenerator.from_structure_klifs_ids(
        structure_klifs_ids,
        _setup_klifs_session(local_klifs_download_path),
        n_cores,
    )

    # No output file requested: done
    if not fingerprints_filepath:
        return fingerprints

    # Save fingerprints to json file
    logger.info(f"Write fingerprints to file: {fingerprints_filepath}")
    fingerprints.to_json(fingerprints_filepath)
    return fingerprints