Example #1
0
def fingerprint_distance_generator():
    """
    Build a FingerprintDistanceGenerator instance populated with dummy data.

    The dummy data holds one row per structure pair (three pairs in total) with
    the structure and kinase names of both pair members, their distance and their
    bit coverage.

    Returns
    -------
    kissim.similarity.FingerprintDistanceGenerator
        Fingerprint distance for multiple fingerprint pairs.
    """

    # (structure, kinase) names referenced by the dummy pairs below
    structure_kinase_ids = [
        ("pdbA", "kinaseA"),
        ("pdbB", "kinaseA"),
        ("pdbC", "kinaseB"),
    ]

    # Column name -> values; each position across the columns is one structure pair
    columns = {
        "structure.1": ["pdbA", "pdbA", "pdbB"],
        "structure.2": ["pdbB", "pdbC", "pdbC"],
        "kinase.1": ["kinaseA", "kinaseA", "kinaseA"],
        "kinase.2": ["kinaseA", "kinaseB", "kinaseB"],
        "distance": np.array([0.75, 1.0, 0.8]),
        "bit_coverage": np.array([1.0, 1.0, 1.0]),
    }
    dummy_data = pd.DataFrame(columns)

    # Start from an empty generator and set its attributes by hand
    generator = FingerprintDistanceGenerator()
    generator.data = dummy_data
    generator.structure_kinase_ids = structure_kinase_ids

    return generator
Example #2
0
def weight_feature_distances(
    feature_distances_generator, output_filepath=None, feature_weights=None
):
    """
    Weight feature distances, i.e. generate one fingerprint distance per fingerprint pair.

    Parameters
    ----------
    feature_distances_generator : kissim.encoding.FeatureDistancesGenerator
        Feature distances.
    output_filepath : None or str or pathlib.Path
        Path to the output CSV file. If empty or None, nothing is written.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    FingerprintDistanceGenerator
        Fingerprint distances generated from the weighted feature distances.
    """

    generator = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances_generator, feature_weights
    )

    # Nothing to save: hand back the in-memory result only
    if not output_filepath:
        return generator

    generator.to_csv(Path(output_filepath))
    return generator
    def test_from_fingerprint_generator(self, fingerprint_generator):
        """
        Check that a FingerprintDistanceGenerator can be created from a fingerprint generator.
        """

        generator = FingerprintDistanceGenerator.from_fingerprint_generator(fingerprint_generator)

        # Only the type of the returned object is checked here
        assert isinstance(generator, FingerprintDistanceGenerator)
    def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, feature_weights):
        """
        Check that a FingerprintDistanceGenerator can be created from structure KLIFS IDs.
        """

        generator = FingerprintDistanceGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, feature_weights
        )

        # Only the type of the returned object is checked here
        assert isinstance(generator, FingerprintDistanceGenerator)
    def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, feature_weights):
        """
        Test FingerprintDistanceGenerator generation from structure KLIFS IDs, a KLIFS session
        and feature weights.
        """

        # The object under test is a FingerprintDistanceGenerator (not a
        # FeatureDistancesGenerator), so it is named accordingly
        fingerprint_distance_generator = FingerprintDistanceGenerator.from_structure_klifs_ids(
            structure_klifs_ids, klifs_session, feature_weights
        )
        assert isinstance(fingerprint_distance_generator, FingerprintDistanceGenerator)
Example #6
0
def weight_feature_distances(
    feature_distances_generator, output_filepath=None, feature_weights=None
):
    """
    Weight feature distances, i.e. generate one fingerprint distance per fingerprint pair.

    If an output file path is given, three outputs are produced next to each other:
    the fingerprint distances (at `output_filepath` itself), the kinase distance matrix
    (`<stem>_to_kinase_matrix.csv`) and a tree built from that matrix
    (`<stem>_to_kinase_clusters.tree`), where `<stem>` is the stem of `output_filepath`.

    Parameters
    ----------
    feature_distances_generator : kissim.encoding.FeatureDistancesGenerator
        Feature distances.
    output_filepath : None or str or pathlib.Path
        Path to the fingerprint distances output file. If empty or None, nothing is written.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    FingerprintDistanceGenerator
        Fingerprint distances generated from the weighted feature distances.
    """

    generator = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances_generator, feature_weights
    )

    # Nothing to save: hand back the in-memory result only
    if not output_filepath:
        return generator

    distances_filepath = Path(output_filepath)
    output_folder = distances_filepath.parent
    stem = distances_filepath.stem

    # Fingerprint distances
    generator.to_csv(distances_filepath)

    # Default kinase distance matrix; axis names are dropped before writing
    kinase_matrix = generator.kinase_distance_matrix()
    kinase_matrix.index.name = None
    kinase_matrix.columns.name = None
    kinase_matrix.to_csv(output_folder / f"{stem}_to_kinase_matrix.csv", index=True)
    print(kinase_matrix)

    # Default tree; the annotation file path is handed to the tree module as well
    tree.from_distance_matrix(
        kinase_matrix,
        output_folder / f"{stem}_to_kinase_clusters.tree",
        output_folder / "kinase_annotation.csv",
    )

    return generator
Example #7
0
def weights(feature_distances_path, feature_weights=None, fingerprint_distances_path=None):
    """
    Calculate fingerprint distances by weighting the feature distances stored in a CSV file.

    Parameters
    ----------
    feature_distances_path : str or pathlib.Path
        Path to feature distances CSV file.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.
    fingerprint_distances_path : None or str or pathlib.Path
        Path to output fingerprint distances CSV file. If None, no file is written.

    Returns
    -------
    kissim.comparison.FingerprintDistanceGenerator
        Pairwise fingerprint distances.
    """

    # Input: feature distances from CSV
    feature_distances_path = Path(feature_distances_path)
    logger.info(f"Read feature distances from {feature_distances_path}...")
    feature_distances = FeatureDistancesGenerator.from_csv(feature_distances_path)

    # Weighting: feature distances -> fingerprint distances
    logger.info(f"Feature weights: {feature_weights}")
    fingerprint_distances = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances, feature_weights
    )

    # No output path requested: return the in-memory result only
    if fingerprint_distances_path is None:
        return fingerprint_distances

    # Output: fingerprint distances to CSV
    fingerprint_distances_path = Path(fingerprint_distances_path)
    logger.info(f"To file {fingerprint_distances_path}")
    fingerprint_distances.to_csv(fingerprint_distances_path)

    return fingerprint_distances
    def test_from_feature_distances_generator(
        self,
        feature_distances_generator,
        feature_weights,
        structure_kinase_ids,
        structure_pair_ids,
        kinase_pair_ids,
        structure_ids,
        kinase_ids,
    ):
        """
        Test attributes and properties of a FingerprintDistanceGenerator that was created
        from a feature distances generator (without passing feature weights).
        """

        generator = FingerprintDistanceGenerator.from_feature_distances_generator(
            feature_distances_generator
        )

        # Attributes: data table (type and exact column order) and structure/kinase IDs
        expected_columns = [
            "structure.1",
            "structure.2",
            "kinase.1",
            "kinase.2",
            "distance",
            "bit_coverage",
        ]
        assert isinstance(generator.data, pd.DataFrame)
        assert generator.data.columns.to_list() == expected_columns
        assert generator.structure_kinase_ids == structure_kinase_ids

        # Properties: IDs must match the expected values...
        assert generator.structure_pair_ids == structure_pair_ids
        assert generator.kinase_pair_ids == kinase_pair_ids
        assert generator.structure_ids == structure_ids
        assert generator.kinase_ids == kinase_ids

        # ...while for the numeric properties only the type is checked
        assert isinstance(generator.distances, np.ndarray)
        assert isinstance(generator.bit_coverages, np.ndarray)
Example #9
0
def test_main_compare(fingerprint_generator, args):
    """
    Test the CLI call given in `args` using a subprocess: the call is expected to produce
    a feature distances and a fingerprint distances CSV file in the output folder.

    Parameters
    ----------
    fingerprint_generator : object with a `to_json` method
        Fingerprints that are written to the input file before the CLI is called.
    args : str
        Whitespace-separated command line. The 4th token (index 3) is used as input file
        path and the 6th token (index 5) as output folder path.

    Notes
    -----
    The generated files are removed in a `finally` block, so they are also cleaned up
    when the subprocess or one of the assertions fails.
    """

    args = args.split()
    input_filepath = Path(args[3])
    output_path = Path(args[5])
    feature_distances_filepath = output_path / "feature_distances.csv"
    fingerprint_distance_filepath = output_path / "fingerprint_distances.csv"

    try:
        # Generate the input file for the CLI call
        fingerprint_generator.to_json(input_filepath)

        subprocess.run(args, check=True)

        ### Feature distances generator
        # CSV file there?
        assert feature_distances_filepath.exists()
        # CSV file can be loaded as FeatureDistancesGenerator object?
        feature_distances_generator = FeatureDistancesGenerator.from_csv(
            feature_distances_filepath
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)
        assert isinstance(feature_distances_generator.data, pd.DataFrame)

        ### Fingerprint distance generator
        # CSV file there?
        assert fingerprint_distance_filepath.exists()
        # CSV file can be loaded as FingerprintDistanceGenerator object?
        fingerprint_distance_generator = FingerprintDistanceGenerator.from_csv(
            fingerprint_distance_filepath
        )
        assert isinstance(fingerprint_distance_generator, FingerprintDistanceGenerator)
        assert isinstance(fingerprint_distance_generator.data, pd.DataFrame)
    finally:
        # Delete files - cannot be done within enter_tmp_directory, since temporary files
        # apparently cannot be read from CLI.
        # Only existing files are removed, so that a failure above is not masked by a
        # FileNotFoundError raised during cleanup.
        for filepath in (
            input_filepath,
            feature_distances_filepath,
            fingerprint_distance_filepath,
        ):
            if filepath.exists():
                filepath.unlink()