def fingerprint_distance_generator():
    """
    Build a FingerprintDistanceGenerator instance populated with dummy data.

    The dummy data describes three structure pairs (pdbA/pdbB, pdbA/pdbC, pdbB/pdbC)
    together with their kinase names, distances and bit coverages.

    Returns
    -------
    kissim.similarity.FingerprintDistanceGenerator
        Fingerprint distance for multiple fingerprint pairs.
    """

    # Structure-to-kinase mapping for the three dummy structures
    structure_kinase_ids = [
        ("pdbA", "kinaseA"),
        ("pdbB", "kinaseA"),
        ("pdbC", "kinaseB"),
    ]

    # One row per structure pair
    pair_data = pd.DataFrame(
        {
            "structure.1": ["pdbA", "pdbA", "pdbB"],
            "structure.2": ["pdbB", "pdbC", "pdbC"],
            "kinase.1": ["kinaseA", "kinaseA", "kinaseA"],
            "kinase.2": ["kinaseA", "kinaseB", "kinaseB"],
            "distance": np.array([0.75, 1.0, 0.8]),
            "bit_coverage": np.array([1.0, 1.0, 1.0]),
        }
    )

    # Create an empty generator and set its attributes by hand
    generator = FingerprintDistanceGenerator()
    generator.data = pair_data
    generator.structure_kinase_ids = structure_kinase_ids

    return generator
def weight_feature_distances(
    feature_distances_generator, output_filepath=None, feature_weights=None
):
    """
    Weight feature distances: Generates per fingerprint pair a fingerprint distance.

    Parameters
    ----------
    feature_distances_generator : kissim.encoding.FeatureDistancesGenerator
        Feature distances.
    output_filepath : None or str or pathlib.Path
        Path to the output CSV file. Nothing is written if this is None (or otherwise falsy).
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    FingerprintDistanceGenerator
        Fingerprint distances built from the weighted feature distances.
    """

    weighted = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances_generator, feature_weights
    )

    # No output file requested: hand back the result right away
    if not output_filepath:
        return weighted

    weighted.to_csv(Path(output_filepath))
    return weighted
def test_from_fingerprint_generator(self, fingerprint_generator):
    """
    Check that a FingerprintDistanceGenerator can be created from a fingerprint generator.
    """

    generator = FingerprintDistanceGenerator.from_fingerprint_generator(fingerprint_generator)

    # Only the type of the returned object is checked here
    assert isinstance(generator, FingerprintDistanceGenerator)
def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, feature_weights):
    """
    Check that a FingerprintDistanceGenerator can be created from structure KLIFS IDs.
    """

    generator = FingerprintDistanceGenerator.from_structure_klifs_ids(
        structure_klifs_ids, klifs_session, feature_weights
    )

    # Only the type of the returned object is checked here
    assert isinstance(generator, FingerprintDistanceGenerator)
def test_from_structure_klifs_ids(self, structure_klifs_ids, klifs_session, feature_weights):
    """
    Test FingerprintDistanceGenerator generation from structure KLIFS IDs.
    """

    # Build the generator from KLIFS IDs, session and weights
    fingerprint_distances = FingerprintDistanceGenerator.from_structure_klifs_ids(
        structure_klifs_ids, klifs_session, feature_weights
    )

    # The alternate constructor must return an instance of the class itself
    assert isinstance(fingerprint_distances, FingerprintDistanceGenerator)
def weight_feature_distances(
    feature_distances_generator, output_filepath=None, feature_weights=None
):
    """
    Weight feature distances: Generates per fingerprint pair a fingerprint distance.

    If an output file path is given, the fingerprint distances are written to that file and
    the following files are additionally produced in the same folder (``<stem>`` being the
    output file name without its suffix):

    - ``<stem>_to_kinase_matrix.csv``: the kinase distance matrix returned by
      ``kinase_distance_matrix()`` with default arguments.
    - ``<stem>_to_kinase_clusters.tree`` and ``kinase_annotation.csv``: both paths are passed
      to ``tree.from_distance_matrix`` together with the kinase matrix.

    Parameters
    ----------
    feature_distances_generator : kissim.encoding.FeatureDistancesGenerator
        Feature distances.
    output_filepath : None or str or pathlib.Path
        Path to output file for the fingerprint distances. Nothing is written if this is
        None (or otherwise falsy).
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    FingerprintDistanceGenerator
        Fingerprint distances built from the weighted feature distances.
    """

    fingerprint_distance_generator = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances_generator, feature_weights
    )

    if output_filepath:
        output_filepath = Path(output_filepath)
        # All derived files live next to the main output file and share its stem
        output_folder = output_filepath.parent
        file_stem = output_filepath.stem

        # Write fingerprint distances to file
        fingerprint_distance_generator.to_csv(output_filepath)

        # Write default kinase distances to file
        kinase_distances_filepath = output_folder / f"{file_stem}_to_kinase_matrix.csv"
        kinase_matrix = fingerprint_distance_generator.kinase_distance_matrix()
        # Drop axis names so that they do not end up in the CSV header
        kinase_matrix.index.name = None
        kinase_matrix.columns.name = None
        kinase_matrix.to_csv(kinase_distances_filepath, index=True)

        # Write default tree to file
        tree_filepath = output_folder / f"{file_stem}_to_kinase_clusters.tree"
        annotation_filepath = output_folder / "kinase_annotation.csv"
        tree.from_distance_matrix(kinase_matrix, tree_filepath, annotation_filepath)

    return fingerprint_distance_generator
def weights(feature_distances_path, feature_weights=None, fingerprint_distances_path=None):
    """
    Turn stored feature distances into weighted fingerprint distances.

    Parameters
    ----------
    feature_distances_path : str or pathlib.Path
        Path to feature distances CSV file.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.
    fingerprint_distances_path : None or str or pathlib.Path
        Path to output fingerprint distances CSV file. No file is written if None.

    Returns
    -------
    kissim.comparison.FingerprintDistanceGenerator
        Pairwise fingerprint distances.
    """

    # Input: feature distances from CSV
    feature_distances_path = Path(feature_distances_path)
    logger.info(f"Read feature distances from {feature_distances_path}...")
    feature_distances = FeatureDistancesGenerator.from_csv(feature_distances_path)

    # Weighting: feature distances -> fingerprint distances
    logger.info(f"Feature weights: {feature_weights}")
    fingerprint_distances = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances, feature_weights
    )

    # Output is optional: without a target path we are done
    if fingerprint_distances_path is None:
        return fingerprint_distances

    fingerprint_distances_path = Path(fingerprint_distances_path)
    logger.info(f"To file {fingerprint_distances_path}")
    fingerprint_distances.to_csv(fingerprint_distances_path)

    return fingerprint_distances
def test_from_feature_distances_generator(
    self,
    feature_distances_generator,
    feature_weights,
    structure_kinase_ids,
    structure_pair_ids,
    kinase_pair_ids,
    structure_ids,
    kinase_ids,
):
    """
    Check attributes and properties of a FingerprintDistanceGenerator that was created
    from a feature distances generator.
    """

    # Note: `feature_weights` is requested but not forwarded to the constructor call
    generator = FingerprintDistanceGenerator.from_feature_distances_generator(
        feature_distances_generator
    )

    # Attributes: data table layout and structure-kinase mapping
    expected_columns = [
        "structure.1",
        "structure.2",
        "kinase.1",
        "kinase.2",
        "distance",
        "bit_coverage",
    ]
    assert isinstance(generator.data, pd.DataFrame)
    assert generator.data.columns.to_list() == expected_columns
    assert generator.structure_kinase_ids == structure_kinase_ids

    # Properties: ID collections must match the expected values
    assert generator.structure_pair_ids == structure_pair_ids
    assert generator.kinase_pair_ids == kinase_pair_ids
    assert generator.structure_ids == structure_ids
    assert generator.kinase_ids == kinase_ids

    # Properties: numeric columns are exposed as numpy arrays
    assert isinstance(generator.distances, np.ndarray)
    assert isinstance(generator.bit_coverages, np.ndarray)
def test_main_compare(fingerprint_generator, args):
    """
    Test the compare CLI by running it in a subprocess.

    The fingerprint generator is written to the input JSON file named in the command, the
    command is executed, and the two expected output CSV files are checked for existence and
    loadability. All files touched by this test are removed afterwards, also if the command
    or one of the assertions fails.

    Parameters
    ----------
    fingerprint_generator : object
        Object providing `to_json`; used to create the CLI input file.
    args : str
        Full command line as a single whitespace-separated string. The 4th token is used as
        the input file path and the 6th token as the output folder.
    """

    args = args.split()
    input_filepath = Path(args[3])
    output_path = Path(args[5])
    feature_distances_filepath = output_path / "feature_distances.csv"
    fingerprint_distance_filepath = output_path / "fingerprint_distances.csv"

    try:
        # Generate CLI input and run the command
        fingerprint_generator.to_json(input_filepath)
        subprocess.run(args, check=True)

        ### Feature distances generator
        # CSV file there?
        assert feature_distances_filepath.exists()
        # CSV file can be loaded as FeatureDistancesGenerator object?
        feature_distances_generator = FeatureDistancesGenerator.from_csv(
            feature_distances_filepath
        )
        assert isinstance(feature_distances_generator, FeatureDistancesGenerator)
        assert isinstance(feature_distances_generator.data, pd.DataFrame)

        ### Fingerprint distance generator
        # CSV file there?
        assert fingerprint_distance_filepath.exists()
        # CSV file can be loaded as FingerprintDistanceGenerator object?
        fingerprint_distance_generator = FingerprintDistanceGenerator.from_csv(
            fingerprint_distance_filepath
        )
        assert isinstance(fingerprint_distance_generator, FingerprintDistanceGenerator)
        assert isinstance(fingerprint_distance_generator.data, pd.DataFrame)
    finally:
        # Delete files - cannot be done within enter_tmp_directory, since temporary files
        # apparently cannot be read from CLI.
        # Done in `finally` so that a failing run does not leave files behind; the existence
        # check keeps a missing file from masking the original failure.
        for filepath in (
            input_filepath,
            feature_distances_filepath,
            fingerprint_distance_filepath,
        ):
            if filepath.exists():
                filepath.unlink()