def compare_from_cli(args):
    """
    Run the fingerprint comparison from parsed command-line arguments.

    Parameters
    ----------
    args : argparse.Namespace
        CLI arguments; the attributes ``input``, ``output``, ``weights`` and ``ncores``
        are read.
    """

    # The log file is placed inside the output location
    log_path = Path(args.output) / "distances.log"
    configure_logger(log_path)

    compare(
        FingerprintGenerator.from_json(args.input),
        args.output,
        args.weights,
        args.ncores,
    )
def outliers(fingerprints_path, distance_cutoff, fingerprints_wo_outliers_path=None):
    """
    Drop fingerprints whose spatial distances exceed a maximum cutoff.

    Parameters
    ----------
    fingerprints_path : str or pathlib.Path
        Path to the fingerprints JSON file to read.
    distance_cutoff : float
        Largest tolerated distance; a fingerprint is discarded as soon as any of its
        distance values is greater than this cutoff.
    fingerprints_wo_outliers_path : None or str or pathlib.Path
        If given, the filtered fingerprints are written to this JSON file.

    Returns
    -------
    kissim.encoding.FingerprintGenerator
        Fingerprints without outliers.
    """

    # Load fingerprints
    logger.info("Read fingerprints...")
    generator = FingerprintGenerator.from_json(Path(fingerprints_path))
    logger.info(f"Number of fingerprints: {len(generator.data)}")

    # Collect the IDs first; entries are deleted only after iteration has finished
    logger.info(
        "Use the following distance minimum/maximum cutoffs"
        f" to identify outlier structures: {distance_cutoff}"
    )
    outlier_ids = [
        structure_id
        for structure_id, fingerprint in generator.data.items()
        if (fingerprint.distances > distance_cutoff).any().any()
    ]
    logger.info(f"Structure IDs to be removed: {outlier_ids}")

    # Remove fingerprints
    logger.info("Remove fingerprints with distance outliers...")
    for outlier_id in outlier_ids:
        del generator.data[outlier_id]
    logger.info(f"Number of fingerprints: {len(generator.data)}")

    # Nothing to save: hand back the filtered generator directly
    if fingerprints_wo_outliers_path is None:
        return generator

    # The message is formatted from the argument as passed in, before Path conversion
    logger.info(f"Save cleaned fingerprints to {fingerprints_wo_outliers_path}...")
    generator.to_json(Path(fingerprints_wo_outliers_path))
    return generator
def test_main_encode(args):
    """
    Run the encoding CLI in a subprocess and check the files it produces.
    """

    json_file = Path("fingerprints.json")
    log_file = Path(f"{json_file.stem}.log")
    command = args.split()

    with enter_temp_directory():
        # A non-zero exit status raises because of check=True
        subprocess.run(command, check=True)

        # Both output files must exist (paths are relative to the temporary directory)
        assert json_file.exists()
        assert log_file.exists()

        # The JSON file must load back into a FingerprintGenerator holding Fingerprints
        loaded = FingerprintGenerator.from_json(json_file)
        assert isinstance(loaded, FingerprintGenerator)
        first_fingerprint = list(loaded.data.values())[0]
        assert isinstance(first_fingerprint, Fingerprint)
def test_to_from_json(self, structure_klifs_ids, normalize, values_array_sum):
    """
    Test that fingerprints survive a round trip through a JSON file.
    """

    generator = FingerprintGenerator.from_structure_klifs_ids(structure_klifs_ids, LOCAL, 1)
    json_path = Path("fingerprints.json")

    with enter_temp_directory():
        # Write the JSON file
        generator.to_json(json_path)
        assert json_path.exists()

        # Read it back
        reloaded = FingerprintGenerator.from_json(json_path, normalize)
        assert isinstance(reloaded, FingerprintGenerator)

        # Attribute data: same structure IDs in the same order
        original_ids = list(generator.data.keys())
        reloaded_ids = list(reloaded.data.keys())
        assert original_ids == reloaded_ids

        # Attribute data_normalized: only populated when normalization was requested
        if not normalize:
            assert reloaded.data_normalized is None
        else:
            assert list(generator.data_normalized.keys()) == list(
                reloaded.data_normalized.keys()
            )

        # NaN-ignoring sum over all reloaded fingerprint values
        total = sum(
            np.nansum(fingerprint.values_array(True, True, True))
            for fingerprint in reloaded.data.values()
        )
        assert pytest.approx(total, abs=1e-4) == values_array_sum