def assess_model(self, model: DistributionMatchingGenerator) -> DistributionLearningBenchmarkResult:
    """Score *model* by comparing its samples against the reference molecules.

    The method asks ``sample_valid_molecules`` for ``self.number_samples``
    molecules, computes distribution statistics for both the reference set
    and the generated set with the same ChemNet model, and feeds both into
    ``fcd.calculate_frechet_distance``. The reported score is
    ``exp(-0.2 * FCD)``.

    Args:
        model: Generator to sample molecules from.

    Returns:
        A ``DistributionLearningBenchmarkResult`` carrying the benchmark
        name, the score, the wall-clock time spent in sampling only, and a
        metadata dict with the molecule counts and the raw FCD value.
    """
    # Load the network before starting the clock so that only sampling is timed.
    chemnet_model = self._load_chemnet()

    sampling_started = time.time()
    generated_molecules = sample_valid_molecules(
        model=model,
        number_molecules=self.number_samples,
    )
    sampling_finished = time.time()
    sampling_seconds = sampling_finished - sampling_started

    # A shortfall is only reported; scoring continues with what was produced.
    if len(generated_molecules) != self.number_samples:
        logger.warning('The model could not generate enough valid molecules.')

    mu_reference, sigma_reference = self._calculate_distribution_statistics(
        chemnet_model, self.reference_molecules
    )
    mu_generated, sigma_generated = self._calculate_distribution_statistics(
        chemnet_model, generated_molecules
    )

    frechet_distance = fcd.calculate_frechet_distance(
        mu1=mu_reference,
        mu2=mu_generated,
        sigma1=sigma_reference,
        sigma2=sigma_generated,
    )

    result_metadata = dict(
        number_reference_molecules=len(self.reference_molecules),
        number_generated_molecules=len(generated_molecules),
        FCD=frechet_distance,
    )

    return DistributionLearningBenchmarkResult(
        benchmark_name=self.name,
        score=np.exp(-0.2 * frechet_distance),
        sampling_time=sampling_seconds,
        metadata=result_metadata,
    )
def __call__(self, valid_molecule_bags: typing.List[multiset.BaseMultiset]):
    """Return the Frechet distance between cached reference stats and a sample.

    Up to ``self.fcd_scorer.sample_size`` multisets are drawn without
    replacement from *valid_molecule_bags*; if fewer are available, all of
    them are used and a notice is printed. From each drawn multiset one of
    its distinct elements is picked with ``random.choice``. Statistics for
    the picked molecules are computed via
    ``self.fcd_scorer._calculate_distribution_statistics`` and compared with
    ``self.cached_ref_stats``.

    Args:
        valid_molecule_bags: Multisets to sample molecules from.

    Returns:
        The value produced by ``fcd.calculate_frechet_distance`` (the raw
        distance, with no further transformation applied here).
    """
    # Default to the scorer's preferred size; shrink if the input is too small.
    sample_size = self.fcd_scorer.sample_size
    if len(valid_molecule_bags) < sample_size:
        print(f"less samples than ideal... @{len(valid_molecule_bags)}")
        sample_size = len(valid_molecule_bags)

    # Draw the multisets first, then pick one distinct element from each.
    drawn_bags = random.sample(valid_molecule_bags, sample_size)
    picked_molecules = [
        random.choice(list(bag.distinct_elements())) for bag in drawn_bags
    ]

    network = self.chemnet

    print("FCD: calculating dist stats on training data...")
    # The reference statistics are read from the cache, not recomputed here.
    reference_mu, reference_cov = self.cached_ref_stats

    print("FCD: calculating dist stats on new generated molecules...")
    generated_mu, generated_cov = self.fcd_scorer._calculate_distribution_statistics(
        network, picked_molecules
    )
    print("FCD: ... computed stats!")

    # See note 2 in class docstring.
    return fcd.calculate_frechet_distance(
        mu1=reference_mu,
        mu2=generated_mu,
        sigma1=reference_cov,
        sigma2=generated_cov,
    )