コード例 #1
0
    def assess_model(self, model: DistributionMatchingGenerator) -> DistributionLearningBenchmarkResult:
        """
        Benchmark a generator via the Frechet distance between its samples and the reference set.

        Requests ``self.number_samples`` valid molecules from ``model``, computes
        distribution statistics for both the reference molecules and the generated
        ones, and converts the resulting Frechet distance into a score with
        ``exp(-0.2 * FCD)``.

        Args:
            model: generator passed to ``sample_valid_molecules``.

        Returns:
            A ``DistributionLearningBenchmarkResult`` holding the score, the time spent
            in the sampling call, and metadata with both molecule counts and the raw
            distance under the ``'FCD'`` key.
        """
        chemnet = self._load_chemnet()

        # Only the sampling call is timed; model loading and statistics are excluded.
        sampling_started = time.time()
        generated_molecules = sample_valid_molecules(model=model, number_molecules=self.number_samples)
        sampling_time = time.time() - sampling_started

        # A shortfall is only reported; the benchmark still runs on what was produced.
        if len(generated_molecules) != self.number_samples:
            logger.warning('The model could not generate enough valid molecules.')

        ref_mean, ref_cov = self._calculate_distribution_statistics(chemnet, self.reference_molecules)
        gen_mean, gen_cov = self._calculate_distribution_statistics(chemnet, generated_molecules)

        frechet_distance = fcd.calculate_frechet_distance(
            mu1=ref_mean,
            mu2=gen_mean,
            sigma1=ref_cov,
            sigma2=gen_cov,
        )

        # Smaller distances map to larger scores.
        score = np.exp(-0.2 * frechet_distance)

        metadata = dict(
            number_reference_molecules=len(self.reference_molecules),
            number_generated_molecules=len(generated_molecules),
            FCD=frechet_distance,
        )

        return DistributionLearningBenchmarkResult(
            benchmark_name=self.name,
            score=score,
            sampling_time=sampling_time,
            metadata=metadata,
        )
コード例 #2
0
    def __call__(self, valid_molecule_bags: typing.List[multiset.BaseMultiset]):
        """
        Return the Frechet distance between the cached reference statistics and
        statistics of molecules drawn from ``valid_molecule_bags``.

        At most ``self.fcd_scorer.sample_size`` bags are drawn without replacement;
        if fewer bags are supplied, all of them are used and a notice is printed.
        One distinct element is then picked at random from every drawn bag.

        Args:
            valid_molecule_bags: multisets of generated molecules to sample from.

        Returns:
            The value produced by ``fcd.calculate_frechet_distance``.
        """
        # Cap the sample size by what is actually available.
        if len(valid_molecule_bags) >= self.fcd_scorer.sample_size:
            sample_size = self.fcd_scorer.sample_size
        else:
            print(f"less samples than ideal... @{len(valid_molecule_bags)}")
            sample_size = len(valid_molecule_bags)

        # Draw the bags first, then one representative molecule per bag
        # (each distinct element is equally likely, regardless of multiplicity).
        drawn_bags = random.sample(valid_molecule_bags, sample_size)
        samples = [random.choice(list(bag.distinct_elements())) for bag in drawn_bags]

        chemnet = self.chemnet

        # The reference statistics are read from the cache here, not recomputed.
        print("FCD: calculating dist stats on training data...")
        ref_mean, ref_cov = self.cached_ref_stats

        print("FCD: calculating dist stats on new generated molecules...")
        gen_mean, gen_cov = self.fcd_scorer._calculate_distribution_statistics(chemnet, samples)

        print("FCD: ... computed stats!")

        # See note 2 in class docstring regarding the returned value.
        return fcd.calculate_frechet_distance(
            mu1=ref_mean,
            mu2=gen_mean,
            sigma1=ref_cov,
            sigma2=gen_cov,
        )