Ejemplo n.º 1
0
def test_validity_correct_molecules():
    """Check that ``is_valid`` accepts each of three well-formed SMILES strings."""
    well_formed = (
        'O',
        'C',
        'CC(ONONOC)CCCc1ccccc1',
    )

    # Same order as before; the first rejected string fails the test.
    for smiles in well_formed:
        assert is_valid(smiles)
Ejemplo n.º 2
0
def sample_valid_molecules(model: DistributionMatchingGenerator, number_molecules: int, max_tries=10) -> List[str]:
    """
    Sample from the given generator until the desired number of valid molecules
    has been sampled (i.e., ignore invalid molecules).

    Sampling proceeds in rounds. Each round requests only as many molecules as
    are still missing, and never more than what is left of the overall sampling
    budget, so the total number of requested samples cannot exceed
    ``number_molecules * max_tries``.

    Args:
        model: model to sample from
        number_molecules: number of valid molecules to generate
        max_tries: integer multiplier that determines the maximum number N of
            samples to draw, N = number_molecules * max_tries

    Returns:
        A list of at most number_molecules valid molecules. If this was not
        possible with the given max_tries, the list may be shorter.
    """

    max_samples = max_tries * number_molecules
    number_already_sampled = 0

    valid_molecules: List[str] = []

    while len(valid_molecules) < number_molecules and number_already_sampled < max_samples:
        still_needed = number_molecules - len(valid_molecules)
        budget_left = max_samples - number_already_sampled

        # Both quantities are strictly positive here (loop condition), so the
        # batch is never empty and the loop always makes progress.
        batch_size = min(still_needed, budget_left)

        samples = model.generate(batch_size)
        number_already_sampled += batch_size

        valid_molecules.extend(m for m in samples if is_valid(m))

    # Guard against a generator that returns more molecules than requested.
    return valid_molecules[:number_molecules]
Ejemplo n.º 3
0
    def assess_model(
        self, model: DistributionMatchingGenerator
    ) -> DistributionLearningBenchmarkResult:
        """
        Score a generator by the fraction of its samples that are valid.

        Draws ``self.number_samples`` molecules from ``model``, timing only the
        generation call, and counts how many of them ``is_valid`` accepts.

        Args:
            model: generator to draw the molecules from

        Returns:
            A DistributionLearningBenchmarkResult whose score is
            number_valid / self.number_samples, together with the sampling
            time and the raw counts as metadata.

        Raises:
            Exception: if the model returns a number of molecules different
                from ``self.number_samples``.
        """
        expected_count = self.number_samples

        # Only the generation call is timed; validation is excluded.
        t_begin = time.time()
        molecules = model.generate(number_samples=expected_count)
        t_finish = time.time()

        if len(molecules) != self.number_samples:
            raise Exception(
                "The model did not generate the correct number of molecules")

        number_valid = sum(1 for smiles in molecules if is_valid(smiles))

        return DistributionLearningBenchmarkResult(
            benchmark_name=self.name,
            score=number_valid / self.number_samples,
            sampling_time=t_finish - t_begin,
            metadata={
                "number_samples": self.number_samples,
                "number_valid": number_valid,
            },
        )
Ejemplo n.º 4
0
def test_validity_incorrect_syntax():
    """Check that ``is_valid`` rejects a string with a non-SMILES run of letters in it."""
    is_accepted = is_valid('CCCincorrectsyntaxCCC')
    assert not is_accepted
Ejemplo n.º 5
0
def test_validity_empty_molecule():
    """Check that ``is_valid`` rejects the empty string."""
    is_accepted = is_valid('')
    assert not is_accepted
Ejemplo n.º 6
0
def test_validity_incorrect_valence():
    """Check that ``is_valid`` rejects a SMILES whose central carbon has too many bonds."""
    # The branched carbon has four single bonds plus a double bond to oxygen.
    is_accepted = is_valid('CCC(CC)(CC)(=O)CCC')
    assert not is_accepted