예제 #1
0
def _get_case_data(mongo_client):
    """
    Build the :class:`.CaseData` used to check fitness value caching.

    The property function handed to the fitness calculator is
    ``_counter.get_count``, which is meant to produce a different
    "fitness value" on every call. The value computed here is stored
    in the returned :class:`.CaseData`; a test which later asks the
    same calculator for the fitness value of the same molecule will
    only get this number back if the value was served from the
    database rather than recalculated.

    Parameters
    ----------
    mongo_client : :class:`pymongo.MongoClient`
        The mongo client the database should connect to.

    Returns
    -------
    :class:`.CaseData`
        Holds the fitness calculator, the molecule and the fitness
        value obtained for it on the first calculation.

    """

    # The same database is used for both reading and writing values.
    cache = stk.ValueMongoDb(
        mongo_client=mongo_client,
        collection='test_caching',
        database='_stk_pytest_database',
    )
    calculator = stk.PropertyVector(
        property_functions=(_counter.get_count, ),
        input_database=cache,
        output_database=cache,
    )
    building_block = stk.BuildingBlock('BrCCBr')

    return CaseData(
        fitness_calculator=calculator,
        molecule=building_block,
        # First calculation for this molecule with this calculator.
        fitness_value=calculator.get_fitness_value(building_block),
    )
예제 #2
0
    scores = []
    for bb in mol.get_building_blocks():
        rdkit_mol = bb.to_rdkit_mol()
        rdkit_mol.UpdatePropertyCache()
        rdkit.GetSymmSSSR(rdkit_mol)
        rdkit_mol.GetRingInfo()
        scores.append(scscorer(rdkit_mol)[1])
    return sum(scores)


# Defines the synthetic accessibility function to use.
synthetic_accesibility_func = scscore

# Combine the individual property functions into a single fitness
# calculator. The order here fixes the order of the components in the
# resulting fitness value.
cage_fitness_calculator = stk.PropertyVector(
    pore_diameter,
    largest_window,
    window_std,
    synthetic_accesibility_func,
)

# Molecules found in the cache of failed_optimizer are routed to a
# fitness function which always returns None; every other molecule is
# routed to cage_fitness_calculator.
# NOTE(review): this reading relies on the parameter names of stk.If -
# confirm against the stk documentation.
fitness_calculator = stk.If(
    condition=lambda mol: failed_optimizer.is_in_cache(mol),
    true_calculator=stk.FitnessFunction(lambda mol: None),
    false_calculator=cage_fitness_calculator,
)

# #####################################################################
# Fitness normalizer.
# #####################################################################


def valid_fitness(population, mol):
예제 #3
0
    pw_mol = pywindow.Molecule.load_rdkit_mol(mol.to_rdkit_mol())
    mol.pore_diameter = abs(pw_mol.calculate_pore_diameter() - 5)
    return mol.pore_diameter


def window_std(mol):
    """
    Calculate the standard deviation of the windows of `mol`.

    The result is also stored on the molecule as ``mol.window_std``.

    Parameters
    ----------
    mol : object
        The molecule to analyse. It must provide ``to_rdkit_mol()``.

    Returns
    -------
    float or None
        ``np.std`` of the windows found by pywindow, or ``None`` if
        pywindow returned ``None`` or three or fewer windows.

    """

    rdkit_mol = mol.to_rdkit_mol()
    found_windows = (
        pywindow.Molecule.load_rdkit_mol(rdkit_mol).calculate_windows()
    )

    # Start from "no value" so the attribute always exists.
    mol.window_std = None
    if found_windows is None or len(found_windows) <= 3:
        return mol.window_std

    mol.window_std = np.std(found_windows)
    return mol.window_std


# Combine the pore_diameter and window_std property functions into a
# single fitness calculator, in that order.
fitness_calculator = stk.PropertyVector(
    pore_diameter,
    window_std,
)


def valid_fitness(population, mol):
    """
    Check that the fitness value of `mol` has no ``None`` entries.

    Parameters
    ----------
    population : object
        Provides ``get_fitness_values()``, whose result is indexed
        by molecule.

    mol : object
        The molecule whose fitness value is checked.

    Returns
    -------
    bool
        ``True`` if ``None`` is not found in the fitness value of
        `mol`, ``False`` otherwise.

    """

    fitness_value = population.get_fitness_values()[mol]
    has_missing_component = None in fitness_value
    return not has_missing_component


fitness_normalizer = stk.Sequence(
    stk.Power([1, -1], filter=valid_fitness),
    stk.DivideByMean(filter=valid_fitness),
    stk.Multiply([1.0, 1.0], filter=valid_fitness),
    stk.Sum(filter=valid_fitness),
    stk.ReplaceFitness(
        replacement_fn=lambda population:
            min(
예제 #4
0
파일: serial.py 프로젝트: fiszczyp/stk
# #####################################################################
# Optimizer.
# #####################################################################

# NOTE(review): NullOptimizer presumably leaves molecules unchanged,
# i.e. no optimization is performed in this example - confirm against
# the stk documentation.
optimizer = stk.NullOptimizer(use_cache=True)

# #####################################################################
# Fitness calculator.
# #####################################################################


def num_atoms(mol):
    """
    Return the number of atoms in `mol`.

    Parameters
    ----------
    mol : object
        A molecule exposing its atoms through an ``atoms`` attribute.

    Returns
    -------
    int
        The length of ``mol.atoms``.

    """

    atoms = mol.atoms
    return len(atoms)


# The atom count is the only property in the fitness value.
fitness_calculator = stk.PropertyVector(num_atoms)

# #####################################################################
# Fitness normalizer.
# #####################################################################

# The PropertyVector fitness calculator sets the fitness to [n_atoms].
# The Power(0.5) normalizer takes the square root of that value, and
# the Sum() normalizer then converts the one-element vector into the
# single number n_atoms^0.5.
fitness_normalizer = stk.NormalizerSequence(stk.Power(0.5), stk.Sum())

# #####################################################################
# Exit condition.
# #####################################################################

# NOTE(review): presumably stops the run after 25 generations -
# confirm against the stk documentation.
terminator = stk.NumGenerations(25)
예제 #5
0
import numpy as np
import pytest

import stk

from ..case_data import CaseData


@pytest.fixture(
    scope='session',
    params=(lambda: CaseData(
        fitness_calculator=stk.PropertyVector(property_functions=(
            stk.Molecule.get_num_atoms,
            stk.Molecule.get_num_bonds,
            stk.Molecule.get_maximum_diameter,
        ), ),
        molecule=stk.BuildingBlock('BrCCBr').with_position_matrix(
            position_matrix=np.array([
                [0, 0, 0],
                [10, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
                [0, 0, 0],
            ],
                                     dtype=np.float64), ),
        fitness_value=(8, 7, 10),
    ), ),
)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mongodb_uri',
                        help='The MongoDB URI for the database to connect to.',
                        default='mongodb://localhost:27017/')
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # Use a random seed to get reproducible results.
    random_seed = 4
    generator = np.random.RandomState(random_seed)

    logger.info('Making building blocks.')

    # Load the building block databases.
    fluoros = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'fluoros.txt',
            functional_group_factory=stk.FluoroFactory(),
        ))
    bromos = tuple(
        get_building_blocks(
            path=pathlib.Path(__file__).parent / 'bromos.txt',
            functional_group_factory=stk.BromoFactory(),
        ))

    initial_population = tuple(get_initial_population(fluoros, bromos))
    # Write the initial population.
    for i, record in enumerate(initial_population):
        write(record.get_molecule(), f'initial_{i}.mol')

    client = pymongo.MongoClient(args.mongodb_uri)
    db = stk.ConstructedMoleculeMongoDb(client)
    fitness_db = stk.ValueMongoDb(client, 'fitness_values')

    # Plot selections.
    generation_selector = stk.Best(
        num_batches=25,
        duplicate_molecules=False,
    )
    stk.SelectionPlotter('generation_selection', generation_selector)

    mutation_selector = stk.Roulette(
        num_batches=5,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('mutation_selection', mutation_selector)

    crossover_selector = stk.Roulette(
        num_batches=3,
        batch_size=2,
        random_seed=generator.randint(0, 1000),
    )
    stk.SelectionPlotter('crossover_selection', crossover_selector)

    fitness_calculator = stk.PropertyVector(
        property_functions=(
            get_num_rotatable_bonds,
            get_complexity,
            get_num_bad_rings,
        ),
        input_database=fitness_db,
        output_database=fitness_db,
    )

    fitness_normalizer = stk.NormalizerSequence(
        fitness_normalizers=(
            # Prevent division by 0 error in DivideByMean, by ensuring
            # a value of each property to be at least 1.
            stk.Add((1, 1, 1)),
            stk.DivideByMean(),
            # Obviously, because all coefficients are equal, the
            # Multiply normalizer does not need to be here. However,
            # it's here to show that you can easily change the relative
            # importance of each component of the fitness value, by
            # changing the values of the coefficients.
            stk.Multiply((1, 1, 1)),
            stk.Sum(),
            stk.Power(-1),
        ), )

    ea = stk.EvolutionaryAlgorithm(
        num_processes=1,
        initial_population=initial_population,
        fitness_calculator=fitness_calculator,
        mutator=stk.RandomMutator(
            mutators=(
                stk.RandomBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=fluoros,
                    is_replaceable=is_fluoro,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.RandomBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
                stk.SimilarBuildingBlock(
                    building_blocks=bromos,
                    is_replaceable=is_bromo,
                    random_seed=generator.randint(0, 1000),
                ),
            ),
            random_seed=generator.randint(0, 1000),
        ),
        crosser=stk.GeneticRecombination(get_gene=get_functional_group_type, ),
        generation_selector=generation_selector,
        mutation_selector=mutation_selector,
        crossover_selector=crossover_selector,
        fitness_normalizer=fitness_normalizer,
    )

    logger.info('Starting EA.')

    generations = []
    for generation in ea.get_generations(50):
        for record in generation.get_molecule_records():
            db.put(record.get_molecule())
        generations.append(generation)

    # Write the final population.
    for i, record in enumerate(generation.get_molecule_records()):
        write(record.get_molecule(), f'final_{i}.mol')

    logger.info('Making fitness plot.')

    # Normalize the fitness values across the entire EA before
    # plotting the fitness values.
    generations = tuple(
        normalize_generations(
            fitness_calculator=fitness_calculator,
            fitness_normalizer=fitness_normalizer,
            generations=generations,
        ))

    fitness_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: record.get_fitness_value(),
        y_label='Fitness Value',
    )
    fitness_progress.write('fitness_progress.png')
    fitness_progress.get_plot_data().to_csv('fitness_progress.csv')

    logger.info('Making rotatable bonds plot.')

    rotatable_bonds_progress = stk.ProgressPlotter(
        generations=generations,
        get_property=lambda record: get_num_rotatable_bonds(record.
                                                            get_molecule()),
        y_label='Number of Rotatable Bonds',
    )
    rotatable_bonds_progress.write('rotatable_bonds_progress.png')