import numpy as np
# num_atoms is assumed to come from this package's RDKit helpers; it maps a
# list of SMILES strings to per-molecule atom counts (None for invalid SMILES)
from generative_playground.molecules.rdkit_utils.rdkit_utils import num_atoms


def reward_length(smiles):
    '''
    A simple reward to encourage larger molecules
    :param smiles: list of SMILES strings
    :return: np.array of float rewards, one per molecule
    '''
    if not len(smiles):
        return -1  # an empty batch is invalid for our purposes
    atoms = num_atoms(smiles)
    # invalid molecules (atom count None) are penalized with -1
    return np.array([-1 if num is None else num for num in atoms])
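A minimal usage sketch (illustrative only, assuming num_atoms counts heavy atoms via RDKit and returns None for unparseable strings):

# hypothetical batch: two valid molecules and one invalid string
batch = ['CCO', 'c1ccccc1', 'not_a_smiles']
print(reward_length(batch))  # e.g. [ 3  6 -1]: atom counts, -1 for the unparseable entry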
Example 2
# from generative_playground.models.problem.rl.DeepRL_wrappers import BodyAdapter, MyA2CAgent
import numpy as np

from generative_playground.molecules.model_settings import get_settings
from generative_playground.molecules.train.pg.hypergraph.main_train_policy_gradient_minimal import train_policy_gradient
from generative_playground.codec.hypergraph_grammar import GrammarInitializer
# num_atoms and NormalizedScorer are assumed to live in this package's RDKit helpers
from generative_playground.molecules.rdkit_utils.rdkit_utils import num_atoms, NormalizedScorer



batch_size = 8  # 20
drop_rate = 0.5
molecules = True
grammar_cache = 'hyper_grammar_guac_10k_with_clique_collapse.pickle'  # alternative: 'hyper_grammar.pickle'
# the 'hypergraph:' prefix tells the trainer to use the hypergraph grammar backed by this cache file
grammar = 'hypergraph:' + grammar_cache
# settings = get_settings(molecules, grammar)
# max_steps = 277  # settings['max_seq_length']
invalid_value = -3.5
# hinge penalty on molecule size: zero up to 37 atoms, -0.05 per extra atom beyond that
atom_penalty = lambda x: -0.05 * (np.maximum(37, np.array(num_atoms(x))) - 37)
scorer = NormalizedScorer(invalid_value=invalid_value, normalize_scores=True)
# squash the normalized score into (-1, 1) and add the (non-positive) size penalty
reward_fun = lambda x: np.tanh(0.1 * scorer(x)) + atom_penalty(x)  # alternative: reward_aromatic_rings(x)
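To see how the size penalty behaves, a quick sketch with illustrative atom counts:

sizes = np.array([30, 37, 47])
print(-0.05 * (np.maximum(37, sizes) - 37))  # [-0. -0. -0.5]: free up to 37 atoms, then -0.05 per extra atom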
# later will run this ahead of time
# gi = GrammarInitializer(grammar_cache)

max_steps = 60
root_name = 'classic_logP'
model, gen_fitter, disc_fitter = train_policy_gradient(molecules,
                                                       grammar,
                                                       EPOCHS=100,
                                                       BATCH_SIZE=batch_size,
                                                       reward_fun_on=reward_fun,
                                                       max_steps=max_steps,
                                                       lr_on=3e-5,
                                                       lr_discrim=5e-4,