# Adversarial policy-gradient run: attention-graph decoder with a randomized,
# lenient reward; alternates one generator step with one discriminator step.
max_steps = 30  # cap on decoding steps per molecule

model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=0.3e-5,                 # generator (on-policy) learning rate
    lr_discrim=5e-4,              # discriminator learning rate
    p_thresh=0.3,
    randomize_reward=True,
    drop_rate=drop_rate,
    decoder_type='attn_graph',
    plot_prefix='hg ',
    dashboard='true_reward_lenient_rand30_v2',
    save_file='adv_orig_reward_lrand30.h5',
    smiles_save_file=None,
    on_policy_loss_type='advantage',
    off_policy_loss_type='mean')

# Drive the two fitter coroutines forever: one generator update, then one
# discriminator update per cycle (training only stops when the process is killed).
while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
# Generator-only policy-gradient run ('canned' objective): node-attention graph
# decoder, discriminator disabled (lr and weight both zero), conditional priors.
root_name = f'canned_{ver}_{obj_num}do 0.5 lr4e-5'  # run id used for dashboard/checkpoints
max_steps = 45  # cap on decoding steps per molecule

model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=4e-5,
    lr_discrim=0.0,               # discriminator effectively off
    discrim_wt=0.0,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.0,
    decoder_type='attn_graph_node',
    plot_prefix='',
    dashboard=root_name,
    save_file_root_name=root_name,
    preload_file_root_name=root_name,  # resume from a previous run with the same id
    smiles_save_file=root_name.replace(' ', '_') + '_smiles_2.zip',
    on_policy_loss_type='advantage_record',
    node_temperature_schedule=lambda x: 100,  # constant node temperature
    eps=0.0,
    priors='conditional')

# Only the generator is trained here; drain its coroutine forever.
while True:
    next(gen_fitter)
# Guacamol-objective policy-gradient run: attention-graph decoder, discriminator
# optimizer active but its loss weight zeroed, generator-only training loop.
max_steps = 50  # cap on decoding steps per molecule

model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.0,               # discriminator loss contributes nothing
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.0,
    decoder_type='attn_graph',
    plot_prefix='',
    dashboard=f'guacamol_{ver}_{obj_num}a',
    save_file_root_name=f'guacamol_{ver}_{obj_num}.h5',
    smiles_save_file=None,
    on_policy_loss_type='advantage',
    off_policy_loss_type='mean')

# Generator-only training: drain the fitter coroutine forever.
while True:
    next(gen_fitter)
import traceback

# Sweep all 20 objectives, training one policy-gradient model per objective.
# Each objective runs best-effort: a failure on one objective is reported and
# the sweep moves on to the next rather than aborting the whole run.
for obj_num in range(20):
    try:
        root_name = 'guac_' + ver + '_' + str(obj_num) + 'do_0.5_lr4e-5_mark'
        max_steps = 45  # cap on decoding steps per molecule
        model, gen_fitter, disc_fitter = train_policy_gradient(
            molecules,
            grammar,
            EPOCHS=100,
            BATCH_SIZE=batch_size,
            reward_fun_on=reward_fun,
            max_steps=max_steps,
            lr_on=4e-5,
            lr_discrim=5e-4,
            discrim_wt=0.0,               # discriminator loss contributes nothing
            p_thresh=-10,
            drop_rate=drop_rate,
            reward_sm=0.0,
            decoder_type='attn_graph_node',
            plot_prefix='',
            dashboard=root_name,
            save_file_root_name=root_name,
            preload_file_root_name=root_name,  # resume from a same-named run if present
            smiles_save_file=None,
            on_policy_loss_type='advantage_record',
            half_float=False,
            node_temperature_schedule=lambda x: 100,  # constant node temperature
            eps=2.0)
    except Exception as ex:
        # Deliberate broad catch: keep sweeping the remaining objectives.
        # Original code printed only the exception type/message, which made
        # failures undiagnosable — also dump the full traceback.
        print('{}: {}({})'.format(obj_num, type(ex), str(ex)))
        traceback.print_exc()
def reward_fun(x):
    """Reward for a generated molecule: tanh-squashed score plus atom penalty.

    Squashes ``scorer(x)`` through ``tanh(0.1 * score)`` to bound the score
    term to (-1, 1), then adds ``atom_penalty(x)``.
    (Was a PEP 8 E731-violating lambda assignment; behavior unchanged.)
    """
    return np.tanh(0.1 * scorer(x)) + atom_penalty(x)


# Classic logP optimization run: node-attention graph decoder with an active,
# weighted discriminator and reward smoothing.
max_steps = 60  # cap on decoding steps per molecule
root_name = 'classic_logP'  # run id used for dashboard/checkpoints

model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.3,               # discriminator loss carries real weight here
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.5,                # reward smoothing enabled
    decoder_type='attn_graph_node',
    plot_prefix='',
    dashboard=root_name,
    save_file_root_name=root_name,
    smiles_save_file=root_name + '_smiles',
    on_policy_loss_type='advantage_record',
    half_float=False,
    priors='conditional')

# Alternate one generator update with one discriminator update, forever.
while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
# Adversarial policy-gradient run: attention-graph decoder with a moderately
# weighted discriminator (0.25) and reward smoothing of 0.5.
max_steps = 50  # cap on decoding steps per molecule

model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.25,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.5,
    decoder_type='attn_graph',
    plot_prefix='hg ',
    dashboard='reward_linear_faster_half_2',
    save_file_root_name='true_reward_linear_faster_half_2.h5',
    smiles_save_file=None,
    on_policy_loss_type='advantage',
    off_policy_loss_type='mean')

# Drive both fitter coroutines forever: one generator step, then one
# discriminator step per cycle.
while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
root_name = 'xtest91' + ver + '_' + str(obj_num) + '_lr4e-5' max_steps = 60 model, gen_fitter, disc_fitter = train_policy_gradient( molecules, grammar, EPOCHS=100, BATCH_SIZE=batch_size, reward_fun_on=reward_fun, max_steps=max_steps, lr_on=0.05, # crazy, to be used with the cosine schedule! lr_discrim=0.0, # lr_schedule=shifted_cosine_schedule, discrim_wt=0.0, p_thresh=-10, drop_rate=drop_rate, reward_sm=0.0, decoder_type='graph_conditional', #'rnn_graph',# 'attention', plot_prefix='', dashboard=root_name, # 'policy gradient', save_file_root_name=root_name, preload_file_root_name= None, #root_name, #'guacamol_ar_emb_node_rpev2_0lr2e-5',#'guacamol_ar_nodev2_0lr2e-5',#root_name, smiles_save_file=root_name.replace(' ', '_') + '_smiles.zip', on_policy_loss_type='advantage_record', rule_temperature_schedule=toothy_exp_schedule, eps=0.0, priors='conditional', entropy_wgt=1.0) # preload_file='policy_gradient_run.h5') while True: