Example #1
# if True:
#     gi.delete_cache()
#     gi = GrammarInitializer(grammar_cache)
#     max_steps_smiles = gi.init_grammar(1000)

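# Example #1: adversarial policy-gradient training with the edge-level
# 'attn_graph' decoder and an 'advantage' on-policy loss; a discriminator is
# fitted alongside (lr_discrim=5e-4) and reward randomization is enabled.
# The commented block above would rebuild the grammar cache from 1000 SMILES.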
max_steps = 30
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=0.3e-5,
    lr_discrim=5e-4,
    p_thresh=0.3,
    randomize_reward=True,
    drop_rate=drop_rate,
    decoder_type='attn_graph',  # 'attention',
    plot_prefix='hg ',
    dashboard='true_reward_lenient_rand30_v2',  # 'policy gradient',
    save_file='adv_orig_reward_lrand30.h5',
    smiles_save_file=None,  # 'pg_smiles_hg1.h5',
    on_policy_loss_type='advantage',  # 'best',
    off_policy_loss_type='mean')
# preload_file='policy_gradient_run.h5')

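# gen_fitter and disc_fitter behave like Python generators: each next() call
# appears to advance training by one round, so this loop alternates one
# generator update with one discriminator update indefinitely.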
while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
Example #2
# gi = GrammarInitializer(grammar_cache)

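# Example #2: generator-only training (lr_discrim and discrim_wt are both 0)
# with the node-level 'attn_graph_node' decoder, conditional priors and a
# constant node temperature of 100; checkpoints are saved to and preloaded
# from files derived from root_name, and sampled SMILES are written to a zip.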
root_name = 'canned_' + ver + '_' + str(obj_num) + 'do 0.5 lr4e-5'
max_steps = 45
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=4e-5,
    lr_discrim=0.0,
    discrim_wt=0.0,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.0,
    decoder_type='attn_graph_node',  # 'rnn_graph', 'attention',
    plot_prefix='',
    dashboard=root_name,  # 'policy gradient',
    save_file_root_name=root_name,
    preload_file_root_name=root_name,  # 'guacamol_ar_emb_node_rpev2_0lr2e-5', 'guacamol_ar_nodev2_0lr2e-5'
    smiles_save_file=root_name.replace(' ', '_') + '_smiles_2.zip',
    on_policy_loss_type='advantage_record',
    node_temperature_schedule=lambda x: 100,
    eps=0.0,
    priors='conditional')
# preload_file='policy_gradient_run.h5')

while True:
    next(gen_fitter)
Example #3
#     gi.delete_cache()
#     gi = GrammarInitializer(grammar_cache)
#     max_steps_smiles = gi.init_grammar(100)

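# Example #3: back to the edge-level 'attn_graph' decoder with an 'advantage'
# loss. A discriminator fitter is returned (lr_discrim=5e-4), but discrim_wt=0.0
# and its loop below is commented out, so it presumably has no effect here.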
max_steps = 50
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.0,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.0,
    decoder_type='attn_graph',  # 'attention',
    plot_prefix='',
    dashboard='guacamol_' + ver + '_' + str(obj_num) + 'a',  # 'policy gradient',
    save_file_root_name='guacamol_' + ver + '_' + str(obj_num) + '.h5',
    smiles_save_file=None,  # 'pg_smiles_hg1.h5',
    on_policy_loss_type='advantage',  # 'best',
    off_policy_loss_type='mean')
# preload_file='policy_gradient_run.h5')

while True:
    next(gen_fitter)
    # for _ in range(1):
    #     next(disc_fitter)
Example #4
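# Example #4: sweeps 20 objectives (obj_num 0..19), one run per objective; the
# try/except keeps a single failing objective from aborting the whole sweep.
# Each run saves to, and preloads from, files derived from its own root_name.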
for obj_num in range(20):
    try:
        root_name = 'guac_' + ver + '_' + str(obj_num) + 'do_0.5_lr4e-5_mark'
        max_steps = 45
        model, gen_fitter, disc_fitter = train_policy_gradient(
            molecules,
            grammar,
            EPOCHS=100,
            BATCH_SIZE=batch_size,
            reward_fun_on=reward_fun,
            max_steps=max_steps,
            lr_on=4e-5,
            lr_discrim=5e-4,
            discrim_wt=0.0,
            p_thresh=-10,
            drop_rate=drop_rate,
            reward_sm=0.0,
            decoder_type='attn_graph_node',  # 'rnn_graph', 'attention',
            plot_prefix='',
            dashboard=root_name,  # 'policy gradient',
            save_file_root_name=root_name,
            preload_file_root_name=root_name,
            smiles_save_file=None,  # 'pg_smiles_hg1.h5',
            on_policy_loss_type='advantage_record',
            half_float=False,
            node_temperature_schedule=lambda x: 100,
            eps=2.0)
    except Exception as ex:
        print('{}: {}({})'.format(obj_num, type(ex), str(ex)))
# preload_file='policy_gradient_run.h5')
Example #5
reward_fun = lambda x: np.tanh(0.1 * scorer(x)) + atom_penalty(x)  # alternative: reward_aromatic_rings(x)
# later will run this ahead of time
# gi = GrammarInitializer(grammar_cache)

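# The reward above squashes the raw scorer output into (-1, 1) with tanh before
# adding the atom penalty, so large raw scores saturate rather than dominate.
# Rough shape of the squashing term (illustrative values only):
#   np.tanh(0.1 * 5.0)  ~ 0.46
#   np.tanh(0.1 * 20.0) ~ 0.96
#   np.tanh(0.1 * 50.0) ~ 1.00
# Example #5 re-enables the discriminator (discrim_wt=0.3) and uses what looks
# like reward smoothing (reward_sm=0.5).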
max_steps = 60
root_name = 'classic_logP'
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.3,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.5,
    decoder_type='attn_graph_node',  # 'attention',
    plot_prefix='',
    dashboard=root_name,  # 'policy gradient',
    save_file_root_name=root_name,
    smiles_save_file=root_name + '_smiles',
    on_policy_loss_type='advantage_record',
    half_float=False,
    priors='conditional')
# preload_file='policy_gradient_run.h5')

while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
Example #6
# if True:
#     gi.delete_cache()
#     gi = GrammarInitializer(grammar_cache)
#     max_steps_smiles = gi.init_grammar(100)

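# Example #6: another adversarial run with the edge-level 'attn_graph' decoder,
# a discriminator trained at lr_discrim=5e-4 and apparently weighted into the
# objective with discrim_wt=0.25, plus reward_sm=0.5 as in Example #5.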
max_steps = 50
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=3e-5,
    lr_discrim=5e-4,
    discrim_wt=0.25,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.5,
    decoder_type='attn_graph',  # 'attention',
    plot_prefix='hg ',
    dashboard='reward_linear_faster_half_2',  # 'policy gradient',
    save_file_root_name='true_reward_linear_faster_half_2.h5',
    smiles_save_file=None,  # 'pg_smiles_hg1.h5',
    on_policy_loss_type='advantage',  # 'best',
    off_policy_loss_type='mean')
# preload_file='policy_gradient_run.h5')

while True:
    next(gen_fitter)
    for _ in range(1):
        next(disc_fitter)
Example #7
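# Example #7 looks like an exploration-heavy configuration: a deliberately high
# on-policy learning rate (0.05, intended for the commented-out cosine
# schedule), no discriminator, the 'graph_conditional' decoder, an entropy
# bonus (entropy_wgt=1.0) and a rule-temperature schedule (toothy_exp_schedule).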
root_name = 'xtest91' + ver + '_' + str(obj_num) + '_lr4e-5'
max_steps = 60
model, gen_fitter, disc_fitter = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_on=0.05,  # crazy, to be used with the cosine schedule!
    lr_discrim=0.0,
    # lr_schedule=shifted_cosine_schedule,
    discrim_wt=0.0,
    p_thresh=-10,
    drop_rate=drop_rate,
    reward_sm=0.0,
    decoder_type='graph_conditional',  # 'rnn_graph', 'attention',
    plot_prefix='',
    dashboard=root_name,  # 'policy gradient',
    save_file_root_name=root_name,
    preload_file_root_name=None,  # root_name, 'guacamol_ar_emb_node_rpev2_0lr2e-5', 'guacamol_ar_nodev2_0lr2e-5'
    smiles_save_file=root_name.replace(' ', '_') + '_smiles.zip',
    on_policy_loss_type='advantage_record',
    rule_temperature_schedule=toothy_exp_schedule,
    eps=0.0,
    priors='conditional',
    entropy_wgt=1.0)
# preload_file='policy_gradient_run.h5')

while True:
    next(gen_fitter)