Example #1
import numpy as np

def scorer(x):  # assumed signature; the code computing pre_scores from x is truncated in this excerpt
    ...
    score = np.power(pre_scores.prod(1), 0.333)  # approximate cube root of the row-wise product of scores
    for i in range(len(score)):
        if np.isnan(score[i]):  # scores that failed to compute get a -1 penalty
            score[i] = -1
    return score


reward_fun_on = lambda x: 2.5 + scorer(x)  # alternative: lambda x: reward_aromatic_rings(x)
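As reconstructed above, scorer maps molecules whose score cannot be computed to -1, so the 2.5 offset in reward_fun_on leaves even failed molecules with a small positive reward. The snippet below is an illustrative, self-contained sketch of that arithmetic on a made-up pre_scores array; none of the values are from the original script.

import numpy as np

# Illustrative only: replay the scorer fragment's arithmetic on fake data.
pre_scores = np.array([[0.8, 0.5, 0.9],      # molecule with three property scores
                       [0.2, np.nan, 0.7]])  # molecule whose scoring failed
score = np.power(pre_scores.prod(1), 0.333)  # cube root of the row-wise product
score[np.isnan(score)] = -1                  # failed molecule gets -1
reward = 2.5 + score                         # same offset as reward_fun_on
print(reward)                                # approximately [3.21, 1.5]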

model, fitter1, fitter2 = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun_on,
    max_steps=max_steps,
    lr_off=1e-4,
    lr_on=1e-4,
    drop_rate=drop_rate,
    decoder_type='attention',
    plot_prefix='valid ',
    dashboard='valid',
    save_file='policy_gradient_valid.h5',
    smiles_save_file='pg_smiles_valid.h5',
    on_policy_loss_type='valid',
    off_policy_loss_type='mean')
    # optionally: preload_file='policy_gradient_main.h5'

while True:
    next(fitter1)
    next(fitter2)
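The loop above steps the two training generators forever. Below is a sketched bounded variant; n_iterations is a hypothetical name, not from the original script, and the comments on the two fitters are an inference (in the anchor run further down, lr_off is 0.0 and only fitter1 is stepped).

# Sketch only: run a fixed number of training steps instead of looping forever.
n_iterations = 10_000  # hypothetical budget, not from the original script
for _ in range(n_iterations):
    next(fitter1)  # presumably the on-policy (policy-gradient) fitter
    next(fitter2)  # presumably the off-policy fitter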
Example #2

    # Tail of this example's scorer; the rest of the function is truncated in this excerpt.
    return score

reward_fun = lambda x: 3 * 2.5 + scorer(x)
# commented-out alternatives from the source: subtracting
#   0.2*np.array([0 if num is None else num for num in num_aromatic_rings(x)]),
# adding reward_aliphatic_rings(x), adding 0.05*reward_aromatic_rings(x),
# or using lambda x: reward_aromatic_rings(x) on its own

model, fitter1, fitter2 = train_policy_gradient(
    molecules,
    grammar,
    EPOCHS=100,
    BATCH_SIZE=batch_size,
    reward_fun_on=reward_fun,
    max_steps=max_steps,
    lr_off=0.0,
    lr_on=1e-4,
    drop_rate=drop_rate,
    decoder_type='attention',  # alternative: 'random'
    plot_prefix='anchor ',
    dashboard='anchor',  # alternative: None
    save_file='paper/policy_gradient_anchor.h5',
    smiles_save_file='paper/pg_smiles_anchor.h5',
    on_policy_loss_type='best',
    off_policy_loss_type='mean',
    preload_file='paper/policy_gradient_baseline.h5',
    anchor_file='paper/policy_gradient_baseline.h5',
    anchor_weight=1e9)
while True:
    next(fitter1)
    # next(fitter2)  # not stepped here; presumably the off-policy fitter (lr_off is 0.0 in this run)
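This anchor run preloads a baseline checkpoint and passes the same file as anchor_file with a huge anchor_weight, which by its name penalises drift away from the baseline model. The following is a generic sketch of one common form of such an anchor penalty (an L2 distance between current and anchor parameters); it illustrates the idea only and is not the loss implemented inside train_policy_gradient.

import torch

def anchored_loss(task_loss, model, anchor_model, anchor_weight):
    # Sketch only: add an L2 penalty keeping the trained parameters close to a
    # frozen anchor copy; the library's actual anchor term may be different.
    penalty = sum(((p - a.detach()) ** 2).sum()
                  for p, a in zip(model.parameters(), anchor_model.parameters()))
    return task_loss + anchor_weight * penalty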
Example #3
    # Tail of a scoring helper; the lines computing `score` are truncated in this excerpt.
    for i in range(len(score)):
        if np.isnan(score[i]):  # scores that failed to compute get a -1 penalty
            score[i] = -1
    return score

import numpy as np

reward_fun = lambda x: np.array([1 for _ in x])  # constant reward of 1 per molecule; with lr_on=0.0 below, training is effectively off-policy only

model, fitter1, fitter2 = train_policy_gradient(molecules,
                                                grammar,
                                                EPOCHS=100,
                                                BATCH_SIZE=batch_size,
                                                reward_fun_on=reward_fun,
                                                max_steps=max_steps,
                                                lr_off=1e-4,
                                                lr_on=0.0,
                                                drop_rate=drop_rate,
                                                decoder_type='attention',  # alternatives: 'action', 'random', 'step'
                                                plot_prefix='baseline ',
                                                dashboard='baseline',  # alternative: None
                                                save_file='paper/policy_gradient_baseline_deleteme.h5',
                                                smiles_save_file='paper/pg_smiles_baseline_deleteme.h5',
                                                on_policy_loss_type='mean',
                                                off_policy_loss_type='mean',
                                                sanity_checks=False) # some of the database molecules don't conform to our constraints
                                                # preload_file='policy_gradient_tmp.h5',
                                                # anchor_file='policy_gradient_tmp.h5',
                                                # anchor_weight=1)
while True:
    next(fitter1)
    next(fitter2)
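For quick comparison, the three runs above differ mainly in the reward function, the learning rates, and the on-policy loss type. The dict below simply restates those settings from the calls above, side by side, for reference.

# Settings copied from the three train_policy_gradient calls above, for comparison only.
runs = {
    'valid':    dict(reward='2.5 + scorer(x)',     lr_on=1e-4, lr_off=1e-4, on_loss='valid', off_loss='mean'),
    'anchor':   dict(reward='3 * 2.5 + scorer(x)', lr_on=1e-4, lr_off=0.0,  on_loss='best',  off_loss='mean'),
    'baseline': dict(reward='constant 1',          lr_on=0.0,  lr_off=1e-4, on_loss='mean',  off_loss='mean'),
}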