def reward_aromatic_rings(smiles):
    """Reward molecules by their aromatic ring count.

    Each valid molecule scores its aromatic ring count plus 0.5; an
    invalid SMILES (where num_aromatic_rings yields None) is penalized
    with a flat -1.

    NOTE(review): the original docstring claimed this encourages larger
    molecule *length* — copy-paste error; the code rewards ring count.
    NOTE(review): shadowed by a later redefinition of the same name in
    this file — confirm which version is intended.

    :param smiles: list of SMILES strings
    :return: numpy array of float rewards, one per input string
    """
    return np.array(
        [-1 if num is None else num + 0.5 for num in num_aromatic_rings(smiles)]
    )
def reward_aromatic_rings(smiles):
    """Reward molecules by their aromatic ring count.

    Each valid molecule scores its aromatic ring count plus 0.5; an
    invalid SMILES (where num_aromatic_rings yields None) is penalized
    with a flat -1. An empty input is treated as invalid outright.

    NOTE(review): the original docstring claimed this encourages larger
    molecule *length* — copy-paste error; the code rewards ring count.
    NOTE(review): unlike the earlier definition of the same name, this
    returns a plain list (or scalar -1), not a numpy array — confirm
    callers accept both.

    :param smiles: list of SMILES strings
    :return: -1 for empty input, otherwise list of float rewards
    """
    # len() rather than bare truthiness so array-like inputs don't raise
    # on ambiguous boolean conversion.
    if not len(smiles):
        return -1  # an empty string is invalid for our purposes
    # Renamed from the misleading 'atoms': these are per-molecule
    # aromatic ring counts (None marks an unparseable SMILES).
    ring_counts = num_aromatic_rings(smiles)
    return [-1 if num is None else num + 0.5 for num in ring_counts]
sa_thresh=0, normalize_scores=True) max_steps = 277 #settings['max_seq_length'] def second_score(smiles): pre_scores = 3 * 2.5 + scorer.get_scores(smiles)[0] score = np.power(pre_scores.prod(1), 0.333) for i in range(len(score)): if np.isnan(score[i]): score[i] = -1 return score reward_fun = lambda x: scorer(x) + np.array( [-5 * max(x - 5, 0) for x in num_aromatic_rings(x)] ) # - 0.2*np.array([0 if num is None else num for num in num_aromatic_rings(x)])# + reward_aliphatic_rings(x)# + 0.05*reward_aromatic_rings(x)#lambda x: reward_aromatic_rings(x)# #lambda x: reward_aromatic_rings(x)# model, fitter1, fitter2 = train_policy_gradient( molecules, grammar, EPOCHS=100, BATCH_SIZE=batch_size, reward_fun_on=reward_fun, max_steps=max_steps, lr_off=0.0, lr_on=1e-4, drop_rate=drop_rate, decoder_type='attention', #'random',# plot_prefix='anchor cycle', dashboard='anchor wweak sa', #None,#