def eval_function(text):
    """Score one generated token sequence by validity and novelty.

    The tokens are joined into a single string, decoded with
    ``DeepSMILESLanguageModelUtils.decode`` and sanitized. A sequence that
    fails either step, or whose sanitized SMILES is already in
    ``all_smiles``, scores -1.0. A SMILES seen for the first time scores 1.0
    and is added to ``all_smiles``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: 1.0 for a new valid SMILES, -1.0 otherwise.

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_LIMIT`` when the function is entered.
    """
    global i, num_valid, all_smiles, elapsed

    if elapsed >= TIME_LIMIT:
        raise StopTreeSearch()

    i += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    except Exception:
        # Refresh the clock on failures as well: otherwise a run consisting
        # only of invalid sequences never advances `elapsed`, and the time
        # limit check at the top can never fire.
        elapsed = time.time() - start
        return -1.0

    num_valid += 1

    if smiles in all_smiles:
        score = -1.0
    else:
        score = 1.0
        all_smiles.add(smiles)

    elapsed = time.time() - start
    return score
def eval_function(text):
    """Score one generated token sequence with the distance scorer.

    The tokens are joined, decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``. Any failure (including a ``None`` molecule)
    scores -1.0. Valid molecules are scored by ``distance_scorer.score_mol``
    and recorded in ``all_unique`` / ``all_valid``; the best score and SMILES
    so far are tracked in ``current_best_score`` / ``current_best_smiles``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for an invalid sequence or a molecule that was already
        recorded in ``all_unique``; otherwise the distance score rescaled
        with ``2 * score - 1`` (scores are presumably in [0, 1], per the
        rescaling comment below).

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_LIMIT`` when the function is entered.
    """
    global simulations, num_valid, all_unique, all_valid, elapsed
    global current_best_score, current_best_smiles, beats_current

    if elapsed >= TIME_LIMIT:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1
    score = distance_scorer.score_mol(mol)

    # The duplicate test must happen BEFORE the molecule is recorded.
    # Recording first makes `smiles in all_unique` always true, so every
    # valid molecule would be penalised with -1.0.
    already_generated = smiles in all_unique
    all_unique[smiles] = (score, generated)

    if current_best_score is None or beats_current(score):
        current_best_score = score
        current_best_smiles = smiles

    all_valid.append((smiles, score))

    ret_score = -1.0 if already_generated else score
    # rescale score from [0,1] to [-1,1]; the -1.0 penalty is left untouched
    ret_score = (ret_score * 2) + (-1) if ret_score >= 0. else ret_score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence with ``jscorer``.

    The joined tokens are decoded from DeepSMILES and sanitized. Failures and
    SMILES already present in ``all_smiles`` score -1.0. A new SMILES is
    scored with ``jscorer.score`` and squashed with ``j / (1 + |j|)``; the
    raw score and the generated string are stored in ``all_smiles``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: the squashed score for a new SMILES, -1.0 otherwise.

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_LIMIT`` when the function is entered.
    """
    global i, num_valid
    global all_smiles, elapsed

    if elapsed >= TIME_LIMIT:
        raise StopTreeSearch()

    i += 1
    generated = ''.join(text)

    try:
        smiles = DeepSMILESLanguageModelUtils.sanitize(
            DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        )
    except Exception:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1

    if smiles not in all_smiles:
        jscore = jscorer.score(smiles)
        score = jscore / (1 + np.abs(jscore))
        all_smiles[smiles] = (jscore, generated)
    else:
        # repeated molecule: fixed penalty, nothing recorded
        score = -1.0

    message = "%s, %s" % (smiles, str(score))
    logger.debug(message)

    elapsed = time.time() - start
    return score
def eval_function(text):
    """Score one generated token sequence with a weighted multi-objective reward.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. For a valid molecule the
    reward combines the distance score (weight 0.75), one minus the scaled
    synthetic accessibility score (0.15) and one minus the squashed cycle
    score (0.10). Best-so-far tracking and the ``all_unique`` / ``all_valid``
    / ``seen`` records use the plain distance score.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for an invalid sequence or a molecule already in
        ``all_unique``; otherwise the weighted score mapped through
        ``2 * x - 1`` when it is non-negative.

    Raises:
        StopTreeSearch: if ``elapsed`` has reached ``time_limit`` or
            ``len(seen)`` equals ``max_gen`` when the function is entered.
    """
    global simulations, all_unique, elapsed
    global current_best_score, current_best_smiles, beats_current

    out_of_time = elapsed >= time_limit
    if out_of_time or len(seen) == max_gen:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    # synthetic accessibility score is a number between 1 (easy to make)
    # and 10 (very difficult to make); divide by 10 to scale it down
    sascore = sascorer.calculateScore(mol) / 10.

    # cycle score, squashed between 0 and 1
    raw_cycle = cycle_scorer.score_mol(mol)
    cyclescore = raw_cycle / (1 + raw_cycle)

    distance_score = distance_scorer.score_mol(mol)

    weighted_score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (
        0.10 * (1 - cyclescore))

    if current_best_score is None or beats_current(distance_score):
        current_best_score = distance_score
        current_best_smiles = smiles

    if distance_score == 1.0:
        logger.info("FOUND!")

    # Duplicate test happens before the molecule is recorded below.
    if smiles in all_unique:
        ret_score = -1.0
    else:
        ret_score = weighted_score

    # rescale score from [0,1] to [-1,1]; negative values pass through
    if ret_score >= 0.:
        ret_score = (ret_score * 2) + (-1)

    all_unique[smiles] = (distance_score, generated)
    all_valid.append((smiles, distance_score))
    seen[smiles] = distance_score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence with a weighted multi-objective reward.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. A SMILES already in
    ``seen`` is penalised with -1.0. A new molecule gets the weighted sum of
    the distance score (0.75), one minus the scaled synthetic accessibility
    score (0.15) and one minus the squashed cycle score (0.10); it is added
    to ``seen`` and stored in ``all_smiles`` with that weighted score.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for invalid or repeated molecules; otherwise the weighted
        score mapped through ``2 * x - 1`` when it is non-negative.

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_PER_ITERATION`` when the function is entered.
    """
    global simulations, num_valid
    global all_smiles, elapsed

    if elapsed >= TIME_PER_ITERATION:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1

    if smiles not in seen:
        # synthetic accessibility score is a number between 1 (easy to make)
        # and 10 (very difficult to make); divide by 10 to scale it down
        sascore = sascorer.calculateScore(mol) / 10.

        # cycle score, squashed between 0 and 1
        raw_cycle = cycle_scorer.score_mol(mol)
        cyclescore = raw_cycle / (1 + raw_cycle)

        distance_score = distance_scorer.score_mol(mol)

        score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore))

        seen.add(smiles)
        all_smiles[smiles] = (score, generated)

        if distance_score == 1.0:
            logger.info("FOUND!")
    else:
        # repeated molecule: fixed penalty, nothing recomputed or recorded
        score = -1.0

    # rescale score from [0,1] to [-1,1]; negative values pass through
    if score >= 0.:
        ret_score = (score * 2) + (-1)
    else:
        ret_score = score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence with the distance scorer.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. Valid molecules are
    scored by ``distance_scorer.score_mol``, tracked as best-so-far when
    ``beats_current`` says so, and recorded in ``all_unique``, ``all_valid``
    and ``seen``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for an invalid sequence or a molecule already in
        ``all_unique``; otherwise the distance score mapped through
        ``2 * x - 1`` when it is non-negative.

    Raises:
        StopTreeSearch: if ``elapsed`` has reached ``time_limit`` or
            ``len(seen)`` equals ``max_gen`` when the function is entered.
    """
    global simulations, all_unique, elapsed
    global current_best_score, current_best_smiles, beats_current

    out_of_time = elapsed >= time_limit
    if out_of_time or len(seen) == max_gen:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    score = distance_scorer.score_mol(mol)

    if current_best_score is None or beats_current(score):
        current_best_score = score
        current_best_smiles = smiles

    if score == 1.0:
        logger.info("FOUND!")

    # Duplicate test happens before the molecule is recorded below.
    if smiles in all_unique:
        ret_score = -1.0
    else:
        ret_score = score

    # rescale score from [0,1] to [-1,1]; negative values pass through
    if ret_score >= 0.:
        ret_score = (ret_score * 2) + (-1)

    all_unique[smiles] = (score, generated)
    all_valid.append((smiles, score))
    seen[smiles] = score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence with the distance scorer.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. Valid molecules are
    scored by ``distance_scorer.score_mol``, added to ``seen``, and recorded
    in ``all_unique`` / ``all_valid``; the best score and SMILES so far are
    tracked in ``current_best_score`` / ``current_best_smiles``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for an invalid sequence or a molecule that was already in
        ``seen``; otherwise the (unrescaled) distance score.

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_PER_ITERATION`` when the function is entered.
    """
    global simulations, num_valid, all_unique, elapsed
    global current_best_score, current_best_smiles, beats_current

    if elapsed >= TIME_PER_ITERATION:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1
    score = distance_scorer.score_mol(mol)

    # The duplicate test must happen BEFORE the molecule is added to `seen`.
    # Adding first makes `smiles in seen` always true, so every valid
    # molecule would be penalised with -1.0.
    already_seen = smiles in seen
    seen.add(smiles)
    all_unique[smiles] = (score, generated)

    if current_best_score is None or beats_current(score):
        current_best_score = score
        current_best_smiles = smiles

    all_valid.append((smiles, score))

    if score == 1.0:
        logger.info("FOUND!")

    ret_score = -1.0 if already_seen else score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence by blending prior likelihood and distance.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. For a valid molecule the
    distance score and the prior's log probability of the sequence are each
    rescaled and then mixed as
    ``(1 - sigma) * log_prob_term + sigma * distance_term``. Molecules
    already in ``seen`` are penalised with -1.0. Every valid molecule is
    recorded in ``all_unique``, ``all_valid`` and ``seen``.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for invalid or repeated molecules, otherwise the blended
        score.

    Raises:
        StopTreeSearch: if ``elapsed`` has reached ``time_limit`` or
            ``len(all_valid)`` equals ``max_gen`` when the function is entered.
    """
    global simulations, all_unique, elapsed
    global current_best_score, current_best_smiles, beats_current

    out_of_time = elapsed >= time_limit
    if out_of_time or len(all_valid) == max_gen:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    score = distance_scorer.score_mol(mol)

    if current_best_score is None or beats_current(score):
        current_best_score = score
        current_best_smiles = smiles

    if score == 1.0:
        logger.info("FOUND!")

    # As in "Molecular de-novo design through deep reinforcement learning",
    # by Olivecrona et al., we are adding the prior's log probability of the
    # generated sequence to the score.
    sentence = DeepSMILESLanguageModelUtils.extract_sentence(
        text, join_on=' ', start='<s>', end='</s>')
    prior_log_prob = prior.log_prob(sentence)

    rescaled_distance_score = (score * 2) + (-1)

    # Rescale the prior log prob: in practice the log probs are rarely less
    # than -45, and because probabilities are in the range [0,1] the max log
    # prob is log(1), i.e. 0.
    rescale_min = -45
    rescale_max = 0.0
    if prior_log_prob < rescale_min:
        # only reported, not clamped: the rescaled term then drops below -1
        logger.info("WARNING: prior log prob lower than %s" % rescale_min)

    # scaling x into [a,b]: (b-a)*((x - min(x))/(max(x) - min(x))+a
    target_low, target_high = -1, 1
    fraction = (prior_log_prob - rescale_min) / (rescale_max - rescale_min)
    rescaled_log_prob = (target_high - target_low) * fraction + target_low

    if smiles in seen:
        ret_score = -1.0
    else:
        ret_score = (1 - sigma) * rescaled_log_prob + sigma * rescaled_distance_score

    all_unique[smiles] = (score, generated)
    all_valid.append((smiles, score))
    seen.add(smiles)

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Score one generated token sequence as prior log-prob plus weighted distance.

    The joined tokens are decoded from DeepSMILES, sanitized and parsed with
    ``Chem.MolFromSmiles``; any failure scores -1.0. For a valid molecule the
    total ``prior_log_prob + sigma * distance_score`` is linearly mapped from
    ``[-45 - sigma, sigma]`` onto ``[-1, 1]``. Molecules already in ``seen``
    are penalised with -1.0. Best-so-far tracking and the ``all_unique`` /
    ``all_valid`` / ``seen`` records use the plain distance score.

    Args:
        text: iterable of string tokens produced by the search.

    Returns:
        float: -1.0 for invalid or repeated molecules, otherwise the rescaled
        total score.

    Raises:
        StopTreeSearch: if the recorded ``elapsed`` time has reached
            ``TIME_PER_ITERATION`` when the function is entered.
    """
    global simulations, num_valid, all_unique, elapsed
    global current_best_score, current_best_smiles, beats_current

    if elapsed >= TIME_PER_ITERATION:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            raise Exception
    except Exception:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1

    distance_score = distance_scorer.score_mol(mol)
    if distance_score == 1.0:
        logger.info("FOUND!")

    # As in "Molecular de-novo design through deep reinforcement learning",
    # by Olivecrona et al., we are adding the prior's log probability of the
    # generated sequence to the score.
    sentence = DeepSMILESLanguageModelUtils.extract_sentence(
        text, join_on=' ', start='<s>', end='</s>')
    prior_log_prob = prior.log_prob(sentence)

    tot_score = prior_log_prob + sigma * distance_score

    # Rescale the score. In practice the log probs are rarely less than -45,
    # so the lower bound is taken as -45 + (sigma * -1.0); the max log prob
    # is log(1) = 0, so the upper bound is 0 + sigma * 1.0.
    # NOTE(review): the lower bound assumes the distance term can reach
    # -sigma, which matches a [-1,1]-rescaled distance rather than the raw
    # distance_score used above — confirm the intended bounds.
    rescale_min = -45 - sigma
    rescale_max = sigma
    if tot_score < rescale_min:
        logger.info("WARNING: total score lower than %s" % rescale_min)

    # scaling x into [a,b]: (b-a)*((x - min(x))/(max(x) - min(x))+a
    target_low, target_high = -1, 1
    fraction = (tot_score - rescale_min) / (rescale_max - rescale_min)
    ret_score = (target_high - target_low) * fraction + target_low

    if smiles in seen:
        ret_score = -1.0

    if current_best_score is None or beats_current(distance_score):
        current_best_score = distance_score
        current_best_smiles = smiles

    all_unique[smiles] = (distance_score, generated)
    all_valid.append((smiles, distance_score))
    seen.add(smiles)

    elapsed = time.time() - start
    return ret_score