def eval_function(text):
    """Score one generated token sequence for the search.

    The tokens in ``text`` are joined, decoded from DeepSMILES and sanitized.
    Sequences that cannot be decoded, sanitized, or parsed into a molecule
    are scored ``-1.0``. Valid molecules receive a weighted blend of the
    distance score (75%), the inverted synthetic accessibility score (15%)
    and the inverted cycle score (10%).

    Side effects: increments the module-level counters ``i`` (every call) and
    ``num_valid`` (valid molecules only), records the result in
    ``all_smiles`` keyed by SMILES, and calls ``log_best`` on every path.

    Args:
        text: iterable of string tokens produced by the generator.

    Returns:
        ``-1.0`` for an invalid sequence, otherwise the weighted score.
    """
    global i, num_valid

    i += 1
    generated = ''.join(text)

    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    except Exception:
        log_best(i, all_smiles, num_valid, logger)
        return -1.0

    # MolFromSmiles signals a parse failure by returning None rather than
    # raising; treat that as an invalid sequence instead of letting the
    # scorer below fail on a None molecule.
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        log_best(i, all_smiles, num_valid, logger)
        return -1.0

    num_valid += 1

    # synthetic accessibility score is a number between 1 (easy to make)
    # and 10 (very difficult to make)
    sascore = sascorer.calculateScore(mol) / 10.

    # cycle score, squashed between 0 and 1
    cyclescore = cycle_scorer.score(smiles)
    cyclescore = cyclescore / (1 + cyclescore)

    distance_score = distance_scorer.score(smiles)

    score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore))

    all_smiles[smiles] = (score, generated)

    # Lazy logger arguments: the message is only formatted if DEBUG is enabled.
    logger.debug("%s, %s", smiles, score)

    log_best(i, all_smiles, num_valid, logger)
    return score
def eval_function(text):
    """Evaluate one generated token sequence under a time/generation budget.

    Raises ``StopTreeSearch`` once ``elapsed`` reaches ``time_limit`` or the
    number of entries in ``seen`` equals ``max_gen``. Otherwise the tokens are
    joined, decoded from DeepSMILES, sanitized and parsed into a molecule.

    Side effects: increments ``simulations``; refreshes ``elapsed`` before
    every return; updates ``current_best_score`` / ``current_best_smiles``
    (tracked on the distance score alone); records the molecule in
    ``all_unique``, ``all_valid`` and ``seen``.

    Args:
        text: iterable of string tokens produced by the generator.

    Returns:
        ``-1.0`` for an invalid sequence or a SMILES already present in
        ``all_unique``; otherwise the weighted score, mapped through
        ``2 * score - 1`` when it is non-negative.

    Raises:
        StopTreeSearch: when the time or generation budget is exhausted.
    """
    global simulations, elapsed, current_best_score, current_best_smiles

    # Budget check (an alternative budget on `simulations == max_sims` was
    # left commented out in the original).
    if elapsed >= time_limit or len(seen) == max_gen:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    molecule = None
    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        molecule = Chem.MolFromSmiles(smiles)
    except Exception:
        # Any decode/sanitize/parse failure counts as an invalid sequence.
        molecule = None

    if molecule is None:
        elapsed = time.time() - start
        return -1.0

    # synthetic accessibility score is a number between 1 (easy to make)
    # and 10 (very difficult to make)
    sa_component = sascorer.calculateScore(molecule) / 10.

    # cycle score, squashed between 0 and 1
    raw_cycle = cycle_scorer.score_mol(molecule)
    cycle_component = raw_cycle / (1 + raw_cycle)

    distance_score = distance_scorer.score_mol(molecule)

    weighted_score = (0.75 * distance_score) + (0.15 * (1 - sa_component)) + (
        0.10 * (1 - cycle_component))

    # The running best is tracked on the distance score, not the blend.
    if current_best_score is None or beats_current(distance_score):
        current_best_score = distance_score
        current_best_smiles = smiles

    if distance_score == 1.0:
        logger.info("FOUND!")

    # Duplicates (by membership in all_unique) are penalised; fresh molecules
    # have their score rescaled from [0,1] to [-1,1].
    if smiles in all_unique:
        ret_score = -1.0
    elif weighted_score >= 0.:
        ret_score = (weighted_score * 2) + (-1)
    else:
        ret_score = weighted_score

    all_unique[smiles] = (distance_score, generated)
    all_valid.append((smiles, distance_score))
    seen[smiles] = distance_score

    elapsed = time.time() - start
    return ret_score
def eval_function(text):
    """Evaluate one generated token sequence within a per-iteration time limit.

    Raises ``StopTreeSearch`` once ``elapsed`` reaches ``TIME_PER_ITERATION``.
    Otherwise the tokens are joined, decoded from DeepSMILES, sanitized and
    parsed into a molecule.

    Side effects: increments ``simulations`` (every evaluated call) and
    ``num_valid`` (every parseable molecule, duplicates included); refreshes
    ``elapsed`` before every return; adds new molecules to ``seen`` and
    ``all_smiles``.

    Args:
        text: iterable of string tokens produced by the generator.

    Returns:
        ``-1.0`` for an invalid sequence or a SMILES already in ``seen``;
        otherwise the weighted score, mapped through ``2 * score - 1`` when
        it is non-negative.

    Raises:
        StopTreeSearch: when the time budget is exhausted.
    """
    global simulations, num_valid, elapsed

    if elapsed >= TIME_PER_ITERATION:
        raise StopTreeSearch()

    simulations += 1
    generated = ''.join(text)

    molecule = None
    try:
        decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
        smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
        molecule = Chem.MolFromSmiles(smiles)
    except Exception:
        # Any decode/sanitize/parse failure counts as an invalid sequence.
        molecule = None

    if molecule is None:
        elapsed = time.time() - start
        return -1.0

    num_valid += 1

    # Already-seen molecules are penalised without being re-scored.
    if smiles in seen:
        elapsed = time.time() - start
        return -1.0

    # synthetic accessibility score is a number between 1 (easy to make)
    # and 10 (very difficult to make)
    sa_component = sascorer.calculateScore(molecule) / 10.

    # cycle score, squashed between 0 and 1
    raw_cycle = cycle_scorer.score_mol(molecule)
    cycle_component = raw_cycle / (1 + raw_cycle)

    distance_score = distance_scorer.score_mol(molecule)

    score = (0.75 * distance_score) + (0.15 * (1 - sa_component)) + (0.10 * (1 - cycle_component))

    seen.add(smiles)
    all_smiles[smiles] = (score, generated)

    if distance_score == 1.0:
        logger.info("FOUND!")

    # rescale score from [0,1] to [-1,1]; a negative score passes through
    if score >= 0.:
        ret_score = (score * 2) + (-1)
    else:
        ret_score = score

    elapsed = time.time() - start
    return ret_score
start = time.time() logger.info("beginning search...") for i in range(num_attempts): try: generated = lm.generate(num_chars=100, text_seed='<s>') decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>') sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded) num_valid += 1 # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make) sascore = sascorer.calculateScore( Chem.MolFromSmiles(sanitized)) / 10. # cycle score, squashed between 0 and 1 cyclescore = cycle_scorer.score(sanitized) cyclescore = cyclescore / (1 + cyclescore) distance_score = distance_scorer.score(sanitized) score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore)) all_smiles[sanitized] = (score, generated) if current_best_score is None or beats_current(score): current_best_score = score current_best_smiles = sanitized
start = time.time() elapsed = time.time() - start while elapsed < TIME_PER_ITERATION: simulations += 1 try: generated = lm.generate(num_chars=100, text_seed='<s>') decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>') sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded) mol = Chem.MolFromSmiles(sanitized) num_valid += 1 # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make) sascore = sascorer.calculateScore(mol) / 10. # cycle score, squashed between 0 and 1 cyclescore = cycle_scorer.score_mol(mol) cyclescore = cyclescore / (1 + cyclescore) distance_score = distance_scorer.score_mol(mol) score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore)) all_smiles[sanitized] = (score, generated) seen.add(sanitized) if distance_score == 1.0: logger.info("FOUND!") if current_best_score is None or beats_current(score): current_best_score = score current_best_smiles = sanitized