def log_best(j, all_best, n_valid, seen, lggr):
    """Log a progress summary once every 50000 iterations.

    Nothing is logged unless ``j`` is a multiple of 50000.

    Args:
        j: Current iteration index.
        all_best: Sized collection of the unique results gathered over all
            iterations; its length is reported and it is handed to
            ``log_top_best``.
        n_valid: Count of valid results in the current iteration (formatted
            with ``%d``).
        seen: Sized collection of the unique results of the current
            iteration; only its length is reported.
        lggr: Logger-like object exposing ``info(msg)``. Every message is
            sent to this object.
    """
    if j % 50000 != 0:
        return

    lggr.info("--iteration: %d--" % j)
    lggr.info("num valid (in this iteration): %d" % n_valid)
    # These two lines previously went to the module-level ``logger`` instead
    # of the logger passed in, splitting one summary across two loggers.
    lggr.info("num unique (over all iterations): %s" % len(all_best))
    lggr.info("num unique (in this iteration): %s" % len(seen))
    # log_top_best is defined elsewhere; presumably it reports the 5 best
    # entries of all_best -- confirm against its definition.
    log_top_best(all_best, 5, lggr)
def log_progress():
    """Log a snapshot of the module-level counters and schedule the next one.

    Reads the globals ``num_valid``, ``all_smiles`` and ``num_iterations``,
    writes them to the module-level ``logger``, passes ``all_smiles`` to
    ``log_top_best``, and finally stores a freshly started
    ``Timer(LOG_INTERVAL, log_progress)`` in the global ``t`` so that this
    function is invoked again later.
    """
    global t

    emit = logger.info
    emit("--results--")
    emit("num valid: %d" % num_valid)
    emit("num unique: %s" % len(all_smiles))
    emit("num iterations: %s" % num_iterations)
    log_top_best(all_smiles, 5, logger)

    # Re-arm: the timer is kept in the global ``t``, so code outside this
    # function can reach the pending timer through that name.
    # NOTE(review): Timer is presumably threading.Timer (one-shot) -- confirm.
    next_timer = Timer(LOG_INTERVAL, log_progress)
    t = next_timer
    next_timer.start()
def log_progress():
    """Log a snapshot of the module-level counters and schedule the next one.

    Reads the globals ``simulations``, ``num_valid``, ``all_unique`` and
    ``seen``, writes them to the module-level ``logger``, passes
    ``all_unique`` to ``log_top_best``, and finally stores a freshly started
    ``Timer(LOG_INTERVAL, log_progress)`` in the global ``t`` so that this
    function is invoked again later.
    """
    global t

    emit = logger.info
    emit("--results--")
    emit("num simulations: %s" % simulations)
    emit("num valid (in this iteration): %d" % num_valid)
    emit("num unique (over all iterations): %s" % len(all_unique))
    emit("num unique (in this iteration): %s" % len(seen))
    log_top_best(all_unique, 5, logger)

    # Re-arm: the timer is kept in the global ``t``, so code outside this
    # function can reach the pending timer through that name.
    # NOTE(review): Timer is presumably threading.Timer (one-shot) -- confirm.
    next_timer = Timer(LOG_INTERVAL, log_progress)
    t = next_timer
    next_timer.start()
def log_best(j, all_best, n_valid, lggr):
    """Log a short progress summary once every 1000 iterations.

    Nothing is logged unless ``j`` is a multiple of 1000.

    Args:
        j: Current iteration index.
        all_best: Results collected so far; handed to ``log_top_best``.
        n_valid: Count of valid results (formatted with ``%d``).
        lggr: Logger-like object exposing ``info(msg)``.
    """
    if j % 1000 != 0:
        return

    report = lggr.info
    report("--iteration: %d--" % j)
    report("num valid: %d" % n_valid)
    # log_top_best is defined elsewhere; presumably it reports the 5 best
    # entries of all_best -- confirm against its definition.
    log_top_best(all_best, 5, lggr)
# --- Final report -----------------------------------------------------------
# Stop the clock first so the reported duration excludes the reporting below.
end = time.time()
logger.info("--done--")
logger.info("num valid: %d" % num_valid)

# Best sequence found by the search; element 0 is joined into a single string.
best = mcts.get_best_sequence()
generated_text = ''.join(best[0])
logger.info("best generated text: %s" % generated_text)

# Decode the generated text (with '<s>' / '</s>' as start / end markers) and
# sanitize the result before scoring it.
decoded = DeepSMILESLanguageModelUtils.decode(generated_text, start='<s>', end='</s>')
smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
best_score = scorer.score(smiles)
elapsed_text = str(end - start)
logger.info("best SMILES: %s, J: %s (%s seconds)" % (smiles, best_score, elapsed_text))
log_top_best(all_smiles, 5, logger)

# --- Dataset for the next language model ------------------------------------
logger.info("writing dataset...")
name = 'molexit-%d' % n
dataset = '../models/molexit/%s.txt' % name
with open(dataset, 'w') as f:
    # Rank by the first element of each value, highest first. The ascending
    # sort followed by a reversal is kept on purpose: it fixes the relative
    # order of entries with equal keys.
    ranked = sorted(all_smiles.items(), key=lambda kv: kv[1][0])
    ranked.reverse()
    for smi_text, _ in ranked[:keep_top_n]:
        encoded = smiles_to_deepsmiles(smi_text.strip())
        tokenizer = DeepSMILESTokenizer(encoded)
        # One molecule per line, tokens separated by single spaces.
        f.write(' '.join(token.value for token in tokenizer.get_tokens()))
        f.write("\n")

logger.info('training new LM...')
except StopTreeSearch: pass t.cancel() end = time.time() logger.info("--done--") logger.info("num simulations: %s" % simulations) logger.info("num valid (in this iteration): %d" % num_valid) logger.info("num valid (over all iterations): %d" % len(all_valid)) logger.info("num unique (over all iterations): %s" % len(all_unique)) logger.info("num unique (in this iteration): %s" % len(seen)) logger.info("best SMILES: %s, J: %s (%s seconds)" % (current_best_smiles, current_best_score, str((end - start)))) log_top_best(all_unique, 5, logger) logger.info("writing dataset...") name = 'molexit-%d' % n dataset = '../models/molexit/%s.txt' % name dataset_scores = [] with open(dataset, 'w') as f: for smi in list( reversed(sorted(all_unique.items(), key=lambda kv: kv[1][0])))[:keep_top_n]: try: dsmi = smiles_to_deepsmiles(smi[0].strip()) tok = DeepSMILESTokenizer(dsmi) tokens = tok.get_tokens() f.write(' '.join([t.value for t in tokens])) f.write("\n")