Ejemplo n.º 1
0
def log_best(j, all_best, n_valid, seen, lggr):
    """Log a progress summary on every 50000th iteration.

    Nothing happens unless ``j`` is a multiple of 50000. Every message is
    written to ``lggr``, the logger supplied by the caller, so the whole
    report ends up in one place.

    Args:
        j: Current iteration counter.
        all_best: Sized collection whose length is reported as
            "num unique (over all iterations)"; it is also forwarded to
            ``log_top_best``.
        n_valid: Count reported as "num valid (in this iteration)".
        seen: Sized collection whose length is reported as
            "num unique (in this iteration)".
        lggr: Logger-like object exposing ``info(message)``.
    """
    if j % 50000 != 0:
        return

    lggr.info("--iteration: %d--" % j)
    lggr.info("num valid (in this iteration): %d" % n_valid)
    # These two lines previously went to a module-level ``logger`` rather
    # than the injected one, splitting the report across two loggers (or
    # raising NameError when no such global exists).
    lggr.info("num unique (over all iterations): %s" % len(all_best))
    lggr.info("num unique (in this iteration): %s" % len(seen))
    log_top_best(all_best, 5, lggr)
Ejemplo n.º 2
0
def log_progress():
    """Log the current run statistics and schedule the next report.

    Reads the module-level ``num_valid``, ``all_smiles`` and
    ``num_iterations``, writes them to the module-level ``logger``, then
    stores a fresh ``Timer`` in the global ``t`` and starts it so this
    function is invoked again (presumably after ``LOG_INTERVAL`` seconds,
    assuming ``Timer`` is ``threading.Timer`` -- confirm against imports).
    """
    global t

    report = logger.info
    report("--results--")
    report("num valid: %d" % num_valid)
    report("num unique: %s" % len(all_smiles))
    report("num iterations: %s" % num_iterations)
    log_top_best(all_smiles, 5, logger)

    # Re-arm: keep the handle in the global ``t`` so other code can reach it.
    next_tick = Timer(LOG_INTERVAL, log_progress)
    t = next_tick
    next_tick.start()
Ejemplo n.º 3
0
 def log_progress():
     """Log the current search statistics and schedule the next report.

     Reads ``simulations``, ``num_valid``, ``all_unique`` and ``seen`` from
     the enclosing/module scope, writes them to ``logger``, then stores a
     fresh ``Timer`` in the global ``t`` and starts it so this function is
     invoked again (presumably after ``LOG_INTERVAL`` seconds, assuming
     ``Timer`` is ``threading.Timer`` -- confirm against imports).
     """
     global t

     report = logger.info
     report("--results--")
     report("num simulations: %s" % simulations)
     report("num valid (in this iteration): %d" % num_valid)
     report("num unique (over all iterations): %s" % len(all_unique))
     report("num unique (in this iteration): %s" % len(seen))
     log_top_best(all_unique, 5, logger)

     # Re-arm: keep the handle in the global ``t`` so other code can reach it.
     next_tick = Timer(LOG_INTERVAL, log_progress)
     t = next_tick
     next_tick.start()
Ejemplo n.º 4
0
def log_best(j, all_best, n_valid, lggr):
    """Emit a short progress report on every 1000th iteration.

    Does nothing unless ``j`` is a multiple of 1000.

    Args:
        j: Current iteration counter.
        all_best: Results forwarded unchanged to ``log_top_best``.
        n_valid: Count reported as "num valid".
        lggr: Logger-like object exposing ``info(message)``; also passed on
            to ``log_top_best``.
    """
    if j % 1000 != 0:
        return

    header = "--iteration: %d--" % j
    lggr.info(header)
    valid_line = "num valid: %d" % n_valid
    lggr.info(valid_line)
    log_top_best(all_best, 5, lggr)
Ejemplo n.º 5
0
    # Fragment of a larger routine (header not visible here): report the
    # outcome of a finished search, then dump the top results to a text file.
    # `end - start` is reported below as the elapsed seconds.
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    # best[0] is joined with '' into one string, so it is presumably a
    # sequence of token strings -- TODO confirm against get_best_sequence().
    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    # Decode with '<s>' / '</s>' passed as the start/end markers, then
    # sanitize the decoded string before scoring it.
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text,
                                                  start='<s>',
                                                  end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (smiles, scorer.score(smiles), str((end - start))))

    log_top_best(all_smiles, 5, logger)

    logger.info("writing dataset...")
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    with open(dataset, 'w') as f:
        # Take the keep_top_n items of all_smiles with the largest kv[1][0]
        # (presumably the score -- verify against where all_smiles is
        # filled): sort ascending, reverse, then slice.
        # NOTE(review): reversed(sorted(...)) also reverses the relative
        # order of ties, so it is not interchangeable with
        # sorted(..., reverse=True) if tie order matters.
        for smi in list(
                reversed(sorted(all_smiles.items(),
                                key=lambda kv: kv[1][0])))[:keep_top_n]:
            # smi is a (key, value) pair; the stripped key is converted and
            # tokenized, and one line of space-separated token values is
            # written per entry.
            dsmi = smiles_to_deepsmiles(smi[0].strip())
            tok = DeepSMILESTokenizer(dsmi)
            tokens = tok.get_tokens()
            f.write(' '.join([t.value for t in tokens]))
            f.write("\n")

    logger.info('training new LM...')
Ejemplo n.º 6
0
    except StopTreeSearch:
        pass

    # Fragment of a larger routine (header not visible here): stop the
    # periodic reporting and log the final statistics of the search.
    # `t` is presumably the Timer re-armed by log_progress -- TODO confirm.
    t.cancel()
    # `end - start` is reported below as the elapsed seconds.
    end = time.time()

    logger.info("--done--")
    logger.info("num simulations: %s" % simulations)
    logger.info("num valid (in this iteration): %d" % num_valid)
    logger.info("num valid (over all iterations): %d" % len(all_valid))
    logger.info("num unique (over all iterations): %s" % len(all_unique))
    logger.info("num unique (in this iteration): %s" % len(seen))
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (current_best_smiles, current_best_score, str((end - start))))

    log_top_best(all_unique, 5, logger)

    logger.info("writing dataset...")
    # Output path is built from the integer `n`: ../models/molexit/molexit-<n>.txt
    name = 'molexit-%d' % n
    dataset = '../models/molexit/%s.txt' % name
    # Starts empty; how it is filled is not visible in this fragment.
    dataset_scores = []
    with open(dataset, 'w') as f:
        for smi in list(
                reversed(sorted(all_unique.items(),
                                key=lambda kv: kv[1][0])))[:keep_top_n]:
            try:
                dsmi = smiles_to_deepsmiles(smi[0].strip())
                tok = DeepSMILESTokenizer(dsmi)
                tokens = tok.get_tokens()
                f.write(' '.join([t.value for t in tokens]))
                f.write("\n")