コード例 #1
0
    def generate_optimized_molecules(
            self, scoring_function, number_molecules, starting_population=None):
        """Search the language model with PUCT-guided MCTS and return the best molecule.

        Each token sequence handed to the evaluation callback is joined,
        decoded from DeepSMILES and sanitized. Sequences that fail either
        step, and molecules already evaluated during this call, receive
        -1.0; any other molecule is scored by ``scoring_function.score``.
        The highest score and its SMILES are stored on ``self.best_score``
        and ``self.best_smiles`` (both are reset at the start of the call).

        Args:
            scoring_function: object exposing ``score(smiles)``.
            number_molecules: only used in the progress message; a single
                molecule is returned whatever its value.
            starting_population: accepted but not used.

        Returns:
            A one-element list with the best SMILES found, or ``[None]`` if
            no evaluated molecule scored above -1.0.
        """
        print("generating %s samples..." % number_molecules)

        # Search hyper-parameters (fixed for this generator).
        tree_width = 24
        depth_limit = 100
        exploration_constant = 5
        simulation_budget = 10000

        # -1.0 doubles as the penalty score, so only a strictly better
        # molecule can ever replace the initial "no result" state.
        self.best_smiles = None
        self.best_score = -1.0

        seen = set()

        def eval_function(text):
            """Score one token sequence; -1.0 for invalid or repeated molecules."""
            # Joined outside the try on purpose: a failure here must propagate.
            sequence = ''.join(text)
            try:
                smiles = DeepSMILESLanguageModelUtils.sanitize(
                    DeepSMILESLanguageModelUtils.decode(sequence,
                                                        start='<s>',
                                                        end='</s>'))
            except Exception:
                return -1.0

            # Already-seen molecules get the same penalty as invalid ones;
            # adding a repeat to the set is a no-op.
            score = -1.0 if smiles in seen else scoring_function.score(smiles)
            seen.add(smiles)

            if score > self.best_score:
                self.best_score = score
                self.best_smiles = smiles

            return score

        search_tree = LanguageModelMCTSWithPUCTTerminating(
            self.lm,
            tree_width,
            depth_limit,
            eval_function,
            cpuct=exploration_constant,
            terminating_symbol='</s>')
        search_tree.search(["<s>"], simulation_budget)

        return [self.best_smiles]
コード例 #2
0
    def generate(self, number_samples):
        """Sample molecules from the language model via PUCT-guided MCTS.

        Every token sequence passed to the evaluation callback is recorded:
        its sanitized SMILES when decoding and sanitizing succeed, or the
        literal string ``"invalid"`` when either step raises. First-time
        molecules are rewarded with 1.0; repeats and invalid sequences get
        -1.0.

        Args:
            number_samples: passed to ``search`` as its second argument and
                shown in the progress message.

        Returns:
            The list of recorded samples in evaluation order, duplicates and
            ``"invalid"`` markers included.
        """
        print("generating %s samples..." % number_samples)

        # Search hyper-parameters (fixed for this generator).
        tree_width = 24
        depth_limit = 100
        exploration_constant = 5

        unique_smiles = set()
        collected = []

        def eval_function(text):
            """Record one sequence and return its novelty reward."""
            # Joined outside the try on purpose: a failure here must propagate.
            sequence = ''.join(text)
            try:
                smiles = DeepSMILESLanguageModelUtils.sanitize(
                    DeepSMILESLanguageModelUtils.decode(sequence,
                                                        start='<s>',
                                                        end='</s>'))
            except Exception:
                collected.append("invalid")
                return -1.0

            collected.append(smiles)

            # Repeats are still recorded above, but earn the penalty.
            if smiles in unique_smiles:
                return -1.0

            unique_smiles.add(smiles)
            return 1.0

        search_tree = LanguageModelMCTSWithPUCTTerminating(
            self.lm,
            tree_width,
            depth_limit,
            eval_function,
            cpuct=exploration_constant,
            terminating_symbol='</s>')
        search_tree.search(["<s>"], number_samples)

        return collected
コード例 #3
0
        logger.debug("%s, %s" % (smiles, str(score)))
        log_best(i, all_smiles, num_valid, logger)
        return score

    # Build the MCTS searcher over the language model. eval_function (whose
    # tail is just above) is passed as the scoring callback, and '</s>' is
    # given as the terminating symbol.
    mcts = LanguageModelMCTSWithPUCTTerminating(lm,
                                                width,
                                                max_depth,
                                                eval_function,
                                                cpuct=c,
                                                terminating_symbol='</s>')
    state = start_state

    logger.info("beginning search...")
    # Time only the search call itself (wall clock).
    start = time.time()
    mcts.search(state, num_simulations)
    end = time.time()

    logger.info("--done--")
    logger.info("num valid: %d" % num_valid)

    # The first element of the returned value is joined as a token sequence.
    # NOTE(review): the exact structure returned by get_best_sequence() is
    # not visible here -- confirm.
    best = mcts.get_best_sequence()
    generated_text = ''.join(best[0])
    logger.info("best generated text: %s" % generated_text)
    # Unlike inside the search callback, decode/sanitize are not wrapped in a
    # try here, so any exception they raise propagates to the caller.
    decoded = DeepSMILESLanguageModelUtils.decode(generated_text,
                                                  start='<s>',
                                                  end='</s>')
    smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
    # Re-score the best molecule for the report and log the elapsed seconds.
    logger.info("best SMILES: %s, J: %s (%s seconds)" %
                (smiles, scorer.score(smiles), str((end - start))))