Example #1
0
    def eval_function(text):
        """Score one generated token sequence for the search.

        Args:
            text: iterable of string tokens; they are concatenated and decoded
                as DeepSMILES delimited by '<s>' and '</s>'.

        Returns:
            -1.0 when the sequence cannot be decoded, sanitized or parsed into
            a molecule; otherwise the weighted score
            0.75 * distance + 0.15 * (1 - SA / 10) + 0.10 * (1 - squashed cycle).

        Side effects:
            Increments the global attempt counter ``i`` on every call and
            ``num_valid`` on every successfully parsed molecule, records the
            result in ``all_smiles`` and reports progress through ``log_best``.
        """
        global i, num_valid, all_smiles
        i += 1

        generated = ''.join(text)
        try:
            decoded = DeepSMILESLanguageModelUtils.decode(generated,
                                                          start='<s>',
                                                          end='</s>')
            smiles = DeepSMILESLanguageModelUtils.sanitize(decoded)
            # Parse once, inside the guarded region: MolFromSmiles signals an
            # unparsable string by returning None rather than raising.
            mol = Chem.MolFromSmiles(smiles)
        except Exception:
            mol = None

        if mol is None:
            # Invalid candidate: still report progress, then penalise it.
            log_best(i, all_smiles, num_valid, logger)
            return -1.0

        num_valid += 1

        # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make)
        sascore = sascorer.calculateScore(mol) / 10.

        # cycle score, squashed between 0 and 1
        cyclescore = cycle_scorer.score(smiles)
        cyclescore = cyclescore / (1 + cyclescore)

        distance_score = distance_scorer.score(smiles)

        score = (0.75 * distance_score) + (0.15 *
                                           (1 - sascore)) + (0.10 *
                                                             (1 - cyclescore))

        # Keyed by sanitized SMILES, so a repeated molecule overwrites its entry.
        all_smiles[smiles] = (score, generated)

        # Lazy logger arguments: the message is only formatted if DEBUG is enabled.
        logger.debug("%s, %s", smiles, score)
        log_best(i, all_smiles, num_valid, logger)
        return score
Example #2
0
    def eval_function(text):
        """Evaluate one generated token sequence and return its search reward.

        Args:
            text: iterable of string tokens forming a DeepSMILES candidate
                delimited by '<s>' and '</s>'.

        Returns:
            -1.0 for a candidate that fails to decode/sanitize/parse or whose
            SMILES is already in ``all_unique``; otherwise the weighted score
            mapped through ``2 * score - 1`` when it is non-negative, or the
            weighted score unchanged when it is negative.

        Raises:
            StopTreeSearch: when ``elapsed`` has reached ``time_limit`` or
                ``len(seen)`` equals ``max_gen``.
        """
        global simulations, all_unique, elapsed, current_best_score, current_best_smiles, beats_current

        out_of_time = elapsed >= time_limit
        if out_of_time or len(seen) == max_gen:
            raise StopTreeSearch()

        simulations += 1

        generated = ''.join(text)
        try:
            smiles = DeepSMILESLanguageModelUtils.sanitize(
                DeepSMILESLanguageModelUtils.decode(generated,
                                                    start='<s>',
                                                    end='</s>'))
            mol = Chem.MolFromSmiles(smiles)
        except Exception:
            mol = None

        if mol is None:
            # Invalid candidate: refresh the clock and penalise.
            elapsed = time.time() - start
            return -1.0

        # Synthetic accessibility runs from 1 (easy) to 10 (hard); scale by 10.
        sa_fraction = sascorer.calculateScore(mol) / 10.

        # Squash the raw cycle score via x / (1 + x).
        raw_cycle = cycle_scorer.score_mol(mol)
        cycle_fraction = raw_cycle / (1 + raw_cycle)

        distance_score = distance_scorer.score_mol(mol)

        weighted_score = (0.75 * distance_score) + (0.15 * (1 - sa_fraction)) + (
            0.10 * (1 - cycle_fraction))

        # The best candidate is tracked on distance alone, not the weighted score.
        if current_best_score is None or beats_current(distance_score):
            current_best_score = distance_score
            current_best_smiles = smiles

        if distance_score == 1.0:
            logger.info("FOUND!")

        # Decide the reward before this SMILES is recorded below, so that the
        # first sighting of a molecule is not treated as a duplicate.
        if smiles in all_unique:
            ret_score = -1.0
        elif weighted_score >= 0.:
            # rescale score from [0,1] to [-1,1]
            ret_score = (weighted_score * 2) + (-1)
        else:
            ret_score = weighted_score

        all_unique[smiles] = (distance_score, generated)
        all_valid.append((smiles, distance_score))
        seen[smiles] = distance_score

        elapsed = time.time() - start
        return ret_score
    def eval_function(text):
        """Evaluate one generated token sequence within the time budget.

        Args:
            text: iterable of string tokens forming a DeepSMILES candidate
                delimited by '<s>' and '</s>'.

        Returns:
            -1.0 for a candidate that fails to decode/sanitize/parse or whose
            SMILES is already in ``seen``; otherwise the weighted score mapped
            through ``2 * score - 1`` when it is non-negative, or the weighted
            score unchanged when it is negative.

        Raises:
            StopTreeSearch: when ``elapsed`` has reached ``TIME_PER_ITERATION``.
        """
        global simulations, num_valid, all_smiles, elapsed

        if elapsed >= TIME_PER_ITERATION:
            raise StopTreeSearch()

        simulations += 1

        generated = ''.join(text)
        try:
            smiles = DeepSMILESLanguageModelUtils.sanitize(
                DeepSMILESLanguageModelUtils.decode(generated,
                                                    start='<s>',
                                                    end='</s>'))
            mol = Chem.MolFromSmiles(smiles)
        except Exception:
            mol = None

        if mol is None:
            # Invalid candidate: refresh the clock and penalise.
            elapsed = time.time() - start
            return -1.0

        num_valid += 1

        if smiles in seen:
            # Duplicates count as valid but earn the penalty and are not re-scored.
            ret_score = -1.0
        else:
            # Synthetic accessibility runs from 1 (easy) to 10 (hard); scale by 10.
            sa_fraction = sascorer.calculateScore(mol) / 10.

            # Squash the raw cycle score via x / (1 + x).
            raw_cycle = cycle_scorer.score_mol(mol)
            cycle_fraction = raw_cycle / (1 + raw_cycle)

            distance_score = distance_scorer.score_mol(mol)

            score = (0.75 * distance_score) + (0.15 * (1 - sa_fraction)) + (
                0.10 * (1 - cycle_fraction))

            seen.add(smiles)
            all_smiles[smiles] = (score, generated)
            if distance_score == 1.0:
                logger.info("FOUND!")

            # rescale score from [0,1] to [-1,1]
            if score >= 0.:
                ret_score = (score * 2) + (-1)
            else:
                ret_score = score

        elapsed = time.time() - start
        return ret_score
Example #4
0
    # Timestamp taken immediately before the sampling loop begins.
    start = time.time()

    logger.info("beginning search...")
    # One language-model sample is generated and scored per attempt.
    for i in range(num_attempts):
        try:
            # Request text from the language model (num_chars=100), seeded with
            # the '<s>' start token.
            generated = lm.generate(num_chars=100, text_seed='<s>')

            decoded = DeepSMILESLanguageModelUtils.decode(generated,
                                                          start='<s>',
                                                          end='</s>')
            sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded)

            # Only reached when decode and sanitize did not raise.
            num_valid += 1

            # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make)
            # NOTE(review): the MolFromSmiles result is passed on without a None check.
            sascore = sascorer.calculateScore(
                Chem.MolFromSmiles(sanitized)) / 10.
            # cycle score, squashed between 0 and 1
            cyclescore = cycle_scorer.score(sanitized)
            cyclescore = cyclescore / (1 + cyclescore)
            distance_score = distance_scorer.score(sanitized)
            # Weighted combination: 75% distance, 15% (1 - SA/10), 10% (1 - squashed cycle).
            score = (0.75 *
                     distance_score) + (0.15 *
                                        (1 - sascore)) + (0.10 *
                                                          (1 - cyclescore))

            # Keyed by sanitized SMILES, so a repeated molecule overwrites its entry.
            all_smiles[sanitized] = (score, generated)

            # Here the best candidate is tracked on the weighted score.
            if current_best_score is None or beats_current(score):
                current_best_score = score
                current_best_smiles = sanitized
    # Timestamp taken immediately before the sampling loop begins.
    start = time.time()
    elapsed = time.time() - start
    # Keep sampling while `elapsed` is below TIME_PER_ITERATION.
    # NOTE(review): no update of `elapsed` is visible in this excerpt — confirm
    # it is refreshed later in the loop body.
    while elapsed < TIME_PER_ITERATION:
        simulations += 1
        try:
            # Request text from the language model (num_chars=100), seeded with
            # the '<s>' start token.
            generated = lm.generate(num_chars=100, text_seed='<s>')

            decoded = DeepSMILESLanguageModelUtils.decode(generated, start='<s>', end='</s>')
            sanitized = DeepSMILESLanguageModelUtils.sanitize(decoded)
            # NOTE(review): `mol` is used below without a None check.
            mol = Chem.MolFromSmiles(sanitized)

            # Only reached when decode, sanitize and MolFromSmiles did not raise.
            num_valid += 1

            # synthetic accessibility score is a number between 1 (easy to make) and 10 (very difficult to make)
            sascore = sascorer.calculateScore(mol) / 10.
            # cycle score, squashed between 0 and 1
            cyclescore = cycle_scorer.score_mol(mol)
            cyclescore = cyclescore / (1 + cyclescore)
            distance_score = distance_scorer.score_mol(mol)
            # Weighted combination: 75% distance, 15% (1 - SA/10), 10% (1 - squashed cycle).
            score = (0.75 * distance_score) + (0.15 * (1 - sascore)) + (0.10 * (1 - cyclescore))

            # Keyed by sanitized SMILES, so a repeated molecule overwrites its entry.
            all_smiles[sanitized] = (score, generated)
            seen.add(sanitized)
            # A distance score of exactly 1.0 is logged as a hit.
            if distance_score == 1.0:
                logger.info("FOUND!")

            # Here the best candidate is tracked on the weighted score.
            if current_best_score is None or beats_current(score):
                current_best_score = score
                current_best_smiles = sanitized