Example #1
def get_score(src, trg, task='logp04'):
    if task == 'logp04':
        logp_improvement = penalized_logp(trg) - penalized_logp(src)
        # return logp_improvement * (0.6+min(sim, 0.4))
        return logp_improvement

    elif task == 'qed':
        return qed(trg)

    elif task == 'drd2':
        try:
            val = drd2(trg)
        except Exception:
            # drd2() can fail on malformed generated SMILES; log the
            # offending string and fall back to a zero score.
            print("drd2 scoring failed for:", trg)
            val = 0
        return val

    else:
        raise ValueError('wrong task: %s' % task)
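Note: penalized_logp, qed, drd2, and similarity in these examples are assumed to come from the project's property-scoring module; penalized_logp is typically MolLogP minus a synthetic-accessibility score and a long-cycle penalty, and drd2 is a trained activity predictor. A minimal RDKit-based sketch of the two helpers that are easy to reproduce, assuming Morgan fingerprints of radius 2:

from rdkit import Chem, DataStructs
from rdkit.Chem import AllChem, QED


def qed(smiles):
    # QED drug-likeness in [0, 1]; 0.0 for unparseable SMILES.
    mol = Chem.MolFromSmiles(smiles)
    return QED.qed(mol) if mol is not None else 0.0


def similarity(smiles_a, smiles_b):
    # Tanimoto similarity over Morgan fingerprints (radius 2, 2048 bits);
    # the original project may use a different fingerprint.
    mol_a = Chem.MolFromSmiles(smiles_a)
    mol_b = Chem.MolFromSmiles(smiles_b)
    if mol_a is None or mol_b is None:
        return 0.0
    fp_a = AllChem.GetMorganFingerprintAsBitVect(mol_a, 2, nBits=2048)
    fp_b = AllChem.GetMorganFingerprintAsBitVect(mol_b, 2, nBits=2048)
    return DataStructs.TanimotoSimilarity(fp_a, fp_b)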
Example #2
def get_scores(src, trg):
    sim = similarity(src, trg)
    # if sim < 0.4 or sim >= 1.0:
    if sim >= 1.0:
        sim = 0

    logp_improvement = penalized_logp(trg) - penalized_logp(src)
    qed_score = qed(trg)
    drd2_score = drd2(trg)

    return sim, logp_improvement, qed_score, drd2_score
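A quick usage sketch, reusing the molecule from Example #5 (and assuming the property helpers are available): an output identical to the input trips the sim >= 1.0 clamp, so a model that merely copies its input gets zero similarity credit.

src = 'COC1=CC=C(C=C1)C(=O)N1CCCC1=O'
trg = src  # identical output: sim >= 1.0, clamped to 0
sim, logp_imp, qed_score, drd2_score = get_scores(src, trg)
print(sim, logp_imp, qed_score, drd2_score)  # sim == 0, logp_imp == 0.0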
Example #3
def get_score(src, trg, task='logp04'):
    sim = similarity(src, trg)
    if sim >= 1.0:
        return 0
    if task == 'logp04':
        logp_improvement = penalized_logp(trg) - penalized_logp(src)
        # return logp_improvement * (0.6+min(sim, 0.4))
        if sim < 0.4:
            return 0
        else:
            return logp_improvement

    elif task == 'qed':
        if sim < 0.4:
            return 0
        else:
            return qed(trg)

    elif task == 'drd2':
        if sim < 0.4:
            return 0
        else:
            try:
                val = drd2(trg)
            except Exception:
                # As in Example #1: log the failing SMILES and score 0.
                print("drd2 scoring failed for:", trg)
                val = 0
            return val

    else:
        raise ValueError('wrong task: %s' % task)
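Examples #1 and #3 differ only in whether the sim >= 0.4 constraint is enforced inside get_score. A small sketch of scoring decoded candidates under all three tasks with the constrained variant (candidate_smiles is illustrative and left empty here):

src = 'COC1=CC=C(C=C1)C(=O)N1CCCC1=O'
candidate_smiles = []  # hypothetical: fill with decoded model outputs
for trg in candidate_smiles:
    scores = {task: get_score(src, trg, task=task)
              for task in ('logp04', 'qed', 'drd2')}
    print(trg, scores)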
Example #4
def evaluate(test_smiles_list, model, moltokenizer, log_diff_list,
             qed_diff_list, drd2_diff_list):
    smiles_list = []
    ids_list = []
    property_x_list = []
    property_desired_list = []

    # Desired-property grid size per input molecule (unused in this excerpt).
    n_grid = 3 * 3 * 1

    for idx, smiles in enumerate(test_smiles_list):
        ids = moltokenizer.encode(smiles)
        logp_val = penalized_logp(smiles)
        qed_val = qed(smiles)
        drd2_val = drd2(smiles)

        property_x = np.expand_dims(np.array([logp_val, qed_val, drd2_val]),
                                    axis=0)

        for log_diff in log_diff_list:
            logp_desired = logp_val + log_diff
            for qed_diff in qed_diff_list:
                qed_desired = qed_val + qed_diff
                for drd2_diff in drd2_diff_list:

                    drd2_desired = drd2_val + drd2_diff

                    property_desired = np.expand_dims(
                        np.array([logp_desired, qed_desired, drd2_desired]),
                        axis=0)

                    smiles_list.append(smiles)
                    ids_list.append(ids)
                    property_x_list.append(property_x)
                    property_desired_list.append(property_desired)

    x = np.array(
        tf.keras.preprocessing.sequence.pad_sequences(ids_list,
                                                      dtype="int64",
                                                      padding="post"))
    px = np.concatenate(property_x_list, axis=0)
    py = np.concatenate(property_desired_list, axis=0)

    # Spot-check a couple of input/desired property rows.
    print(px[2, :])
    print(py[2, :])
    print(px[10, :])
    print(py[10, :])

    # `args` is a module-level argparse namespace: args.eb is the predict
    # batch size, args.p the number of scoring processes.
    outputs, _ = model.predict([x, px, py], batch_size=args.eb, verbose=1)

    if args.p == 1:
        valid_smiles_list = []
        score_plogp_all = []
        score_qed_all = []
        score_drd2_all = []
        for idx, output in tqdm.tqdm(enumerate(outputs)):
            smiles_x = smiles_list[idx]
            smiles_y = moltokenizer.decode(output)
            sim = similarity(smiles_x, smiles_y)
            if sim < 0.4:
                score_plogp_all.append(0)
                score_qed_all.append(0)
                score_drd2_all.append(0)
                valid_smiles_list.append('')
            else:
                score_plogp = get_score(smiles_x, smiles_y, task='logp04')
                score_qed = get_score(smiles_x, smiles_y, task='qed')
                score_drd2 = get_score(smiles_x, smiles_y, task='drd2')
                score_plogp_all.append(score_plogp)
                score_qed_all.append(score_qed)
                score_drd2_all.append(score_drd2)
                valid_smiles_list.append(smiles_y)

    else:
        with Timer("score multi calculation..."):
            n = len(outputs)
            n_proc = args.p
            batch = math.ceil(n / n_proc)

            # Each worker scores one contiguous slice of the decoded
            # outputs; see the worker_wrapper sketch after this example.
            with Pool(processes=n_proc) as pool:
                r = pool.map_async(
                    worker_wrapper,
                    zip(repeat(outputs), repeat(smiles_list), repeat(n),
                        range(1, n_proc + 1), range(0, n, batch),
                        range(batch, batch * n_proc + 1, batch)))
                r.wait()

            score_all = []
            valid_smiles_list = []
            for partial_score, valid_smiles_p_list in r.get():
                score_all += partial_score
                valid_smiles_list += valid_smiles_p_list

            score_all = np.array(score_all)
            score_plogp_all = score_all[:, 0]
            score_qed_all = score_all[:, 1]
            score_drd2_all = score_all[:, 2]

    return score_plogp_all, score_qed_all, score_drd2_all, valid_smiles_list
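worker_wrapper is not shown in this excerpt. A hypothetical reconstruction, consistent with the arguments zipped above: each worker receives the full outputs and smiles_list plus its proc_id and a [start, end) slice, and moltokenizer is assumed to be reachable at module level inside the workers.

def worker_wrapper(args_tuple):
    # Hypothetical: score one worker's slice of the decoded outputs
    # with the same logic as the single-process branch above.
    outputs, smiles_list, n, proc_id, start, end = args_tuple
    partial_score, valid_smiles = [], []
    for idx in range(start, min(end, n)):
        smiles_x = smiles_list[idx]
        smiles_y = moltokenizer.decode(outputs[idx])
        if similarity(smiles_x, smiles_y) < 0.4:
            partial_score.append((0, 0, 0))
            valid_smiles.append('')
        else:
            partial_score.append(
                (get_score(smiles_x, smiles_y, task='logp04'),
                 get_score(smiles_x, smiles_y, task='qed'),
                 get_score(smiles_x, smiles_y, task='drd2')))
            valid_smiles.append(smiles_y)
    return partial_score, valid_smiles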
Example #5
    )

    moltokenizer = Moltokenizer(params["vocab_file"])
    epoch_num = 270  # drd2 test 0.61, V0011
    # epoch_num = 210
    tst_model = get_model(params, epoch_num)

    selected_smiles = 'COC1=CC=C(C=C1)C(=O)N1CCCC1=O'

    log_diff_list = [0.0]
    qed_diff_list = [-0.1, -0.05, 0.0, 0.05, 0.1]
    drd2_diff_list = np.array(range(6, 10)) / 10.0

    org_plp = penalized_logp(selected_smiles)
    org_qed = qed(selected_smiles)
    org_drd2 = drd2(selected_smiles)
    score_plogp_all, score_qed_all, score_drd2_all, valid_smiles_list = \
        evaluate([selected_smiles], tst_model, moltokenizer, log_diff_list, qed_diff_list, drd2_diff_list)

    best_idx = np.argmax(score_drd2_all)
    new_plp = score_plogp_all[best_idx]
    new_qed = score_qed_all[best_idx]
    new_drd2 = score_drd2_all[best_idx]
    new_smiles = valid_smiles_list[best_idx]

    print(org_plp, org_qed, org_drd2)
    print(new_plp, new_qed, new_drd2)

    print("improvemend:", new_drd2 - org_drd2)
    print("diff:", (new_plp - org_plp)**2 + (new_qed - org_qed)**2)

    print(new_smiles)
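For reference, the sweep above decodes one candidate per point of the desired-property grid:

# 1 logP offset x 5 QED offsets x 4 DRD2 offsets = 20 candidates
# for the single input molecule.
n_candidates = len(log_diff_list) * len(qed_diff_list) * len(drd2_diff_list)
assert n_candidates == 1 * 5 * 4 == 20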