Exemple #1
0
    def reward_property(self,
                        mol,
                        reward_type,
                        reward_ratio=None,
                        is_ratio=False):
        """Return the reward of ``mol`` for the requested property.

        Only the ``'qed'`` reward type is recognised; every other value
        yields a reward of 0.

        :param mol: molecule handed to ``qed``
        :param reward_type: name of the property to score
        :param reward_ratio: mapping holding a ``'qed'`` weight; only read
            when ``is_ratio`` is true
        :param is_ratio: when true, scale the QED score by
            ``reward_ratio['qed']``
        :return: the (optionally weighted) QED score, or 0 for other types
        """
        # Compare by value: ``is`` tests object identity and matches equal
        # strings only when they happen to be interned.
        if reward_type != 'qed':
            return 0

        reward = qed(mol)
        if is_ratio:
            reward = reward * reward_ratio['qed']
        return reward
Exemple #2
0
 def evaluate_individual(self, individual):
     """Score an individual by the QED of its aromatic SMILES.

     Returns ``None`` when ``individual`` is ``None``; otherwise returns a
     tuple made of the QED score and a one-element list holding that same
     score.
     """
     if individual is not None:
         aromatic_smiles = individual.to_aromatic_smiles()
         qed_value = qed(MolFromSmiles(aromatic_smiles))
         return qed_value, [qed_value]
     return None
Exemple #3
0
def get_qed(mol):
    """
    Compute the QED score of a molecule.
    :param mol: molecule, or None when no molecule is available
    :return: float QED, or nan when mol is None
    """
    return np.nan if mol is None else qed(mol)
Exemple #4
0
    def __init__(self, smiles, config):
        """Build the graph data and property table for one SMILES string.

        :param smiles: SMILES string of the molecule
        :param config: object exposing ``possible_bonds``,
            ``table_of_elements`` and ``vocab_nodes_encode``
        """
        self.smiles = smiles

        # Copy the vocabulary settings from the config before any helper
        # runs, keeping the original assignment order.
        for attr in ('possible_bonds', 'table_of_elements',
                     'vocab_nodes_encode'):
            setattr(self, attr, getattr(config, attr))
        self.mol = Chem.MolFromSmiles(smiles)

        # Graph description derived from the SMILES string.
        self.adj = self._get_adj_mat(smiles)
        self.node_list = self._get_node_list(smiles)
        self.num_atom = len(self.node_list)
        self.expand_mat = self._get_expand_mat(self.adj, self.node_list)

        # Scalar properties computed from the parsed molecule.
        self.property = dict(qed=qed(self.mol), J_score=calc_score(self.mol))
Exemple #5
0
def cal_prop(q, return_dict_prop):
    """Worker loop that computes molecular properties for queued SMILES.

    Items are pulled from ``q`` until the ``'DONE'`` sentinel is received.
    Every other item is unpacked as ``(idx, smi)`` and the list
    ``[logP, SAS, QED, MW, TPSA]`` is stored in ``return_dict_prop`` under
    ``idx``.

    :param q: queue-like object whose ``get()`` returns ``(idx, smi)``
        pairs or the string ``'DONE'``
    :param return_dict_prop: mapping that receives one property list per
        index
    """
    # iter(callable, sentinel) keeps calling q.get() and stops as soon as it
    # returns a value equal to 'DONE'.
    for idx, smi in iter(q.get, 'DONE'):
        mol = Chem.MolFromSmiles(smi)
        return_dict_prop[idx] = [
            MolLogP(mol),
            sascorer.calculateScore(mol),
            qed(mol),
            MolWt(mol),
            TPSA(mol),
        ]
Exemple #6
0
    def __init__(self, smiles):
        """Build the graph data, bookkeeping fields and property table.

        :param smiles: SMILES string of the molecule
        """
        self.smiles = smiles

        # Vocabulary settings come from names defined outside this method.
        self.possible_bonds = possible_bonds
        self.table_of_elements = table_of_elements
        self.vocab_nodes_encode = vocab_nodes_encode
        self.mol = Chem.MolFromSmiles(smiles)

        # Graph description derived from the SMILES string.
        self.adj = self._get_adj_mat(smiles)
        self.node_list = self._get_node_list(smiles)
        self.num_atom = len(self.node_list)
        self.expand_mat = self._get_expand_mat(self.adj, self.node_list)

        # Bookkeeping fields start from fixed initial values.
        self.life_time = self.pool_life_time = 0
        self.similarity = -1

        # Scalar properties computed from the parsed molecule.
        self.property = dict(
            qed=qed(self.mol),
            J_score=calc_score(self.mol),
            MW=ExactMolWt(self.mol),
        )
        self.prior_flag = False
Exemple #7
0
def QED(mol):
    """
    Return the QED score that RDKit's ``qed`` assigns to ``mol``.
    """
    score = qed(mol)
    return score
Exemple #8
0
 def compute(self, mol):
     """Return the QED score of ``mol``."""
     result = qed(mol)
     return result
Exemple #9
0
    def gen_new_mols(self, num_mols=1000, qed_thresh=0.5, max_carbons=6):
        """Sample the latent space and save newly generated molecules.

        The latent vectors in ``self.latent`` are projected to 2D with PCA,
        a subsample is approximated by a Gaussian, new points are drawn from
        that Gaussian and mapped back to the latent space, and each point is
        decoded to a SMILES string. The decoded strings are de-duplicated,
        validated, optionally filtered by QED, written to a ``.smi`` file
        and a few of them are drawn to an image grid.

        :param num_mols: number of latent points to sample and generate;
            capped at the number of available latent vectors
        :param qed_thresh: keep only molecules with QED above this value;
            the filter is applied only when the value lies strictly between
            0 and 1 (``None`` disables it)
        :param max_carbons: forwarded to ``filter_valid_mols``
        """
        print("\n# Generating new molecules ...")

        np.random.seed(self.config.seed)

        # Get 2D latent representation
        pca = PCA(n_components=2)
        latent_2d = pca.fit_transform(self.latent)
        print(pca.explained_variance_ratio_, np.sum(pca.explained_variance_ratio_))

        # Sample latent 2D representation
        num_mols = min(num_mols, latent_2d.shape[0])
        idxs = np.random.choice(latent_2d.shape[0], num_mols, replace=False)
        latent_2d_sampled = latent_2d[idxs, :]
        plot_2d_distribution(latent_2d_sampled, os.path.join(self.gen_folder, 'plots/latent_2d_dist.png'))

        # Approximate latent 2D representation by a gaussian
        mean = np.mean(latent_2d_sampled, axis=0)
        print(mean)

        covariance = np.cov(latent_2d_sampled.T)
        print(covariance)

        latent_2d_gen = np.random.multivariate_normal(mean, covariance, num_mols)
        plot_2d_distribution(latent_2d_gen, os.path.join(self.gen_folder, 'plots/latent_2d_gen_dist.png'))
        for i in range(10, 100, 10):
            per = np.percentile(latent_2d_gen, i, axis=0)
            print("percentile %d ... PC0: %.4f, PC1: %.4f" % (i, per[0], per[1]))

        # Convert back generated 2D latent data to its original dimension
        latent_gen = pca.inverse_transform(latent_2d_gen)

        # Generate valid smiles from latent
        smiles_gen = [latent2smiles(self.lat2states_model, self.sample_model, latent_gen[i:i + 1], self.config.max_len)
                      for i in tqdm(range(latent_gen.shape[0]))]
        check_quality_preds(smiles_gen, self.x_sample)
        smiles_gen = list(set(smiles_gen))
        print("%d molecules without dups ..." % len(smiles_gen))
        smiles_gen_valid = filter_valid_mols(smiles_gen, max_carbons=max_carbons)
        print("%d valid molecules remaining ..." % len(smiles_gen_valid))

        # Filter molecules with QED higher than a threshold.
        # The threshold test is loop-invariant, so evaluate it once.
        apply_thresh = qed_thresh is not None and (0 < qed_thresh < 1)
        fig, ax = plt.subplots()
        qed_list = []
        smiles_gen_valid_filt = []
        for smi in smiles_gen_valid:
            # Molecules whose QED cannot be computed are skipped; catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            try:
                qed_val = qed(Chem.MolFromSmiles(smi))
            except Exception:
                continue
            qed_list.append(qed_val)
            if apply_thresh and qed_val > qed_thresh:
                smiles_gen_valid_filt.append(smi)
        sns.kdeplot(qed_list, ax=ax)
        plt.savefig(os.path.join(self.gen_folder, 'plots/qed_distribution.png'), bbox_inches='tight')
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
        if apply_thresh:
            smiles_gen_valid = smiles_gen_valid_filt.copy()
            print("%d valid molecules with QED > %.2f ..." % (len(smiles_gen_valid), qed_thresh))

        # Save smiles; the context manager closes the file even on error.
        with open(os.path.join(self.gen_folder, 'data/generated_%s.smi' % self.ft_index), 'w') as f:
            for smi in smiles_gen_valid:
                f.write(smi + '\n')

        # Plot some images. Sampling without replacement cannot draw more
        # items than exist, so cap the grid at the number of molecules.
        num_images = min(9, len(smiles_gen_valid))
        if num_images == 0:
            print("No valid molecules to draw ...")
            return
        idxs = np.random.choice(len(smiles_gen_valid), num_images, replace=False)
        mols_sample = [Chem.MolFromSmiles(smiles_gen_valid[i]) for i in idxs]

        img = Draw.MolsToGridImage(mols_sample, molsPerRow=3, subImgSize=(400, 200))
        img.save(os.path.join(self.gen_folder, 'images/generated_mols.png'))
Exemple #10
0
# Read every SMILES line up front; the context manager closes the file
# handle instead of leaving it open for the rest of the script.
with open('./ZINC/smiles.txt') as smiles_f:
    smiles_list = smiles_f.readlines()

# One list per property, filled in the same order as smiles_list.
logPList = []
molWtList = []
TPSAList = []
QEDList = []
SASList = []
for smi in smiles_list:
    smi = smi.strip()
    m = Chem.MolFromSmiles(smi)
    molWt = ExactMolWt(m)
    logP = MolLogP(m)
    TPSA = CalcTPSA(m)
    _qed = qed(m)
    sas = calculateScore(m)

    logPList.append(logP)
    molWtList.append(molWt)
    TPSAList.append(TPSA)
    QEDList.append(_qed)
    SASList.append(sas)

# Convert the collected values to arrays (molWtList is left as a list, as
# in the original script).
logPList = np.asarray(logPList)
TPSAList = np.asarray(TPSAList)
QEDList = np.asarray(QEDList)
SASList = np.asarray(SASList)

np.save('./ZINC/logP.npy', logPList)
np.save('./ZINC/TPSA.npy', TPSAList)
Exemple #11
0
from rdkit import Chem
from rdkit.Chem.QED import qed

# For every input row (id and two SMILES strings), write the id and the QED
# of both molecules as one tab-separated line.
with open('../id_smiles.txt') as src, open('data.txt', 'w') as dst:
    for row in src:
        m_id, s1, s2 = row.split()
        m1 = Chem.MolFromSmiles(s1)
        m2 = Chem.MolFromSmiles(s2)
        # Rows where either SMILES fails to parse are skipped.
        if m1 is not None and m2 is not None:
            c1, c2 = qed(m1), qed(m2)
            dst.write('\t'.join((m_id, str(c1), str(c2))) + '\n')
Exemple #12
0
    # Only lines that start with a double quote are processed.
    if line[0] != '"':
        continue
    # When the second character is not a comma, the rest of the line is
    # remembered as the current SMILES string and the next line is read.
    if line[1] != ",":
        smi = line[1:].strip()
        continue
    # Otherwise compute the properties of the most recently stored SMILES.
    # NOTE(review): `smi` is only bound by the branch above; presumably a
    # SMILES line always precedes the first property line - confirm.
    m = Chem.MolFromSmiles(smi)
    smi2 = Chem.MolToSmiles(m)

    # The values parsed from the line are only referenced by the
    # commented-out assignments below; the properties are recomputed.
    property0 = line[2:].split(",")
    #    logP=float(property0[0])
    #    SAS=float(property0[2])
    #    QED=float(property0[1])

    logP = MolLogP(m)
    SAS = sascorer.calculateScore(m)
    QED = qed(m)

    MW = ExactMolWt(m)
    TPSA = CalcTPSA(m)
    # One output row: re-generated SMILES followed by the five properties.
    line_out = "%s %6.3f %6.3f %6.3f %6.3f %6.3f\n" % (smi2, logP, SAS, QED,
                                                       MW, TPSA)
    fp_out.write(line_out)
    # Keep every value so the lists can be turned into arrays afterwards.
    logP_list += [logP]
    SAS_list += [SAS]
    QED_list += [QED]
    MW_list += [MW]
    TPSA_list += [TPSA]

fp_out.close()

logP_array = np.array(logP_list)
Exemple #13
0
def QED(mol):
    '''
    Return the QED score that RDKit's ``qed`` assigns to ``mol``.
    '''
    score = qed(mol)
    return score
Exemple #14
0
    def step(self, action, force_final=False):
        """Apply one graph-editing action to the current molecule.

        ``action`` is indexed as ``(a_first, a_second, a_edge)``. A value of
        ``a_second`` at or above ``self.max_atom_num`` requests a new atom
        (of type ``a_second - self.max_atom_num``) bonded to ``a_first``;
        a smaller value requests a bond between two existing atoms. When
        the edit fails or the valence check rejects it, the molecule is
        restored from the copy taken at the start of the step.

        :param action: indexable with three entries (first atom, second
            atom or atom-type offset, edge type)
        :param force_final: when truthy, the index of the best QED seen so
            far is reported even if the episode is not terminal
        :return: tuple ``(self.node_arr, self.adj, info, terminal)`` where
            ``info`` holds ``reward_step``, ``qed``, ``smiles`` and
            ``best_index``
        """
        a_first = action[0]
        a_second = action[1]
        a_edge = action[2]
        terminal = False
        total_num = self.mol.GetNumAtoms()

        self.steps = self.steps + 1
        self.old_mol = copy.deepcopy(self.mol)

        property_max_index = -1

        pass_test = True
        reward_step = 0

        # The atom count has already reached its limit: asking for one more
        # atom fails, and so does adding an invalid bond at the limit.
        if total_num == self.max_atom_num:
            if a_second >= self.max_atom_num:
                pass_test = False
            elif not self.add_bond(self.mol, a_first, a_second, a_edge):
                pass_test = False

        else:
            if a_second >= self.max_atom_num:
                # New atom: it is appended, so its index is the old count.
                self.add_atom(self.mol, a_second - self.max_atom_num)
                a_second = total_num

                if not self.add_bond(self.mol, a_first, a_second, a_edge):
                    pass_test = False

            elif a_second < total_num:
                if not self.add_bond(self.mol, a_first, a_second, a_edge):
                    pass_test = False

            else:
                # Index refers to an atom that does not exist yet.
                pass_test = False

        if pass_test and self.val_check(self.mol):
            self.max_atom_invalid_count = 0
            reward_step += self.reward_step_positive
            self.reward_pool.append(qed(self.mol))
            self.update()
        else:
            self.max_atom_invalid_count += 1
            reward_step += self.reward_step_negative
            self.log('Current atom Smile:' + Chem.MolToSmiles(self.old_mol))
            # Roll back to the molecule as it was before this step.
            self.mol = copy.deepcopy(self.old_mol)

        if self.max_atom_invalid_count >= self.max_error_count:
            terminal = True

        # Truthiness instead of ``is True`` so that non-bool truthy flags
        # (e.g. numpy booleans) are honoured as well.
        if terminal or force_final:
            property_max_index = np.argmax(self.reward_pool)

        self.smiles.append(Chem.MolToSmiles(self.mol))

        # NOTE(review): reward_pool[-1] assumes the pool is non-empty here -
        # confirm it is seeded before the first step.
        info = {
            'reward_step': reward_step,
            'qed': self.reward_pool[-1],
            'smiles': self.smiles[-1],
            'best_index': property_max_index
        }

        return self.node_arr, self.adj, info, terminal
Exemple #15
0
def get_qed(smi):
    """Parse ``smi`` into a molecule and return that molecule's QED score."""
    return qed(Chem.MolFromSmiles(smi))
Exemple #16
0
    def reward_property(self, mol, reward_type, reward_ratio):
        """Return the weighted reward of ``mol`` for the requested property.

        Only the ``'qed'`` reward type is recognised; every other value
        yields a reward of 0.

        :param mol: molecule handed to ``qed``
        :param reward_type: name of the property to score
        :param reward_ratio: mapping holding the ``'qed'`` weight
        :return: ``qed(mol) * reward_ratio['qed']``, or 0 for other types
        """
        # Compare by value: ``is`` tests object identity and matches equal
        # strings only when they happen to be interned.
        if reward_type == 'qed':
            return qed(mol) * reward_ratio['qed']
        return 0
Exemple #17
0
    # Keep only molecules whose SMILES contains no '.' (in SMILES notation a
    # dot separates disconnected components).
    if not '.' in MolToSmiles(mol):
        mols.append(mol)
    # Stop as soon as the requested number of molecules has been collected.
    if len(mols) == num_of_sample:
        break
# validity check
num_valid = 0
svgs = []
qeds = np.zeros(num_of_sample)
for idx in range(num_of_sample):
    # Round-trip through SMILES; a molecule counts as valid when it can be
    # parsed back (MolFromSmiles returns None otherwise).
    temp = MolFromSmiles(MolToSmiles(mols[idx]))
    if temp is not None:
        mols[idx] = temp
        num_valid += 1
        qeds[idx] = qed(mols[idx])
# Divide by the actual sample count, like the other metrics, instead of a
# hard-coded 10000.
print("Validity is {:.2%}".format(num_valid / num_of_sample))

# uniqueness check
num_of_unique_gen = len({MolToSmiles(mol) for mol in mols})
print("Uniqueness is {:.2%}".format(num_of_unique_gen / num_of_sample))

# novelty check
data_tgt = [MolFromSmiles(i) for i in train_data]
data_tgt += mols
num_of_novel = len({
    MolToSmiles(mol) for mol in data_tgt
}) + num_of_sample - len(train_data) - num_of_unique_gen
print("Novelty is {:.2%}".format(num_of_novel / num_of_sample))

# =============================================================================
# draw, optional
# =============================================================================