def reward_property(self, mol, reward_type, reward_ratio=None, is_ratio=False):
    """Return the property reward for a molecule.

    Only the ``'qed'`` reward type is handled; every other ``reward_type``
    yields a reward of 0.

    :param mol: molecule handed to ``qed``
    :param reward_type: name of the reward to compute
    :param reward_ratio: mapping whose ``'qed'`` entry is used as a
        multiplier; only read when ``is_ratio`` is true
    :param is_ratio: when true, scale the QED value by ``reward_ratio['qed']``
    :return: the (optionally scaled) QED value, or 0 for other reward types
    """
    # Compare by value. ``is`` tests object identity, which matches an equal
    # string only when the interpreter happens to have interned both objects.
    if reward_type != 'qed':
        return 0

    reward = qed(mol)
    if is_ratio:
        reward = reward * reward_ratio['qed']
    return reward
def evaluate_individual(self, individual):
    """Score an individual by the QED of its molecule.

    :param individual: object exposing ``to_aromatic_smiles()``, or None
    :return: None when ``individual`` is None, otherwise the tuple
        ``(score, [score])`` where ``score`` is the QED value
    """
    if individual is None:
        return None

    aromatic_smiles = individual.to_aromatic_smiles()
    score = qed(MolFromSmiles(aromatic_smiles))
    return score, [score]
def get_qed(mol):
    """
    QED of a molecule, with nan standing in for a missing molecule

    :param mol: molecule, or None
    :return: float QED (np.nan when mol is None)
    """
    return np.nan if mol is None else qed(mol)
def __init__(self, smiles, config):
    """Build the per-molecule record for a SMILES string.

    :param smiles: SMILES string; stored and parsed with ``Chem.MolFromSmiles``
    :param config: object providing ``possible_bonds``, ``table_of_elements``
        and ``vocab_nodes_encode``
    """
    self.smiles = smiles

    # Lookup tables are copied from the supplied config object.
    self.possible_bonds = config.possible_bonds
    self.table_of_elements = config.table_of_elements
    self.vocab_nodes_encode = config.vocab_nodes_encode

    self.mol = Chem.MolFromSmiles(smiles)

    # Graph views derived from the SMILES string.
    adjacency = self._get_adj_mat(smiles)
    self.adj = adjacency
    nodes = self._get_node_list(smiles)
    self.node_list = nodes
    self.num_atom = len(nodes)
    self.expand_mat = self._get_expand_mat(adjacency, nodes)

    # Molecular properties computed once at construction time.
    qed_value = qed(self.mol)
    j_score = calc_score(self.mol)
    self.property = {'qed': qed_value, 'J_score': j_score}
def cal_prop(q, return_dict_prop):
    """Consume ``(idx, smiles)`` items from a queue and store their properties.

    Items are read with ``q.get()`` until the string ``'DONE'`` is received.
    For every other item the list ``[logP, SAS, QED, MW, TPSA]`` is written
    to ``return_dict_prop[idx]``.

    :param q: queue-like object whose ``get()`` returns either ``'DONE'`` or
        an ``(idx, smiles)`` pair
    :param return_dict_prop: mapping that receives the results, keyed by idx
    :return: None
    """
    while True:
        item = q.get()
        if item == 'DONE':
            break

        idx, smi = item
        # NOTE(review): the parse result is used unchecked, so an unparsable
        # SMILES presumably fails in the first descriptor call; confirm the
        # producer only enqueues valid SMILES.
        mol = Chem.MolFromSmiles(smi)
        return_dict_prop[idx] = [
            MolLogP(mol),
            sascorer.calculateScore(mol),
            qed(mol),
            MolWt(mol),
            TPSA(mol),
        ]
def __init__(self, smiles):
    """Build the per-molecule record for a SMILES string.

    The bond, element and vocabulary tables are taken from the module-level
    names ``possible_bonds``, ``table_of_elements`` and ``vocab_nodes_encode``.

    :param smiles: SMILES string; stored and parsed with ``Chem.MolFromSmiles``
    """
    self.smiles = smiles

    # Lookup tables shared through module-level names.
    self.possible_bonds = possible_bonds
    self.table_of_elements = table_of_elements
    self.vocab_nodes_encode = vocab_nodes_encode

    self.mol = Chem.MolFromSmiles(smiles)

    # Graph views derived from the SMILES string.
    adjacency = self._get_adj_mat(smiles)
    self.adj = adjacency
    nodes = self._get_node_list(smiles)
    self.node_list = nodes
    self.num_atom = len(nodes)
    self.expand_mat = self._get_expand_mat(adjacency, nodes)

    # Bookkeeping fields, all starting at their initial values.
    self.life_time = 0
    self.pool_life_time = 0
    self.similarity = -1

    # Molecular properties computed once at construction time.
    qed_value = qed(self.mol)
    j_score = calc_score(self.mol)
    exact_weight = ExactMolWt(self.mol)
    self.property = {
        'qed': qed_value,
        'J_score': j_score,
        'MW': exact_weight,
    }

    self.prior_flag = False
def QED(mol):
    """
    Return the QED score that RDKit's ``qed`` computes for ``mol``
    """
    score = qed(mol)
    return score
def compute(self, mol):
    """Return the value of ``qed(mol)`` for the given molecule."""
    value = qed(mol)
    return value
def gen_new_mols(self, num_mols=1000, qed_thresh=0.5, max_carbons=6):
    """Generate new molecules by sampling a Gaussian fitted in 2D PCA space.

    The latent vectors in ``self.latent`` are projected to two principal
    components, a subsample of them is approximated by a 2D Gaussian, new
    points are drawn from it, mapped back to the latent dimension and decoded
    to SMILES. The results are de-duplicated, passed through
    ``filter_valid_mols``, optionally filtered by QED, written to
    ``data/generated_<ft_index>.smi`` and a sample of up to nine molecules is
    drawn to ``images/generated_mols.png`` (all paths under
    ``self.gen_folder``).

    :param num_mols: number of latent points to sample and to generate;
        capped at the number of rows of the projected latent matrix
    :param qed_thresh: keep only molecules with QED above this value; the
        filter is applied only when the value lies strictly between 0 and 1
    :param max_carbons: forwarded to ``filter_valid_mols``
    :return: None
    """
    print("\n# Generating new molecules ...")
    np.random.seed(self.config.seed)

    # Get 2D latent representation
    pca = PCA(n_components=2)
    latent_2d = pca.fit_transform(self.latent)
    print(pca.explained_variance_ratio_, np.sum(pca.explained_variance_ratio_))

    # Sample latent 2D representation
    num_mols = min(num_mols, latent_2d.shape[0])
    # An int population is sampled as if it were np.arange(n).
    idxs = np.random.choice(latent_2d.shape[0], num_mols, replace=False)
    latent_2d_sampled = latent_2d[idxs, :]
    plot_2d_distribution(latent_2d_sampled, os.path.join(self.gen_folder, 'plots/latent_2d_dist.png'))

    # Approximate latent 2D representation by a gaussian
    mean = np.mean(latent_2d_sampled, axis=0)
    print(mean)
    covariance = np.cov(latent_2d_sampled.T)
    print(covariance)
    latent_2d_gen = np.random.multivariate_normal(mean, covariance, num_mols)
    plot_2d_distribution(latent_2d_gen, os.path.join(self.gen_folder, 'plots/latent_2d_gen_dist.png'))
    for i in range(10, 100, 10):
        per = np.percentile(latent_2d_gen, i, axis=0)
        print("percentile %d ... PC0: %.4f, PC1: %.4f" % (i, per[0], per[1]))

    # Convert back generated 2D latent data to its original dimension
    latent_gen = pca.inverse_transform(latent_2d_gen)

    # Generate valid smiles from latent
    smiles_gen = [
        latent2smiles(self.lat2states_model, self.sample_model, latent_gen[i:i + 1], self.config.max_len)
        for i in tqdm(range(latent_gen.shape[0]))
    ]
    check_quality_preds(smiles_gen, self.x_sample)
    smiles_gen = list(set(smiles_gen))
    print("%d molecules without dups ..." % len(smiles_gen))
    smiles_gen_valid = filter_valid_mols(smiles_gen, max_carbons=max_carbons)
    print("%d valid molecules remaining ..." % len(smiles_gen_valid))

    # Filter molecules with QED higher than a threshold
    apply_thresh = qed_thresh is not None and (0 < qed_thresh < 1)
    fig, ax = plt.subplots()
    qed_list = []
    smiles_gen_valid_filt = []
    for smi in smiles_gen_valid:
        try:
            qed_val = qed(Chem.MolFromSmiles(smi))
        except Exception:
            # Best effort: a molecule whose QED cannot be computed is left
            # out of both the distribution and the filtered list.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue
        qed_list.append(qed_val)
        if apply_thresh and qed_val > qed_thresh:
            smiles_gen_valid_filt.append(smi)
    sns.kdeplot(qed_list, ax=ax)
    plt.savefig(os.path.join(self.gen_folder, 'plots/qed_distribution.png'), bbox_inches='tight')
    if apply_thresh:
        smiles_gen_valid = smiles_gen_valid_filt.copy()
        print("%d valid molecules with QED > %.2f ..." % (len(smiles_gen_valid), qed_thresh))

    # Save smiles
    smi_path = os.path.join(self.gen_folder, 'data/generated_%s.smi' % self.ft_index)
    with open(smi_path, 'w') as f:
        for smi in smiles_gen_valid:
            f.write(smi + '\n')

    # Plot some images. Sampling without replacement cannot draw more items
    # than exist, so cap the grid at the number of molecules available.
    num_shown = min(9, len(smiles_gen_valid))
    if num_shown == 0:
        return
    idxs = np.random.choice(len(smiles_gen_valid), num_shown, replace=False)
    mols_sample = []
    smis_sample = []
    for i in idxs:
        smis_sample.append(smiles_gen_valid[i])
        mols_sample.append(Chem.MolFromSmiles(smiles_gen_valid[i]))
    img = Draw.MolsToGridImage(mols_sample, molsPerRow=3, subImgSize=(400, 200))
    img.save(os.path.join(self.gen_folder, 'images/generated_mols.png'))
# Compute descriptors for every SMILES line in ./ZINC/smiles.txt and save the
# logP and TPSA arrays next to it.
# Read the file under a context manager so the handle is closed after use.
with open('./ZINC/smiles.txt') as smiles_f:
    smiles_list = smiles_f.readlines()

logPList = []
molWtList = []
TPSAList = []
QEDList = []
SASList = []

for smi in smiles_list:
    smi = smi.strip()
    # NOTE(review): the parse result is used unchecked; presumably every line
    # of the input is a valid SMILES. Confirm against the data file.
    m = Chem.MolFromSmiles(smi)
    molWt = ExactMolWt(m)
    logP = MolLogP(m)
    TPSA = CalcTPSA(m)
    _qed = qed(m)
    sas = calculateScore(m)

    logPList.append(logP)
    molWtList.append(molWt)
    TPSAList.append(TPSA)
    QEDList.append(_qed)
    SASList.append(sas)

# molWtList stays a plain list; only the four lists below are converted here.
logPList = np.asarray(logPList)
TPSAList = np.asarray(TPSAList)
QEDList = np.asarray(QEDList)
SASList = np.asarray(SASList)

np.save('./ZINC/logP.npy', logPList)
np.save('./ZINC/TPSA.npy', TPSAList)
from rdkit import Chem
from rdkit.Chem.QED import qed

# Each input line holds three whitespace-separated fields: an id followed by
# two SMILES strings. For every pair where both SMILES parse, write
# "<id>\t<qed1>\t<qed2>" to data.txt.
with open('../id_smiles.txt') as f, open('data.txt', 'w') as w:
    for line in f:
        if not line.strip():
            # A blank line (e.g. a trailing newline at end of file) has no
            # fields to unpack; skip it instead of raising ValueError.
            continue
        m_id, s1, s2 = line.split()
        m1, m2 = Chem.MolFromSmiles(s1), Chem.MolFromSmiles(s2)
        if m1 is None or m2 is None:
            # MolFromSmiles gave None for at least one SMILES; drop the pair.
            continue
        c1, c2 = qed(m1), qed(m2)
        w.write(m_id + '\t' + str(c1) + '\t' + str(c2) + '\n')
if line[0] != '"': continue if line[1] != ",": smi = line[1:].strip() continue m = Chem.MolFromSmiles(smi) smi2 = Chem.MolToSmiles(m) property0 = line[2:].split(",") # logP=float(property0[0]) # SAS=float(property0[2]) # QED=float(property0[1]) logP = MolLogP(m) SAS = sascorer.calculateScore(m) QED = qed(m) MW = ExactMolWt(m) TPSA = CalcTPSA(m) line_out = "%s %6.3f %6.3f %6.3f %6.3f %6.3f\n" % (smi2, logP, SAS, QED, MW, TPSA) fp_out.write(line_out) logP_list += [logP] SAS_list += [SAS] QED_list += [QED] MW_list += [MW] TPSA_list += [TPSA] fp_out.close() logP_array = np.array(logP_list)
def QED(mol):
    '''
    Return the QED score that RDKit's ``qed`` computes for ``mol``
    '''
    score = qed(mol)
    return score
def step(self, action, force_final=False):
    """Apply one action to the current molecule and report the outcome.

    ``action`` is indexed at positions 0, 1 and 2, giving the first atom
    index, the second atom index (or a new-atom selector) and the edge value
    passed to ``add_bond``. A second index of ``self.max_atom_num`` or more
    requests a new atom: ``add_atom`` is called with
    ``a_second - self.max_atom_num`` and the bond is then made to index
    ``total_num``.

    If the edit is rejected, or ``val_check`` fails, ``self.mol`` is restored
    from the copy taken at the start of the step.

    :param action: indexable with at least three entries (see above)
    :param force_final: when exactly ``True``, run the end-of-episode
        bookkeeping even if the error limit has not been reached
    :return: ``(self.node_arr, self.adj, info, terminal)`` where ``info`` has
        the keys ``'reward_step'``, ``'qed'``, ``'smiles'`` and
        ``'best_index'``
    """
    a_first = action[0]
    a_second = action[1]
    a_edge = action[2]
    terminal = False
    total_num = self.mol.GetNumAtoms()
    self.steps = self.steps + 1
    # Snapshot used to roll back when the action turns out to be invalid.
    self.old_mol = copy.deepcopy(self.mol)
    property_max_index = -1
    pass_test = True
    reward_step = 0
    # The atom count has already reached the limit: trying to add another
    # atom is an error, and so is adding a wrong bond once at the limit.
    if total_num == self.max_atom_num:
        if a_second >= self.max_atom_num:
            pass_test = False
        elif a_second < self.max_atom_num:
            if not self.add_bond(self.mol, a_first, a_second, a_edge):
                pass_test = False
    else:
        if a_second >= self.max_atom_num:
            # New-atom request: append the atom, then bond to it. Its index
            # is the atom count from before the insertion.
            self.add_atom(self.mol, a_second - self.max_atom_num)
            a_second = total_num
            if not self.add_bond(self.mol, a_first, a_second, a_edge):
                pass_test = False
        elif a_second < total_num:
            if not self.add_bond(self.mol, a_first, a_second, a_edge):
                pass_test = False
        else:
            # total_num <= a_second < max_atom_num: refers to an atom that
            # does not exist yet.
            pass_test = False
    if pass_test and self.val_check(self.mol):
        self.max_atom_invalid_count = 0
        reward_step += self.reward_step_positive  # / self.max_atom_num
        self.reward_pool.append(qed(self.mol))
        self.update()
    else:
        self.max_atom_invalid_count += 1
        reward_step += self.reward_step_negative  # / self.max_atom_num
        # self.log('Step{0} valency test failed!'.format(self.steps))
        self.log('Current atom Smile:' + Chem.MolToSmiles(self.old_mol))
        # Roll back to the molecule as it was before this action.
        self.mol = copy.deepcopy(self.old_mol)
    # Too many consecutive invalid actions ends the episode.
    if self.max_atom_invalid_count >= self.max_error_count:
        terminal = True
    if terminal or force_final is True:
        property_max_index = np.argmax(self.reward_pool)
        # reward_step = self.reward_pool[int(property_max_index)] * self.reward_ratio['qed']
        # NOTE(review): property_best is assigned but not used in this method.
        property_best = self.reward_pool[int(property_max_index)]
        # reward_step += self.qed_metric(property_best)
        # NOTE(review): confirm this append belongs to the end-of-episode
        # branch rather than running on every step.
        self.smiles.append(Chem.MolToSmiles(self.mol))
    # NOTE(review): reward_pool[-1] and smiles[-1] assume both lists are
    # non-empty here; presumably they are seeded elsewhere. Verify.
    info = {
        'reward_step': reward_step,
        'qed': self.reward_pool[-1],
        'smiles': self.smiles[-1],
        'best_index': property_max_index
    }
    return self.node_arr, self.adj, info, terminal
def get_qed(smi):
    """Parse a SMILES string and return the QED of the resulting molecule.

    :param smi: SMILES string handed to ``Chem.MolFromSmiles``
    :return: result of ``qed`` on the parsed molecule
    """
    return qed(Chem.MolFromSmiles(smi))
def reward_property(self, mol, reward_type, reward_ratio):
    """Return the weighted property reward for a molecule.

    Only the ``'qed'`` reward type is handled; every other ``reward_type``
    yields a reward of 0.

    :param mol: molecule handed to ``qed``
    :param reward_type: name of the reward to compute
    :param reward_ratio: mapping whose ``'qed'`` entry multiplies the QED value
    :return: ``qed(mol) * reward_ratio['qed']``, or 0 for other reward types
    """
    # Compare by value. ``is`` tests object identity, which matches an equal
    # string only when the interpreter happens to have interned both objects.
    if reward_type == 'qed':
        return qed(mol) * reward_ratio['qed']
    return 0
if not '.' in MolToSmiles(mol): mols.append(mol) if len(mols) == num_of_sample: break ''' validity check ''' num_valid = 0 svgs = [] qeds = np.zeros(num_of_sample) for idx in range(num_of_sample): temp = MolFromSmiles(MolToSmiles(mols[idx])) if temp is not None: mols[idx] = temp num_valid += 1 qeds[idx] = qed(mols[idx]) print("Validity is {:.2%}".format(num_valid / 10000)) ''' uniqueness check ''' num_of_unique_gen = len(set([MolToSmiles(mol) for mol in mols])) print("Uniqueness is {:.2%}".format(num_of_unique_gen / num_of_sample)) ''' novelty check ''' data_tgt = [MolFromSmiles(i) for i in train_data] data_tgt += mols num_of_novel = len(set([ MolToSmiles(mol) for mol in data_tgt ])) + num_of_sample - len(train_data) - num_of_unique_gen print("Novelty is {:.2%}".format(num_of_novel / num_of_sample)) # ============================================================================= # draw, optional # =============================================================================