def main(epoch):
    """Plot input/output molecule pairs of one epoch, best-scoring first.

    Loads ``History/in-<epoch>.npy`` and ``History/out-<epoch>.npy``, decodes
    both arrays, and keeps the pairs whose output molecule decoded
    successfully and whose SMILES differs from the input's.  The kept pairs
    are ordered by ``np.sum(evaluate_chem_mol(output))`` in descending order,
    the first 50 molecules (25 pairs) are drawn two per row, and the image is
    shown and then saved as ``out_grid.png``.

    Args:
        epoch: Value substituted into the ``History/in-{}.npy`` and
            ``History/out-{}.npy`` file names.

    NOTE(review): a second ``main(epoch, savefile=None, imagefile=None)``
    appears further down in this source; if both live in the same module the
    later definition replaces this one -- confirm which is intended.
    """
    decodings2 = read_decodings()
    in_mols = np.load("History/in-{}.npy".format(epoch))
    out_mols = np.load("History/out-{}.npy".format(epoch))

    in_mols = [decode(m, decodings2) for m in in_mols]
    # safe_decode yields None for outputs that cannot be decoded/kekulized.
    out_mols = [safe_decode(m, decodings2) for m in out_mols]

    # Keep a pair only if the output is valid and actually differs from
    # the input molecule.
    use = [
        out_mols[i] is not None
        and Chem.MolToSmiles(out_mols[i]) != Chem.MolToSmiles(in_mols[i])
        for i in range(len(out_mols))
    ]

    plot_mols = [[m1, m2] for m1, m2, u in zip(in_mols, out_mols, use) if u]
    order = [
        np.sum(evaluate_chem_mol(out_mols[i]))
        for i in range(len(out_mols))
        if use[i]
    ]

    # Sort on the score only.  Without an explicit key, two equal scores make
    # Python fall back to comparing the [in, out] molecule lists themselves,
    # which raises TypeError because molecule objects define no ordering.
    ranked = sorted(
        zip(order, plot_mols), key=lambda scored: scored[0], reverse=True
    )

    # Flatten [[in, out], ...] into [in, out, in, out, ...] for the grid.
    plot_mols = [mol for _, pair in ranked for mol in pair]

    plot = Draw.MolsToGridImage(plot_mols[:50], molsPerRow=2)
    plot.show()
    plot.save("out_grid.png")
def safe_decode(x, decodings):
    """Decode ``x`` and kekulize the result, returning ``None`` on failure.

    Args:
        x: Encoded molecule, passed unchanged to ``decode``.
        decodings: Decoding table, passed unchanged to ``decode``.

    Returns:
        The molecule returned by ``decode`` after ``Chem.Kekulize`` has been
        called on it, or ``None`` if either step raised an exception.
    """
    try:
        m = decode(x, decodings)
        Chem.Kekulize(m)
        return m
    except Exception:
        # Best-effort by design: any decoding/kekulization error marks the
        # molecule as invalid.  ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt and SystemExit propagate.
        return None
def main(epoch, savefile=None, imagefile=None):
    """Show the changed input/output molecule pairs of one epoch.

    Reads ``History/in-<epoch>.npy`` and ``History/out-<epoch>.npy``, decodes
    both, and keeps every pair whose output decoded successfully and whose
    SMILES differs from the input's.  The first 50 molecules of the kept
    pairs are drawn two per row and displayed.

    Args:
        epoch: Value substituted into the two ``History/*.npy`` file names.
        savefile: If not ``None``, path of a text file that receives one
            ``"<input SMILES> ; <output SMILES>"`` line per kept pair,
            preceded by a header line.
        imagefile: If not ``None``, path the grid image is saved to before
            it is shown.
    """
    decodings2 = read_decodings()
    encoded_inputs = np.load("History/in-{}.npy".format(epoch))
    encoded_outputs = np.load("History/out-{}.npy".format(epoch))

    originals = [decode(enc, decodings2) for enc in encoded_inputs]
    modified = [safe_decode(enc, decodings2) for enc in encoded_outputs]

    # One flag per output: True when the output is valid and not identical
    # to its input.
    keep = []
    for idx, candidate in enumerate(modified):
        if candidate is None:
            keep.append(False)
            continue
        keep.append(
            Chem.MolToSmiles(candidate) != Chem.MolToSmiles(originals[idx])
        )

    # Sorting the pairs by evaluation score is disabled in this variant;
    # pairs stay in file order.
    flat_mols = []
    for before, after, wanted in zip(originals, modified, keep):
        if wanted:
            flat_mols.extend((before, after))

    grid = Draw.MolsToGridImage(flat_mols[:50], molsPerRow=2)
    if imagefile is not None:
        grid.save(imagefile)
    grid.show()

    if savefile is None:
        return

    # flat_mols alternates input, output, input, output, ...
    with open(savefile, "w") as handle:
        handle.write("Initial molecule ; Modified molecule\n")
        for before, after in zip(flat_mols[0::2], flat_mols[1::2]):
            handle.write(
                f'{Chem.MolToSmiles(before)} ; {Chem.MolToSmiles(after)}\n'
            )
def bunch_eval(fs, epoch, decodings):
    """Evaluate a batch of encoded molecules, reusing cached results.

    For every element of ``fs`` a cache key is computed with ``get_key``.
    Keys already present in the module-level ``evaluated_mols`` cache reuse
    the stored value.  The remaining elements are decoded; those that fail
    to decode are scored ``[False, -10]`` and cached immediately, the others
    are sent to ``bunch_evaluation`` in a single call and their results are
    cached as ``(np.array(value), epoch)``.

    Side effects: updates ``evaluated_mols``, prints two progress lines and
    writes the returned array to ``./ret_vals.pkl``.

    Args:
        fs: Sequence of encoded molecules (each is passed to ``get_key`` and
            ``decode``).
        epoch: Stored next to every newly cached result.
        decodings: Decoding table, passed unchanged to ``decode``.

    Returns:
        ``np.ndarray`` with one row per element of ``fs``, in input order.

    Raises:
        AssertionError: If ``bunch_evaluation`` returns a different number of
            results than molecules submitted.
    """
    global evaluated_mols

    keys = [get_key(f) for f in fs]

    results = {}       # key -> evaluation result for this call
    to_evaluate = []   # decoded molecules handed to bunch_evaluation
    pending = {}       # key -> index of its molecule in to_evaluate

    for f, key in zip(fs, keys):
        if key in evaluated_mols:
            results[key] = evaluated_mols[key][0]
            pending.pop(key, None)
            continue
        try:
            mol = decode(f, decodings)
        except Exception:
            # Undecodable molecule: fixed penalty score, cached so it is
            # never retried.
            failed = [False, -10]
            results[key] = failed
            evaluated_mols[key] = (np.asarray(failed), epoch)
            pending.pop(key, None)
        else:
            # A repeated uncached key is submitted again and the latest
            # index wins, so the list given to bunch_evaluation is unchanged
            # from the previous implementation.
            pending[key] = len(to_evaluate)
            to_evaluate.append(mol)

    print('New molecules for evaluation: {}'.format(len(to_evaluate)))

    if to_evaluate:
        evaluations = bunch_evaluation(to_evaluate)
        assert len(evaluations) == len(to_evaluate), \
            'bunch_evaluation returned {} results for {} molecules'.format(
                len(evaluations), len(to_evaluate))
        # Direct key -> index lookup.  The previous "od[key] == i" scan
        # compared already-assigned evaluation values with later indices,
        # which could overwrite a result (value equal to an index) or raise
        # on array-valued results.
        for key, index in pending.items():
            value = evaluations[index]
            results[key] = value
            evaluated_mols[key] = (np.array(value), epoch)

    ret_vals = np.asarray([np.asarray(results[key]) for key in keys])

    # Dump the finished values; previously the dump ran before the list was
    # filled, so the file always contained an empty list.
    with open('./ret_vals.pkl', 'wb') as dump_file:
        pickle.dump(ret_vals, dump_file)

    print('Shape of return values {}'.format(ret_vals.shape))
    return ret_vals


# =============================================================================
# df = pd.read_csv('./out.csv',engine="python")
# for i in range(len(df)):
#     print("Molecule number {}".format(i+1))
#     mol1 = Chem.MolFromSmiles(df.iloc[i,0])
#     mol2 = Chem.MolFromSmiles(df.iloc[i,1])
#     print(get_pIC(mol1),get_pIC(mol2))
# =============================================================================
def evaluate_mol(fs, epoch, decodings):
    """Evaluate one encoded molecule, using the ``evaluated_mols`` cache.

    Args:
        fs: Encoded molecule (passed to ``get_key`` and ``decode``).
        epoch: Stored next to a newly cached result.
        decodings: Decoding table, passed unchanged to ``decode``.

    Returns:
        The cached array if the molecule's key is already in
        ``evaluated_mols``; otherwise ``np.array`` of the value returned by
        ``evaluate_chem_mol``, or of ``[False] * 4`` when decoding or
        evaluation raised.
    """
    global evaluated_mols

    key = get_key(fs)
    # ``dict.has_key`` no longer exists in Python 3; ``in`` is the
    # equivalent membership test and matches what bunch_eval uses.
    if key in evaluated_mols:
        return evaluated_mols[key][0]

    try:
        mol = decode(fs, decodings)
        ret_val = evaluate_chem_mol(mol)
    except Exception:
        # Best-effort: any failure yields an all-False result.  Catching
        # ``Exception`` lets KeyboardInterrupt/SystemExit propagate.
        ret_val = [False] * 4

    evaluated_mols[key] = (np.array(ret_val), epoch)
    return np.array(ret_val)