def main(fragment_file, lead_file):
    """Fragment the molecule library, encode the lead molecules and train.

    The molecules from ``fragment_file`` and ``lead_file`` are merged and
    fragmented, the fragment encodings/decodings are built and the decodings
    saved, the lead molecules are encoded, actor and critic models are built
    from the encoded shape, and ``train`` is run.  The training history is
    written to ``History/history.npy``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragmentation
            library.
        lead_file: Path handed to ``read_file`` for the lead molecules.

    Raises:
        ValueError: If ``lead_file`` yields no molecules.
        AssertionError: If ``get_fragments`` returns no fragments or no used
            molecules.
    """
    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_leads = len(lead_mols)
    if n_leads == 0:
        # With zero leads the tail slices below would read ``[-0:]``, which
        # is the *whole* sequence, and training would silently run on every
        # library molecule instead of on the leads.
        raise ValueError(
            "No lead molecules were read from {!r}".format(lead_file))

    # The leads are appended last so they can be recovered as the tail of
    # the merged list after fragmentation.
    fragment_mols += lead_mols
    # This count is taken after the merge, so it includes the leads.
    logging.info("Read %s molecules for fragmentation library",
                 len(fragment_mols))
    logging.info("Read %s lead molecules", n_leads)

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))
    assert len(fragments), "get_fragments returned no fragments"
    assert len(used_mols), "get_fragments returned no used molecules"

    encodings, decodings = get_encodings(fragments)
    save_decodings(decodings)
    logging.info("Saved decodings")

    # Select leads through the matching tail of ``used_mols``.
    # NOTE(review): presumably a mask/index of the molecules that could be
    # fragmented -- confirm against get_fragments.
    lead_tail = np.asarray(fragment_mols[-n_leads:])
    lead_mols = lead_tail[used_mols[-n_leads:]]

    X = encode_list(lead_mols, encodings)

    logging.info("Building models")
    # Models are sized from the per-molecule encoded shape.
    actor, critic = build_models(X.shape[1:])

    X = clean_good(X, decodings)

    logging.info("Training")
    history = train(X, actor, critic, decodings)

    logging.info("Saving")
    np.save("History/history.npy", history)
def main(fragment_file, lead_file):
    """Encode the lead molecules with stored encodings, train and save models.

    The molecules from ``fragment_file`` and ``lead_file`` are merged and
    fragmented.  Encodings and decodings are loaded with ``read_encodings`` /
    ``read_decodings`` rather than being rebuilt from the fragments.  The
    lead molecules are encoded, actor and critic models are built and
    trained, the history is written to ``History/history.npy`` and both
    models are saved under ``./saved_models/``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragmentation
            library.
        lead_file: Path handed to ``read_file`` for the lead molecules.

    Returns:
        ``-1`` when there is nothing to train on (no lead molecules were
        read, or the encoded array is empty); ``None`` otherwise.

    Raises:
        AssertionError: If ``get_fragments`` returns no fragments or no used
            molecules.
    """
    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_leads = len(lead_mols)

    # The leads are appended last so they can be recovered as the tail of
    # the merged list after fragmentation.
    fragment_mols += lead_mols
    # This count is taken after the merge, so it includes the leads.
    logging.info("Read %s molecules for fragmentation library",
                 len(fragment_mols))
    logging.info("Read %s lead molecules", n_leads)

    if n_leads == 0:
        # With zero leads the tail slices below would read ``[-0:]``, which
        # is the *whole* sequence, and training would silently run on every
        # library molecule.  Report it the same way as an empty encoding.
        logging.warning("No lead molecules were read from %s", lead_file)
        return -1

    fragments, used_mols = get_fragments(fragment_mols)
    logging.info("Num fragments: %s", len(fragments))
    logging.info("Total molecules used: %s", len(used_mols))
    assert len(fragments), "get_fragments returned no fragments"
    assert len(used_mols), "get_fragments returned no used molecules"

    # Select leads through the matching tail of ``used_mols``.
    # NOTE(review): presumably a mask/index of the molecules that could be
    # fragmented -- confirm against get_fragments.
    lead_tail = np.asarray(fragment_mols[-n_leads:])
    lead_mols = lead_tail[used_mols[-n_leads:]]

    # Encodings/decodings must already exist on disk: this version loads
    # them instead of calling get_encodings on the fresh fragments.
    decodings = read_decodings()
    encodings = read_encodings()
    logging.info("Loaded encodings and decodings")

    X = encode_list(lead_mols, encodings)
    if X.shape[0] == 0:
        return -1

    logging.info("Building models")
    # Models are sized from the per-molecule encoded shape.
    actor, critic = build_models(X.shape[1:])

    # NOTE: the clean_good filtering step is disabled in this version, so X
    # is unchanged here.  If it is re-enabled, re-check X.shape[0] before
    # training.

    logging.info("Training")
    history = train(X, actor, critic, decodings)

    logging.info("Saving")
    np.save("History/history.npy", history)
    actor.save('./saved_models/generation')
    critic.save('./saved_models/critic')
def main(fragment_file, lead_file):
    """Fragment the molecule library, encode the lead molecules and train.

    Same pipeline as the logging variants of this entry point, without the
    progress messages: merge and fragment the molecules, build the
    encodings/decodings and save the decodings, encode the lead molecules,
    build the actor and critic models, train, and write the history to
    ``History/history.npy``.

    Args:
        fragment_file: Path handed to ``read_file`` for the fragmentation
            library.
        lead_file: Path handed to ``read_file`` for the lead molecules.

    Raises:
        ValueError: If ``lead_file`` yields no molecules.
    """
    fragment_mols = read_file(fragment_file)
    lead_mols = read_file(lead_file)
    n_leads = len(lead_mols)
    if n_leads == 0:
        # With zero leads the tail slices below would read ``[-0:]``, which
        # is the *whole* sequence, and training would silently run on every
        # library molecule instead of on the leads.
        raise ValueError(
            "No lead molecules were read from {!r}".format(lead_file))

    # The leads are appended last so they can be recovered as the tail of
    # the merged list after fragmentation.
    fragment_mols += lead_mols

    fragments, used_mols = get_fragments(fragment_mols)
    encodings, decodings = get_encodings(fragments)
    save_decodings(decodings)

    # Select leads through the matching tail of ``used_mols``.
    # NOTE(review): presumably a mask/index of the molecules that could be
    # fragmented -- confirm against get_fragments.
    lead_tail = np.asarray(fragment_mols[-n_leads:])
    lead_mols = lead_tail[used_mols[-n_leads:]]

    X = encode_list(lead_mols, encodings)
    # Models are sized from the per-molecule encoded shape.
    actor, critic = build_models(X.shape[1:])
    X = clean_good(X, decodings)

    history = train(X, actor, critic, decodings)
    np.save("History/history.npy", history)
def main(epoch, gen):
    """Optionally generate modified molecules, then score and plot them.

    When ``gen == 1`` the lead molecules from ``Data/trial.csv`` are encoded
    with the stored encodings and handed to ``modify_mols``.  Afterwards the
    molecule pairs for ``epoch`` are collected by ``Show_Epoch.main`` into
    ``./past outputs/out<epoch>.csv``, pIC50 values are predicted for the
    initial and the modified molecule of every pair, and the comparison is
    written to ``out_pIC<epoch>.csv`` / ``act_pIC<epoch>.csv``, printed, and
    shown as a molecule grid and a histogram.

    Args:
        epoch: Label used in the output file names.  Replaced by ``1000``
            when ``gen == 1``.
        gen: ``1`` runs the generation step first; any other value skips it.

    Returns:
        ``0`` on completion.

    Raises:
        ValueError: If ``gen == 1`` and the lead file yields no molecules.
        AssertionError: If ``gen == 1`` and ``get_fragments`` returns no
            fragments or no used molecules.
    """
    if gen == 1:
        lead_file = "Data/trial.csv"
        fragment_file = "Data/molecules.smi"

        fragment_mols = read_file(fragment_file)
        lead_mols = read_file(lead_file)
        n_leads = len(lead_mols)
        if n_leads == 0:
            # With zero leads the tail slices below would read ``[-0:]``,
            # i.e. the whole sequence, and every library molecule would be
            # modified instead of the leads.
            raise ValueError(
                "No lead molecules were read from {!r}".format(lead_file))

        # The leads are appended last so they can be recovered as the tail
        # of the merged list after fragmentation.
        fragment_mols += lead_mols
        # This count is taken after the merge, so it includes the leads.
        logging.info("Read %s molecules for fragmentation library",
                     len(fragment_mols))
        logging.info("Read %s lead molecules", n_leads)

        fragments, used_mols = get_fragments(fragment_mols)
        logging.info("Num fragments: %s", len(fragments))
        logging.info("Total molecules used: %s", len(used_mols))
        assert len(fragments), "get_fragments returned no fragments"
        assert len(used_mols), "get_fragments returned no used molecules"

        # Select leads through the matching tail of ``used_mols``.
        # NOTE(review): presumably a mask/index of the molecules that could
        # be fragmented -- confirm against get_fragments.
        lead_tail = np.asarray(fragment_mols[-n_leads:])
        lead_mols = lead_tail[used_mols[-n_leads:]]

        decodings = read_decodings()
        encodings = read_encodings()
        logging.info("Loaded encodings and decodings")

        X = encode_list(lead_mols, encodings)
        modify_mols(X, decodings)

        # NOTE(review): the original layout of this function was flattened;
        # this assignment is assumed to belong to the generation branch, so
        # that a caller-supplied epoch is honoured when gen != 1 -- confirm.
        epoch = 1000

    file_name = './past outputs/out' + str(epoch) + '.csv'
    logging.info("Collecting and storing all molecules in %s", file_name)
    Show_Epoch.main(epoch, file_name)

    df = pd.read_csv(file_name, sep=";")

    # Column 0 holds the initial SMILES, column 1 the modified SMILES.  Rows
    # whose modified SMILES cannot be parsed are dropped; the SMILES of the
    # rows that are kept are recorded alongside the molecules so they stay
    # aligned with the predictions below.
    kept_initial = []
    kept_modified = []
    moli = []
    molm = []
    for initial_smiles, modified_smiles in zip(df.iloc[:, 0], df.iloc[:, 1]):
        modified_mol = Chem.MolFromSmiles(modified_smiles)
        if modified_mol is None:
            continue
        moli.append(Chem.MolFromSmiles(initial_smiles))
        molm.append(modified_mol)
        kept_initial.append(initial_smiles)
        kept_modified.append(modified_smiles)

    logging.info("Predicting pIC50 values of the initial molecules")
    ini = np.asarray(get_pIC50(moli))
    logging.info("Predicting pIC50 values of the predicted molecules")
    mod = np.asarray(get_pIC50(molm))

    changes = pd.DataFrame(np.transpose(np.asarray([ini, mod])),
                           columns=['Initial_pIC', 'Modified_pIC'])
    # Assign the filtered lists, not the raw df columns: the raw columns
    # would be aligned on the original row index and pair SMILES with the
    # wrong predictions as soon as one row had been dropped.
    changes['Initial_mol'] = kept_initial
    changes['Modified_mol'] = kept_modified
    changes['Delta'] = changes['Modified_pIC'] - changes['Initial_pIC']
    changes.sort_values(by='Delta', ascending=False, inplace=True)

    # Pairs that moved from below to above the pIC50 threshold of 7.
    crossed = (changes['Modified_pIC'] > 7) & (changes['Initial_pIC'] < 7)
    inact_to_act = changes.loc[
        crossed, ['Modified_pIC', 'Initial_pIC', 'Delta']
    ].sort_values(by='Delta', ascending=False)

    changes.to_csv('./past outputs/out_pIC' + str(epoch) + '.csv',
                   index=False)
    inact_to_act.to_csv('./past outputs/act_pIC' + str(epoch) + '.csv',
                        index=False)
    print(inact_to_act.head())
    print(changes.head())

    # Draw the (up to) five pairs with the largest Delta, one pair per row:
    # initial molecule on the left, modified molecule on the right.
    from rdkit.Chem import Draw
    top = changes.head(5)
    grid_mols = []
    for initial_smiles, modified_smiles in zip(top['Initial_mol'],
                                               top['Modified_mol']):
        grid_mols.append(Chem.MolFromSmiles(initial_smiles))
        grid_mols.append(Chem.MolFromSmiles(modified_smiles))
    if grid_mols:
        plot = Draw.MolsToGridImage(grid_mols, molsPerRow=2)
        plot.show()

    bins = np.linspace(4, 10, 14)
    plt.hist(changes['Initial_pIC'], bins, alpha=0.5, label='initial',
             color='blue')
    plt.hist(changes['Modified_pIC'], bins, alpha=0.5, label='modified',
             color='green')
    plt.legend(loc='upper right')
    plt.show()

    # Summarise on the Delta column only; summing the whole frame would
    # also "sum" the SMILES string columns.
    delta = changes['Delta']
    improved = delta > 0
    worsened = delta < 0
    sp = delta[improved].sum()
    sn = delta[worsened].sum()
    cp = improved.sum()
    cn = worsened.sum()
    print('Sum of positive changes = {}\tNo. of +ves = {}\nSum of negative changes = {}\tNo. of -ves = {}'.format(sp,cp,sn,cn))
    return 0