def main():
    """Pre-train the rule-level RNN on a SMILES sample and run MCTS with it.

    Reads SMILES strings from ``250k_rndm_zinc_drugs_clean.smi`` (stopping
    once 1001 have been collected), encodes them with ``cfg_util.encode``
    and builds an ``RNN`` sized by the number of grammar productions.  If
    ``model.npz`` exists it is loaded first.  Then, 100 times over: run
    10000 single-sequence training steps with Adam, save the model to
    ``model.npz`` and launch ``MCTS`` for 10000 iterations from a fresh
    ``State``.

    NOTE(review): the original indentation of this function was lost.  The
    placement of the checkpoint and the MCTS call inside the 100-round loop
    is a reconstruction -- confirm against the upstream file.
    """
    print("loading data")
    train_smiles = []
    filename = '250k_rndm_zinc_drugs_clean.smi'
    with open(filename) as f:
        for line in f:
            train_smiles.append(line.rstrip())
            # Strict ">" means 1001 entries are kept before stopping.
            if len(train_smiles) > 1000:
                break
    print("converting data")
    train_rules = cfg_util.encode(train_smiles)
    print("finished converting data")

    rnn = RNN(rule_size=len(zinc_grammar.GCFG.productions()))
    if os.path.exists("model.npz"):
        serializers.load_npz("model.npz", rnn)
    optimizer = optimizers.Adam()
    optimizer.setup(rnn)

    for _ in range(100):
        print("start pre-training")
        for _epoch in range(10000):
            # Sample one training sequence by index.  np.random.choice only
            # accepts 1-D input, so passing it a collection of rule
            # sequences can fail (ragged or 2-D data); indexing works for
            # both lists and arrays.
            picked = train_rules[np.random.randint(len(train_rules))]
            sequence = np.array([picked]).astype(np.int32)
            loss = 0
            rnn.reset_state()
            n_steps = len(sequence[0]) - 1
            for t in range(n_steps):
                with chainer.using_config('train', True):
                    loss += rnn(sequence[:, t], sequence[:, t + 1])
                    # Update every 32 steps and on the final step, cutting
                    # the graph after each update.
                    if t % 32 == 0 or t == n_steps - 1:
                        rnn.cleargrads()
                        loss.backward()
                        loss.unchain_backward()
                        optimizer.update()
        serializers.save_npz("model.npz", rnn)
        print("model saved.")
        print("finish pre-training")
        rootstate = State(rnn=rnn)
        smiles = MCTS(rootstate, 10000)
def main():
    """Load a small SMILES sample, restore the RNN if possible, and run MCTS.

    Collects SMILES strings from ``250k_rndm_zinc_drugs_clean.smi`` until 11
    have been read, encodes them with ``cfg_util.encode``, builds an ``RNN``
    sized by the number of grammar productions, loads ``model.npz`` when
    that file exists, sets up an Adam optimizer on the network, and finally
    runs ``MCTS`` for 10000 iterations from a ``State`` wrapping the network.
    No training step is performed here.
    """
    print("loading data")
    train_smiles = []
    filename = '250k_rndm_zinc_drugs_clean.smi'
    with open(filename) as handle:
        for raw_line in handle:
            train_smiles.append(raw_line.rstrip())
            # Strict ">" means 11 entries are kept before stopping.
            if len(train_smiles) > 10:
                break

    print("converting data")
    train_rules = cfg_util.encode(train_smiles)
    print("finished converting data")

    rule_count = len(zinc_grammar.GCFG.productions())
    rnn = RNN(rule_size=rule_count)
    checkpoint_exists = os.path.exists("model.npz")
    if checkpoint_exists:
        serializers.load_npz("model.npz", rnn)

    optimizer = optimizers.Adam()
    optimizer.setup(rnn)

    rootstate = State(rnn=rnn)
    smiles = MCTS(rootstate, 10000)
def main():
    """Run a (mu + lambda) evolutionary search scored by ``rdock_util``.

    Command-line options:
        --smifile     file with one seed SMILES per line
        --seed        seed passed to ``np.random.seed`` (default 0)
        --mu          number of survivors kept per generation (default 32)
        --lam         number of mutation attempts per generation (default 64)
        --generation  number of generations to run (default 1000)

    ``mu + lam`` seed molecules are sampled, encoded to genes and scored in
    one ``rdock_util.score_qsub`` call; the ``mu`` lowest-scoring ones form
    the initial population.  Each generation mutates randomly chosen
    parents, keeps children whose canonical SMILES is non-empty and not yet
    seen, scores them in one batch, and truncates the merged population to
    the ``mu`` lowest scores.  Progress is printed after every generation
    and every scored molecule is listed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smifile', default='250k_rndm_zinc_drugs_clean.smi')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--mu', type=int, default=32)
    parser.add_argument('--lam', type=int, default=64)
    parser.add_argument('--generation', type=int, default=1000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    gene_length = 300

    N_mu = args.mu
    N_lambda = args.lam

    # initialize population
    with open(args.smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    start_time = time.time()

    initial_smiles = list(np.random.choice(seed_smiles, N_mu + N_lambda))
    initial_genes = [CFGtoGene(cfg_util.encode(s), max_len=gene_length)
                     for s in initial_smiles]
    initial_scores = rdock_util.score_qsub(initial_smiles)

    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]
    population = sorted(population, key=lambda x: x[0])[:N_mu]

    canonical_smiles = [canonicalize(p[1]) for p in population]
    all_result = [(p[0], s) for p, s in zip(population, canonical_smiles)]
    # A set gives O(1) duplicate checks; the collection only ever grows and
    # is used for membership tests alone.
    seen_smiles = set(canonical_smiles)

    def report(generation_index):
        """Print the progress line and the current population."""
        # NOTE(review): survivors are the *lowest* scores (ascending sort)
        # while this line reports np.max of them -- confirm this is intended.
        max_score = np.max([p[0] for p in population])
        elapsed_time = time.time() - start_time
        print("%{},{},{}".format(generation_index, max_score, elapsed_time))
        for p in population:
            print("{},{}".format(p[0], p[1]))

    report(0)

    for generation in range(args.generation):
        new_population_smiles = []
        new_population_genes = []
        for _ in range(N_lambda):
            parent = population[np.random.randint(len(population))]
            c_gene = mutation(parent[2])
            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            if c_smiles != '' and c_smiles not in seen_smiles:
                new_population_smiles.append(c_smiles)
                new_population_genes.append(c_gene)
                seen_smiles.add(c_smiles)

        # All accepted children are scored in a single batch call.
        new_population_scores = rdock_util.score_qsub(new_population_smiles)
        for score, gene, smiles in zip(new_population_scores,
                                       new_population_genes,
                                       new_population_smiles):
            population.append((score, smiles, gene))
            all_result.append((score, smiles))
        population = sorted(population, key=lambda x: x[0])[:N_mu]

        report(generation + 1)

    print("list of generated smiles:")
    for r in all_result:
        print("{},{}".format(r[0], r[1]))
def main(Pipes, island_id, nb_of_island, mig_interval, logn=-1):
    """Run one island of an island-model (mu + lambda) evolutionary search.

    Args:
        Pipes: forwarded unchanged to ``migration``.
        island_id: identifier of this island; added to the RNG seed when
            ``logn == -1`` and used as the output file-name prefix.
        nb_of_island: number of islands.  Population sizes are
            ``int(1000 / nb_of_island)`` survivors and
            ``int(2000 / nb_of_island)`` mutation attempts per generation.
        mig_interval: ``migration`` is called at every generation that is a
            positive multiple of this value.
        logn: ``-1`` (default) seeds NumPy with ``0 + island_id`` and omits
            the suffix from the output file name; any other value seeds from
            the clock and appends ``_<logn>`` to the file name.

    Side effects:
        Rebinds the module globals ``best_smiles``, ``best_score`` and
        ``all_smiles``, schedules ``current_best`` on a 60-second
        ``threading.Timer``, prints one statistics line per generation, and
        after 8 hours of wall-clock time writes the final population to
        ``<island_id>_final_pop_<nb_of_island>_<mig_interval>[_<logn>].csv``.
    """
    smifile = '250k_rndm_zinc_drugs_clean.smi'
    if logn == -1:
        np.random.seed(0 + island_id)
    else:
        np.random.seed(int(t.time()))

    global best_smiles
    global best_score
    global all_smiles

    gene_length = 300

    N_mu = int(1000 / nb_of_island)
    N_lambda = int(2000 / nb_of_island)

    # initialize population
    with open(smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    initial_smiles = np.random.choice(seed_smiles, N_mu + N_lambda)
    initial_smiles = [canonicalize(s) for s in initial_smiles]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]

    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]
    population = sorted(population, key=lambda x: x[0], reverse=True)[:N_mu]

    th = threading.Timer(60, current_best, [])
    th.start()
    print("Start!")
    all_smiles = [p[1] for p in population]
    # Local mirror of the global list for O(1) duplicate checks; the global
    # stays a list so other code reading it sees the same type as before.
    seen_smiles = set(all_smiles)

    # Generations at which a migration occurs.  A lazy range has the same
    # membership semantics as the former materialised list, without
    # allocating up to 1e9 / mig_interval integers or scanning them linearly.
    migration_generations = range(mig_interval, 1000000000, mig_interval)
    k = 1  # First migration
    t0 = t.time()
    for generation in range(1000000000):
        scores = [p[0] for p in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        idx = np.argmax(scores)
        best_smiles = population[idx][1]
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            parent = population[np.random.randint(len(population))]
            c_gene = mutation(parent[2])
            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            if c_smiles not in seen_smiles:
                c_score = score_util.calc_score(c_smiles)
                new_population.append((c_score, c_smiles, c_gene))
                all_smiles.append(c_smiles)
                seen_smiles.add(c_smiles)

        population.extend(new_population)
        population = sorted(population, key=lambda x: x[0],
                            reverse=True)[:N_mu]

        if generation in migration_generations:
            print('Starting Migration')
            # k is kept within 1 .. nb_of_island - 1 before each call.
            if k >= nb_of_island:
                k = 1
            population = migration(Pipes, island_id, nb_of_island,
                                   population, k)
            k += 1

        # Stop after 8 hours of wall-clock time.
        if t.time() - t0 >= 3600 * 8:
            break

    out_name = (str(island_id) + '_final_pop' + '_' + str(nb_of_island) +
                '_' + str(mig_interval))
    if logn != -1:
        out_name += '_' + str(logn)
    out_name += '.csv'

    # "with" guarantees the file is closed even if to_csv raises.
    with open(out_name, 'w') as f:
        pd.DataFrame(population).to_csv(f)
def main():
    """Run a single-population (mu + lambda) evolutionary search.

    Command-line options:
        --smifile  file with one seed SMILES per line
        --seed     seed passed to ``np.random.seed`` (default 0)

    300 seed molecules (``N_mu`` = 100 plus ``N_lambda`` = 200) are sampled,
    canonicalized, encoded to genes and scored with
    ``score_util.calc_score``; the 100 highest-scoring form the initial
    population.  Each generation prints best/mean/min/std of the population
    scores, makes 200 mutation attempts from randomly chosen parents, scores
    every child whose canonical SMILES has not been seen before, and keeps
    the 100 highest scores.

    Side effects:
        Rebinds the module globals ``best_smiles``, ``best_score`` and
        ``all_smiles`` and schedules ``current_best`` on a 60-second
        ``threading.Timer``.  The loop has no exit condition other than
        reaching 1000000000 generations.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smifile', default='250k_rndm_zinc_drugs_clean.smi')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    np.random.seed(args.seed)

    global best_smiles
    global best_score
    global all_smiles

    gene_length = 300

    N_mu = 100
    N_lambda = 200

    # initialize population
    with open(args.smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    initial_smiles = np.random.choice(seed_smiles, N_mu + N_lambda)
    initial_smiles = [canonicalize(s) for s in initial_smiles]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]

    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]
    population = sorted(population, key=lambda x: x[0], reverse=True)[:N_mu]

    timer = threading.Timer(60, current_best, [])
    timer.start()
    print("Start!")
    all_smiles = [p[1] for p in population]
    # Local mirror of the global list for O(1) duplicate checks; the global
    # stays a list so other code reading it sees the same type as before.
    seen_smiles = set(all_smiles)

    for generation in range(1000000000):
        scores = [p[0] for p in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        idx = np.argmax(scores)
        best_smiles = population[idx][1]
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            parent = population[np.random.randint(len(population))]
            c_gene = mutation(parent[2])
            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            if c_smiles not in seen_smiles:
                c_score = score_util.calc_score(c_smiles)
                new_population.append((c_score, c_smiles, c_gene))
                all_smiles.append(c_smiles)
                seen_smiles.add(c_smiles)

        population.extend(new_population)
        population = sorted(population, key=lambda x: x[0],
                            reverse=True)[:N_mu]