예제 #1
0
def main():
    """Alternate RNN pre-training on encoded SMILES with an MCTS run.

    Reads at most the first 1001 lines of ``250k_rndm_zinc_drugs_clean.smi``,
    encodes them with ``cfg_util.encode`` and builds an ``RNN`` whose
    ``rule_size`` is the number of productions in ``zinc_grammar.GCFG``.
    Weights are restored from ``model.npz`` when that file exists.

    The function then repeats 100 rounds of: 10000 single-sequence training
    steps with an Adam optimizer, saving the weights to ``model.npz``, and
    calling ``MCTS`` on a fresh root ``State`` with the value 10000.
    """
    print("loading data")
    filename = '250k_rndm_zinc_drugs_clean.smi'
    train_smiles = []
    with open(filename) as smi_file:
        for raw_line in smi_file:
            train_smiles.append(raw_line.rstrip())
            # The size check runs after the append, so up to 1001 entries
            # are kept.
            if len(train_smiles) > 1000:
                break
    print("converting data")
    train_rules = cfg_util.encode(train_smiles)
    print("finished converting data")

    n_rules = len(zinc_grammar.GCFG.productions())
    rnn = RNN(rule_size=n_rules)
    if os.path.exists("model.npz"):
        serializers.load_npz("model.npz", rnn)
    optimizer = optimizers.Adam()
    optimizer.setup(rnn)

    for _ in range(100):
        print("start pre-training")
        for _epoch in range(10000):
            # One randomly chosen training entry, wrapped in a list so the
            # array gets a leading axis of length 1.
            picked = np.random.choice(train_rules)
            sequence = np.array([picked]).astype(np.int32)
            final_step = len(sequence[0]) - 2
            loss = 0
            rnn.reset_state()
            for t in range(final_step + 1):
                with chainer.using_config('train', True):
                    loss += rnn(sequence[:, t], sequence[:, t + 1])
                    # Update every 32 steps and on the last step of the
                    # sequence; unchain_backward cuts the history so the
                    # next backward pass starts from here.
                    flush = (t % 32 == 0) or (t == final_step)
                    if flush:
                        rnn.cleargrads()
                        loss.backward()
                        loss.unchain_backward()
                        optimizer.update()
        serializers.save_npz("model.npz", rnn)
        print("model saved.")
        print("finish pre-training")
        root = State(rnn=rnn)
        generated = MCTS(root, 10000)
예제 #2
0
def main():
    """Load a (possibly pre-trained) RNN and run MCTS from a fresh root state.

    Reads at most the first 11 lines of ``250k_rndm_zinc_drugs_clean.smi``
    and passes them through ``cfg_util.encode``. An ``RNN`` sized by the
    number of productions in ``zinc_grammar.GCFG`` is created, its weights
    are restored from ``model.npz`` when that file exists, and ``MCTS`` is
    called once on ``State(rnn=rnn)`` with the value 10000.

    The encoded rules and the Adam optimizer are created but not used
    further inside this function.
    """
    print("loading data")
    filename = '250k_rndm_zinc_drugs_clean.smi'
    train_smiles = []
    with open(filename) as smi_file:
        for raw_line in smi_file:
            train_smiles.append(raw_line.rstrip())
            # The size check runs after the append, so up to 11 entries
            # are kept.
            if len(train_smiles) > 10:
                break
    print("converting data")
    train_rules = cfg_util.encode(train_smiles)
    print("finished converting data")

    n_rules = len(zinc_grammar.GCFG.productions())
    rnn = RNN(rule_size=n_rules)
    if os.path.exists("model.npz"):
        serializers.load_npz("model.npz", rnn)
    optimizer = optimizers.Adam()
    optimizer.setup(rnn)

    root = State(rnn=rnn)
    generated = MCTS(root, 10000)
예제 #3
0
def _report_generation(label, population, start_time):
    """Print the ``%label,max_score,elapsed`` line, then one line per member."""
    scores = [member[0] for member in population]
    # NOTE(review): the population is sorted ascending and truncated from the
    # front (lowest scores kept), yet the summary reports np.max, i.e. the
    # last kept member - confirm that this is the intended statistic.
    max_score = np.max(scores)
    elapsed_time = time.time() - start_time
    print("%{},{},{}".format(label, max_score, elapsed_time))
    for member in population:
        print("{},{}".format(member[0], member[1]))


def main():
    """Evolve SMILES strings scored by ``rdock_util.score_qsub``.

    Command-line options:
        --smifile     file with one seed SMILES per line
        --seed        NumPy random seed (default 0)
        --mu          number of members kept after each selection (default 32)
        --lam         number of mutation attempts per generation (default 64)
        --generation  number of generations to run (default 1000)

    ``mu + lam`` seed molecules are sampled, scored, and the ``mu`` entries
    with the lowest score are kept. Each generation mutates randomly chosen
    members, scores every non-empty, not-yet-seen child in one batch, merges
    the children into the population and again keeps the ``mu`` lowest
    scores. Progress is printed after every generation and every scored
    ``(score, smiles)`` pair is listed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smifile', default='250k_rndm_zinc_drugs_clean.smi')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--mu', type=int, default=32)
    parser.add_argument('--lam', type=int, default=64)
    parser.add_argument('--generation', type=int, default=1000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    gene_length = 300

    N_mu = args.mu
    N_lambda = args.lam

    # initialize population
    with open(args.smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    start_time = time.time()

    initial_smiles = list(np.random.choice(seed_smiles, N_mu + N_lambda))
    initial_genes = [CFGtoGene(cfg_util.encode(s), max_len=gene_length)
                     for s in initial_smiles]
    initial_scores = rdock_util.score_qsub(initial_smiles)

    # Population entries are (score, smiles, gene) tuples.
    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]
    population = sorted(population, key=lambda x: x[0])[:N_mu]

    all_smiles = [canonicalize(p[1]) for p in population]
    all_result = [(p[0], s) for p, s in zip(population, all_smiles)]
    # Membership is tested once per mutation attempt and the collection only
    # grows, so a set keeps each duplicate check constant-time instead of
    # scanning a list.
    seen_smiles = set(all_smiles)

    _report_generation(0, population, start_time)

    for generation in range(args.generation):
        new_population_smiles = []
        new_population_genes = []
        for _ in range(N_lambda):
            p = population[np.random.randint(len(population))]
            c_gene = mutation(p[2])

            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            # Skip empty results and anything already seen, including
            # children produced earlier in this same generation.
            if c_smiles != '' and c_smiles not in seen_smiles:
                new_population_smiles.append(c_smiles)
                new_population_genes.append(c_gene)
                seen_smiles.add(c_smiles)

        new_population_scores = rdock_util.score_qsub(new_population_smiles)
        for score, gene, smiles in zip(new_population_scores,
                                       new_population_genes,
                                       new_population_smiles):
            population.append((score, smiles, gene))
            all_result.append((score, smiles))
        population = sorted(population, key=lambda x: x[0])[:N_mu]
        _report_generation(generation + 1, population, start_time)

    print("list of generated smiles:")
    for r in all_result:
        print("{},{}".format(r[0], r[1]))
예제 #4
0
def main(Pipes, island_id, nb_of_island, mig_interval, logn=-1):
    """Evolve one island's population and write the final population to CSV.

    Args:
        Pipes: passed through unchanged to ``migration``.
        island_id: offset added to the random seed when ``logn == -1``;
            also passed to ``migration`` and used in the output file name.
        nb_of_island: divisor for the population sizes
            (``1000 / nb_of_island`` kept, ``2000 / nb_of_island`` mutation
            attempts per generation); the migration counter ``k`` wraps back
            to 1 once it reaches this value.
        mig_interval: ``migration`` is called on every generation that is a
            positive multiple of this value.
        logn: when ``-1`` the seed is ``0 + island_id`` and the output file
            has no suffix; otherwise the seed comes from the clock and
            ``_<logn>`` is appended to the file name.

    The loop stops after 8 hours of wall-clock time (or 1000000000
    generations). The module globals ``best_smiles``, ``best_score`` and
    ``all_smiles`` are written by this function.
    """
    smifile = '250k_rndm_zinc_drugs_clean.smi'
    if logn == -1:
        np.random.seed(0 + island_id)
    else:
        np.random.seed(int(t.time()))
    global best_smiles
    global best_score
    global all_smiles

    gene_length = 300

    N_mu = int(1000 / nb_of_island)
    N_lambda = int(2000 / nb_of_island)

    # initialize population
    with open(smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    initial_smiles = np.random.choice(seed_smiles, N_mu + N_lambda)
    initial_smiles = [canonicalize(s) for s in initial_smiles]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]

    # Population entries are (score, smiles, gene) tuples; highest score
    # first.
    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]
    population = sorted(population, key=lambda x: x[0], reverse=True)[:N_mu]

    # Schedule current_best to run after 60 seconds.
    th = threading.Timer(60, current_best, [])
    th.start()
    print("Start!")
    # Kept as a list: it is a module global that other code may read.
    all_smiles = [p[1] for p in population]

    # Generations in which a migration occurs. A range object is lazy and
    # tests integer membership in constant time, so it replaces a
    # materialized list of up to ~1e9 integers without changing which
    # generations match.
    migration_generations = range(mig_interval, 1000000000, mig_interval)
    k = 1  # First migration
    t0 = t.time()
    for generation in range(1000000000):
        scores = [p[0] for p in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        idx = np.argmax(scores)
        best_smiles = population[idx][1]
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            p = population[np.random.randint(len(population))]
            p_gene = p[2]
            c_gene = mutation(p_gene)

            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            if c_smiles not in all_smiles:
                c_score = score_util.calc_score(c_smiles)
                new_population.append((c_score, c_smiles, c_gene))
                all_smiles.append(c_smiles)

        population.extend(new_population)
        population = sorted(population, key=lambda x: x[0],
                            reverse=True)[:N_mu]

        # Migrate on every mig_interval-th generation.
        if generation in migration_generations:
            print('Starting Migration')
            if k >= nb_of_island:
                k = 1
            population = migration(Pipes, island_id, nb_of_island, population,
                                   k)
            k += 1
        # Stop after 8 hours of wall-clock time.
        if t.time() - t0 >= 3600 * 8:
            break

    out_name = (str(island_id) + '_final_pop' + '_' + str(nb_of_island) +
                '_' + str(mig_interval))
    if logn != -1:
        out_name += '_' + str(logn)
    out_name += '.csv'
    # The context manager closes the file even if serialization fails.
    with open(out_name, 'w') as f:
        pd.DataFrame(population).to_csv(f)
예제 #5
0
def main():
    """Evolve a population of SMILES strings scored by ``score_util.calc_score``.

    Command-line options:
        --smifile  file with one seed SMILES per line
        --seed     NumPy random seed (default 0)

    300 seed molecules are sampled, canonicalized and scored, and the 100
    with the highest score form the initial population. Every generation
    makes 200 mutation attempts on randomly chosen members, scores each
    child whose SMILES is not yet in ``all_smiles``, and keeps the 100
    highest-scoring entries of parents plus children.

    The module globals ``best_smiles``, ``best_score`` and ``all_smiles``
    are written by this function.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smifile', default='250k_rndm_zinc_drugs_clean.smi')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    np.random.seed(args.seed)

    global best_smiles, best_score, all_smiles

    gene_length = 300

    N_mu = 100
    N_lambda = 200

    # initialize population
    with open(args.smifile) as smi_file:
        seed_smiles = [raw_line.rstrip() for raw_line in smi_file]

    initial_smiles = [
        canonicalize(s)
        for s in np.random.choice(seed_smiles, N_mu + N_lambda)
    ]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]

    # Entries are (score, smiles, gene) tuples, highest score first.
    population = sorted(
        [(score, smiles, gene)
         for score, gene, smiles in zip(initial_scores, initial_genes,
                                        initial_smiles)],
        key=lambda entry: entry[0],
        reverse=True,
    )[:N_mu]

    # Schedule current_best to run after 60 seconds.
    timer = threading.Timer(60, current_best, [])
    timer.start()
    print("Start!")
    all_smiles = [member[1] for member in population]
    for generation in range(1000000000):
        scores = [member[0] for member in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        best_smiles = population[np.argmax(scores)][1]
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            parent = population[np.random.randint(len(population))]
            c_gene = mutation(parent[2])

            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            # Children whose SMILES was already seen are neither scored nor
            # added.
            if c_smiles in all_smiles:
                continue
            c_score = score_util.calc_score(c_smiles)
            new_population.append((c_score, c_smiles, c_gene))
            all_smiles.append(c_smiles)

        population = sorted(population + new_population,
                            key=lambda entry: entry[0],
                            reverse=True)[:N_mu]