Ejemplo n.º 1
0
def evaluate(bit_vector):  # Takes a bitarray object
    """Score the molecule encoded by ``bit_vector``.

    The bit vector (a bitarray) is converted to a gene (a list of int),
    the gene to a grammar rule sequence, the rule sequence is decoded to a
    SMILES string, and the canonicalized SMILES is handed to
    ``score_util.calc_score`` (the J score).

    Returns whatever ``score_util.calc_score`` returns for that SMILES.
    """
    gene = BITtoGene(bit_vector)
    rules = opt.GenetoCFG(gene)
    raw_smile = cfg_util.decode(rules)
    canonical_smile = opt.canonicalize(raw_smile)
    return score_util.calc_score(canonical_smile)
Ejemplo n.º 2
0
def save_log(population):
    """Interactively save the final population to a log directory.

    The user is asked whether to save; answering exactly ``'n'`` returns
    immediately, any other answer proceeds. The user is then asked for a
    directory, into which the following are written:

    * ``<name>.p``   -- pickle of the unique, parseable canonical SMILES
      (``<name>`` is the last path component of the directory),
    * ``seed.txt``   -- first line ``str(time)``, then a tab-separated
      ``smile`` / ``score`` table,
    * ``<i>.png``    -- one 120x120 drawing per molecule,
    * ``final.png``  -- a montage of every ``*.png`` in the directory,
      produced by the external ``montage`` program.

    Args:
        population: iterable of bit vectors, each decoded with
            ``BITtoGene`` / ``opt.GenetoCFG`` / ``cfg_util.decode``.
    """
    # Local imports: only this function needs them.
    import glob
    import subprocess

    save = input(
        "Save logs and image of final population ? (press 'y' or 'n') : ")
    if save == 'n':
        return

    directory = input("Please input log file name (or directory) : ")

    # Create the folder without going through a shell: the name is typed by
    # the user, so it must never be interpolated into a command line.
    os.makedirs(directory, exist_ok=True)
    # Use only the last path component as the file stem so that nested or
    # trailing-slash directories ("runs/a", "a/") still give a valid path.
    file_name = os.path.basename(os.path.normpath(directory))

    # Keep each valid SMILES once, in first-seen order, with its molecule.
    smile_list = []
    ms = []
    seen = set()
    for bit_vector in population:
        gene = BITtoGene(bit_vector)
        smile = opt.canonicalize(cfg_util.decode(opt.GenetoCFG(gene)))
        if not smile or smile in seen:
            continue
        mol = MolFromSmiles(smile)  # parsed once, reused for the drawing
        if mol is not None:
            seen.add(smile)
            smile_list.append(smile)
            ms.append(mol)

    # Final population as a pickle object.
    with open(os.path.join(directory, file_name + '.p'), 'wb') as f:
        pickle.dump(smile_list, f)

    # Seed line followed by the final population and their scores.
    with open(os.path.join(directory, 'seed.txt'), 'w') as f:
        # NOTE(review): `time` is resolved from the enclosing module;
        # presumably it holds the random seed of the run -- confirm.
        f.write(str(time) + '\n')
        f.write('smile' + '\t' + 'score' + '\n')
        for smile in smile_list:
            score = score_util.calc_score(smile)
            f.write(smile + '\t' + str(score) + '\n')

    # One image per molecule.
    for i, mol in enumerate(ms):
        Draw.MolToFile(mol,
                       os.path.join(directory, str(i) + '.png'),
                       size=(120, 120))

    # Assemble every PNG of the directory into a single image. The glob is
    # expanded here (sorted, like a shell would) so no shell is involved.
    images = sorted(
        glob.glob(os.path.join(glob.escape(directory), '*.png')))
    if images:
        try:
            subprocess.run(
                ['montage'] + images +
                [os.path.join(directory, 'final.png')])
        except OSError as err:
            # Best effort, as before: a missing `montage` binary must not
            # abort after the logs have been written.
            print('Could not run montage: ' + str(err))
Ejemplo n.º 3
0
    def __init__(self, smiles):
        """Build the per-molecule state from a SMILES string.

        Stores the SMILES, references to the module-level lookup tables,
        the molecule parsed by ``Chem.MolFromSmiles``, the adjacency
        matrix, node list and expanded matrix produced by the private
        ``_get_*`` helpers, bookkeeping counters, and a ``property`` dict
        holding ``'qed'``, ``'J_score'`` and ``'MW'`` for the molecule.
        """
        self.smiles = smiles

        # Lookup tables taken from the enclosing module.
        self.possible_bonds = possible_bonds
        self.table_of_elements = table_of_elements
        self.vocab_nodes_encode = vocab_nodes_encode

        self.mol = Chem.MolFromSmiles(smiles)

        # Graph view of the molecule.
        self.adj = self._get_adj_mat(smiles)
        self.node_list = self._get_node_list(smiles)
        self.num_atom = len(self.node_list)
        self.expand_mat = self._get_expand_mat(self.adj, self.node_list)

        # Bookkeeping: both counters start at zero, similarity at -1.
        self.life_time = self.pool_life_time = 0
        self.similarity = -1

        # Properties, computed in the same order as they are stored.
        qed_value = qed(self.mol)
        j_score = calc_score(self.mol)
        mol_weight = ExactMolWt(self.mol)
        self.property = dict([
            ('qed', qed_value),
            ('J_score', j_score),
            ('MW', mol_weight),
        ])
        self.prior_flag = False
Ejemplo n.º 4
0
def main(Pipes, island_id, nb_of_island, mig_interval, logn=-1):
    """Run one island of the island-model search and dump its population.

    The island draws ``N_mu + N_lambda`` seed molecules from
    ``250k_rndm_zinc_drugs_clean.smi``, keeps the ``N_mu`` best, then
    loops: print statistics, create up to ``N_lambda`` mutated offspring
    whose canonical SMILES has not been seen yet, keep the ``N_mu`` best,
    and call ``migration`` on every generation that is a positive multiple
    of ``mig_interval``. The loop stops after 8 hours of wall-clock time
    and the final population is written to a CSV file.

    Args:
        Pipes: passed through unchanged to ``migration``.
        island_id: identifier of this island; used for the default seed
            and as prefix of the output file name.
        nb_of_island: number of islands; ``N_mu`` and ``N_lambda`` are
            1000 and 2000 divided by it, and the migration counter ``k``
            wraps back to 1 once it reaches it.
        mig_interval: number of generations between two migrations.
        logn: ``-1`` seeds NumPy with ``0 + island_id``; any other value
            seeds it with the current time and is appended to the output
            file name.

    Side effects:
        Rebinds the module globals ``best_smiles``, ``best_score`` and
        ``all_smiles``; starts a ``threading.Timer`` that calls
        ``current_best`` after 60 seconds; writes
        ``<island_id>_final_pop_<nb_of_island>_<mig_interval>[_<logn>].csv``.
    """
    global best_smiles
    global best_score
    global all_smiles

    smifile = '250k_rndm_zinc_drugs_clean.smi'
    if logn == -1:
        np.random.seed(0 + island_id)
    else:
        np.random.seed(int(t.time()))

    gene_length = 300

    N_mu = int(1000 / nb_of_island)
    N_lambda = int(2000 / nb_of_island)

    # initialize population
    with open(smifile) as f:
        seed_smiles = [line.rstrip() for line in f]

    initial_smiles = np.random.choice(seed_smiles, N_mu + N_lambda)
    initial_smiles = [canonicalize(s) for s in initial_smiles]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]
    population = [
        (score, smiles, gene)
        for score, gene, smiles in zip(initial_scores, initial_genes,
                                       initial_smiles)
    ]

    population = sorted(population, key=lambda x: x[0], reverse=True)[:N_mu]

    th = threading.Timer(60, current_best, [])
    th.start()
    print("Start!")
    all_smiles = [p[1] for p in population]

    # Generations in which a migration occurs. Kept as a lazy range: it
    # has the same membership semantics as the list it replaces, but costs
    # no memory and answers `generation in ...` without scanning.
    migration_generations = range(mig_interval, 1000000000, mig_interval)
    k = 1  # First migration
    t0 = t.time()
    for generation in range(1000000000):
        scores = [p[0] for p in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        idx = np.argmax(scores)
        best_smiles = population[idx][1]
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            p = population[np.random.randint(len(population))]
            p_gene = p[2]
            c_gene = mutation(p_gene)

            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            # Only molecules never seen on this island are scored and kept.
            if c_smiles not in all_smiles:
                c_score = score_util.calc_score(c_smiles)
                c = (c_score, c_smiles, c_gene)
                new_population.append(c)
                all_smiles.append(c_smiles)

        population.extend(new_population)
        population = sorted(population, key=lambda x: x[0],
                            reverse=True)[:N_mu]

        # Every mig_interval generations, exchange individuals.
        if generation in migration_generations:
            print('Starting Migration')
            if k >= nb_of_island:
                k = 1
            population = migration(Pipes, island_id, nb_of_island, population,
                                   k)
            k += 1
        # Wall-clock budget: stop after 8 hours.
        if t.time() - t0 >= 3600 * 8:
            break

    # The log number is appended to the file name only when one was given.
    file_name = (str(island_id) + '_final_pop' + '_' + str(nb_of_island) +
                 '_' + str(mig_interval))
    if logn != -1:
        file_name += '_' + str(logn)
    file_name += '.csv'
    with open(file_name, 'w') as f:
        pd.DataFrame(population).to_csv(f)
Ejemplo n.º 5
0
def main():
    """Run the single-population evolutionary search from the command line.

    Command-line options:
        --smifile: path of the file read for seed SMILES, one per line
            (default ``250k_rndm_zinc_drugs_clean.smi``).
        --seed: integer passed to ``np.random.seed`` (default 0).

    Rebinds the module globals ``best_smiles``, ``best_score`` and
    ``all_smiles``, and starts a ``threading.Timer`` that calls
    ``current_best`` after 60 seconds.

    NOTE(review): in the lines visible here the generation loop has no
    exit condition other than exhausting its range -- confirm whether the
    function continues beyond this excerpt.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--smifile', default='250k_rndm_zinc_drugs_clean.smi')
    parser.add_argument('--seed', type=int, default=0)
    args = parser.parse_args()

    np.random.seed(args.seed)

    global best_smiles
    global best_score
    global all_smiles

    # Maximum gene length handed to CFGtoGene.
    gene_length = 300

    # N_mu individuals survive each generation; N_lambda offspring are
    # attempted per generation.
    N_mu = 100
    N_lambda = 200

    # initialize population
    seed_smiles = []
    with open(args.smifile) as f:
        for line in f:
            smiles = line.rstrip()
            seed_smiles.append(smiles)

    # Draw N_mu + N_lambda seed molecules, then canonicalize, encode and
    # score each of them.
    initial_smiles = np.random.choice(seed_smiles, N_mu + N_lambda)
    initial_smiles = [canonicalize(s) for s in initial_smiles]
    initial_genes = [
        CFGtoGene(cfg_util.encode(s), max_len=gene_length)
        for s in initial_smiles
    ]
    initial_scores = [score_util.calc_score(s) for s in initial_smiles]

    # Individuals are (score, smiles, gene) tuples.
    population = []
    for score, gene, smiles in zip(initial_scores, initial_genes,
                                   initial_smiles):
        population.append((score, smiles, gene))

    # Keep the N_mu highest-scoring individuals.
    population = sorted(population, key=lambda x: x[0], reverse=True)[:N_mu]

    # Call current_best (no arguments) once, 60 seconds from now.
    t = threading.Timer(60, current_best, [])
    t.start()
    print("Start!")
    # Every SMILES seen so far; used below to reject duplicate offspring.
    all_smiles = [p[1] for p in population]
    for generation in range(1000000000):
        scores = [p[0] for p in population]
        mean_score = np.mean(scores)
        min_score = np.min(scores)
        std_score = np.std(scores)
        best_score = np.max(scores)
        idx = np.argmax(scores)
        best_smiles = population[idx][1]
        # One statistics line per generation, prefixed with a literal '%'.
        print("%{},{},{},{},{}".format(generation, best_score, mean_score,
                                       min_score, std_score))

        new_population = []
        for _ in range(N_lambda):
            # Pick a parent uniformly at random and mutate its gene.
            p = population[np.random.randint(len(population))]
            p_gene = p[2]
            c_gene = mutation(p_gene)

            c_smiles = canonicalize(cfg_util.decode(GenetoCFG(c_gene)))
            # Offspring whose SMILES was already seen are discarded
            # without being scored.
            if c_smiles not in all_smiles:
                c_score = score_util.calc_score(c_smiles)
                c = (c_score, c_smiles, c_gene)
                new_population.append(c)
                all_smiles.append(c_smiles)

        # Parents and offspring compete; the N_mu best survive.
        population.extend(new_population)
        population = sorted(population, key=lambda x: x[0],
                            reverse=True)[:N_mu]