コード例 #1
0
ファイル: GA_dipole.py プロジェクト: hienerd/Chem_GA_Project
def find_poly_mw(population, poly_size, smiles_list):
    '''
    Computes the molecular weight of every polymer in a population

    Parameters
    ---------
    population: list
        polymers to evaluate
    poly_size: int
        number of monomers per polymer
    smiles_list: list
        list of all possible monomer SMILES

    Returns
    -------
    poly_mw_list: list
        molecular weights, in the same order as population
    '''

    def _molwt(polymer):
        # build the polymer SMILES, parse it with pybel and read its weight
        smiles = utils.make_polymer_str(polymer, smiles_list, poly_size)
        return pybel.readstring('smi', smiles).molwt

    return [_molwt(polymer) for polymer in population]
コード例 #2
0
ファイル: GA_dipole.py プロジェクト: hienerd/Chem_GA_Project
def run_geo_opt(polymer, poly_size, smiles_list):
    '''
    Runs geometry optimization calculation on given polymer

    The polymer is converted to a SMILES string, given 3D coordinates and
    written to input/<name>.xyz. xTB is then run with --opt inside a scratch
    directory named after the polymer; its standard output is saved to
    output/<name>.out and the resulting xtbopt.xyz is copied to
    opt/<name>_opt.xyz. The scratch directory is always removed afterwards.
    If output/<name>.out already exists, nothing is run.

    The input/, output/ and opt/ directories are not created here and are
    presumably expected to exist in the working directory.

    Parameters
    ---------
    polymer: list (specific format)
        [(#,#,#,#), A, B]
    poly_size: int
        number of monomers per polymer
    smiles_list: list
        list of all possible monomer SMILES

    Returns
    -------
    None
    '''
    # file name convention: monoIdx1_monoIdx2_fullNumerSequence
    file_name = utils.make_file_name(polymer, poly_size)
    output_path = 'output/%s.out' % (file_name)

    # if output file already exists, skip xTB
    if os.path.isfile(output_path):
        print("output file existed")
        return

    # make polymer into SMILES string, then into a 3D pybel molecule
    poly_smiles = utils.make_polymer_str(polymer, smiles_list, poly_size)
    mol = pybel.readstring('smi', poly_smiles)
    utils.make3D(mol)

    # write polymer .xyz file to containing folder
    mol.write('xyz', 'input/%s.xyz' % (file_name), overwrite=True)

    # xTB writes its working files into the current directory, so each polymer
    # gets its own scratch directory (an existing one is reused)
    os.makedirs(file_name, exist_ok=True)
    try:
        # arguments are passed as a list so no shell parses the file name;
        # cwd/stdout replace the former "cd ... && ... > file" shell string
        with open(output_path, 'w') as out_file:
            subprocess.call([
                '/ihome/ghutchison/geoffh/xtb/xtb',
                '../input/%s.xyz' % (file_name), '--opt'
            ],
                            cwd=file_name,
                            stdout=out_file)

        # keep the optimized geometry before the scratch directory is removed
        shutil.copy(os.path.join(file_name, 'xtbopt.xyz'),
                    'opt/%s_opt.xyz' % (file_name))
    except OSError as err:
        # stay best-effort (a failed step never aborted the run before), but
        # report the failure instead of ignoring it
        print("xTB geometry optimization failed for %s: %s" % (file_name,
                                                               err))
    finally:
        # delete xtb run directory for the polymer, even after a failure
        shutil.rmtree(file_name, ignore_errors=True)
コード例 #3
0
ファイル: GA_dipole.py プロジェクト: hienerd/Chem_GA_Project
def init_gen(pop_size, poly_size, num_mono_species, opt_property, perc,
             smiles_list):
    '''
    Initializes parameter, creates population, and runs initial generation

    Builds a random population of unique polymers, evaluates the property
    being optimized, and writes the first generation to gens_analysis.txt,
    gens_population.txt, gens_values.txt and gens_spear.txt (plus
    gens_dip_polar.txt for 'dip' or gens_polar_dip.txt for 'pol'). Each file
    is then copied to <name>_copy.txt as a backup.

    Parameters
    ----------
    pop_size: int
        number of polymers in each generation
    poly_size: int
        number of monomers per polymer
    num_mono_species: int
        number of monomer species in each polymer (e.g. copolymer = 2)
    opt_property: str
        property being optimized: 'mw', 'dip' or 'pol'
    perc: float
        percentage of number of monomers to compare with Spearman calculation
    smiles_list: list
        list of all possible monomer SMILES

    Returns
    -------
    params: list (specific format)
        [pop_size, poly_size, num_mono_species, opt_property, smiles_list, sequence_list, mono_list, population, poly_property_list, n, gen_counter, spear_counter, prop_value_counter]

    Raises
    ------
    ValueError
        if opt_property is not 'mw', 'dip' or 'pol'
    '''
    # fail before any expensive work; previously an unknown property only
    # printed this message and then crashed on an undefined variable
    if opt_property not in ('mw', 'dip', 'pol'):
        raise ValueError(
            "Error: opt_property not recognized. trace:main:initial pop properties"
        )

    # companion file (base name, header) pairing the optimized electronic
    # property with the other one
    companion_files = {
        'dip': ('gens_dip_polar', 'compound, gen, dipole, polar \n'),
        'pol': ('gens_polar_dip', 'compound, gen, polar, dip \n'),
    }

    # create all possible numerical sequences for given number of monomer types
    sequence_list = utils.find_sequences(num_mono_species)

    # number of monomers compared in the Spearman calculation
    n = int(len(smiles_list) * perc)

    # initialize generation and convergence counters
    gen_counter = 1
    spear_counter = 0
    prop_value_counter = 0

    # monomer frequency list [[mono index 1, frequency], [mono index 2, frequency],...]
    mono_list = [[mono_idx, 0] for mono_idx in range(len(smiles_list))]

    # create initial population as list of polymers; uniqueness is judged on
    # the polymer string to avoid homopolymer, etc. type duplicates
    # (assumes utils.make_polymer_str returns a hashable str - TODO confirm)
    population = []
    seen_poly_strs = set()
    while len(population) < pop_size:
        # select sequence type for polymer
        temp_poly = [sequence_list[random.randint(0, len(sequence_list) - 1)]]

        # select monomer types for polymer
        for _ in range(num_mono_species):
            poly_monomer = random.randint(0, len(smiles_list) - 1)
            temp_poly.append(poly_monomer)
            # NOTE(review): the frequency is increased even when this
            # candidate is rejected as a duplicate below - confirm intended
            mono_list[poly_monomer][1] += 1

        temp_poly_str = utils.make_polymer_str(temp_poly, smiles_list,
                                               poly_size)
        if temp_poly_str not in seen_poly_strs:
            population.append(temp_poly)
            seen_poly_strs.add(temp_poly_str)

    # find initial population properties
    if opt_property == 'mw':
        poly_property_list = find_poly_mw(population, poly_size, smiles_list)
        other_prop_list = None
    else:
        # element 0 is used as the dipole moments, element 1 as the
        # polarizabilities
        elec_prop_list = find_elec_prop(population, poly_size, smiles_list)
        if opt_property == 'dip':
            poly_property_list = elec_prop_list[0]
            other_prop_list = elec_prop_list[1]
        else:
            poly_property_list = elec_prop_list[1]
            other_prop_list = elec_prop_list[0]

    # set initial values for min, max, and avg of the optimized property
    min_test = min(poly_property_list)
    max_test = max(poly_property_list)
    avg_test = mean(poly_property_list)

    # files are written with context managers so they are closed even if a
    # write fails; written_files keeps the original backup order
    written_files = ['gens_analysis', 'gens_population', 'gens_values']

    with open('gens_analysis.txt', 'w+') as analysis_file:
        analysis_file.write('min, max, avg, spearman, \n')
        analysis_file.write('%f, %f, %f, n/a, \n' %
                            (min_test, max_test, avg_test))

    with open('gens_population.txt', 'w+') as population_file:
        population_file.write('polymer populations \n')
        for polymer in population:
            poly_name = utils.make_file_name(polymer, poly_size)
            population_file.write('%s, ' % (poly_name))
        population_file.write('\n')

    with open('gens_values.txt', 'w+') as values_file:
        values_file.write('%s values \n' % (opt_property))
        for value in poly_property_list:
            values_file.write('%f, ' % (value))
        values_file.write('\n')

    if other_prop_list is not None:
        # record the best polymer of the generation with both properties
        best_idx = poly_property_list.index(max_test)
        compound = utils.make_file_name(population[best_idx], poly_size)
        companion_name, companion_header = companion_files[opt_property]
        with open('%s.txt' % (companion_name), 'w+') as companion_file:
            companion_file.write(companion_header)
            companion_file.write(
                '%s, %d, %f, %f, \n' %
                (compound, 1, max_test, other_prop_list[best_idx]))
        written_files.append(companion_name)

    # no Spearman values exist for the first generation; no header is written
    with open('gens_spear.txt', 'w+') as spear_file:
        spear_file.write('1, n/a, n/a, n/a, \n')
    written_files.append('gens_spear')

    # make backup copies of output files
    for base_name in written_files:
        shutil.copy('%s.txt' % (base_name), '%s_copy.txt' % (base_name))

    params = [
        pop_size, poly_size, num_mono_species, opt_property, smiles_list,
        sequence_list, mono_list, population, poly_property_list, n,
        gen_counter, spear_counter, prop_value_counter
    ]
    return params
コード例 #4
0
ファイル: GA_dipole.py プロジェクト: hienerd/Chem_GA_Project
def crossover_mutate(parent_list, pop_size, poly_size, num_mono_species,
                     sequence_list, smiles_list, mono_list):
    '''
    Fills a new population with the parents plus crossed and mutated children

    Children are generated until the population holds pop_size polymers. A
    child whose polymer string is already present is discarded and another
    one is generated; there is no cap on the number of attempts.
    TODO: fix possible duplication problem after mutation

    Parameters
    ---------
    parent_list: list
        list of parent polymers
    pop_size: int
        number of polymers in each generation
    poly_size: int
        number of monomers per polymer
    num_mono_species: int
        number of monomer species in each polymer (e.g. copolymer = 2)
    sequence_list: list
        list of sequences
    smiles_list: list
        list of all possible monomer SMILES
    mono_list: list
        monomer list handed on to mutate

    Returns
    -------
    new_pop: list
        population list after crossover and mutation
    '''
    # the parents survive into the new population as independent copies
    offspring_pool = deepcopy(parent_list)
    seen_strs = [
        utils.make_polymer_str(member, smiles_list, poly_size)
        for member in offspring_pool
    ]

    while len(offspring_pool) < pop_size:
        last_idx = len(parent_list) - 1

        # draw two parent indexes, redrawing the second until it differs
        # (impossible, hence skipped, when there is a single parent)
        idx_a = random.randint(0, last_idx)
        idx_b = random.randint(0, last_idx)
        while idx_b == idx_a and len(parent_list) > 1:
            idx_b = random.randint(0, last_idx)

        # number of leading monomers inherited from parent A
        cut = random.randint(1, num_mono_species)

        # coin flip for which parent donates the sequence (0 -> A, 1 -> B)
        seq_donor = idx_a if random.randint(0, 1) == 0 else idx_b

        # child = donor sequence + first monomers of A + remaining ones of B
        child = [parent_list[seq_donor][0]]
        child.extend(parent_list[idx_a][pos] for pos in range(1, cut + 1))
        child.extend(parent_list[idx_b][pos]
                     for pos in range(cut + 1, num_mono_species + 1))

        # give child opportunity for mutation
        child = mutate(child, sequence_list, smiles_list, mono_list)

        child_str = utils.make_polymer_str(child, smiles_list, poly_size)

        # only polymers not yet in the population are kept
        if child_str not in seen_strs:
            offspring_pool.append(child)
            seen_strs.append(child_str)

    return offspring_pool