def find_poly_mw(population, poly_size, smiles_list):
    '''
    Computes the molecular weight of every polymer in a population

    Parameters
    ---------
    population: list
        polymers to evaluate
    poly_size: int
        number of monomers per polymer
    smiles_list: list
        list of all possible monomer SMILES

    Returns
    -------
    poly_mw_list: list
        molecular weights, one per polymer, in the same order as population
    '''

    def _mol_weight(polymer):
        # expand the polymer into its SMILES string, parse it with pybel
        # and read the molecular weight off the resulting molecule object
        smiles = utils.make_polymer_str(polymer, smiles_list, poly_size)
        return pybel.readstring('smi', smiles).molwt

    return [_mol_weight(polymer) for polymer in population]
def run_geo_opt(polymer, poly_size, smiles_list):
    '''
    Runs an xTB geometry optimization for one polymer, unless its output
    file is already present

    Files touched (all relative to the current working directory):
    input/<name>.xyz is written, output/<name>.out receives the xTB stdout,
    opt/<name>_opt.xyz receives a copy of xtbopt.xyz, and a scratch
    directory called <name> is created for the run and removed afterwards.
    The exit codes of the shell commands are not checked.

    Parameters
    ---------
    polymer: list (specific format)
        [(#,#,#,#), A, B]
    poly_size: int
        number of monomers per polymer
    smiles_list: list
        list of all possible monomer SMILES
    '''
    # file name convention: monoIdx1_monoIdx2_fullNumerSequence
    file_name = utils.make_file_name(polymer, poly_size)

    # an existing output file means xTB was already run for this polymer
    if os.path.isfile('output/%s.out' % (file_name)):
        print("output file existed")
        return

    # build the molecule from its SMILES string and generate 3D coordinates
    poly_smiles = utils.make_polymer_str(polymer, smiles_list, poly_size)
    mol = pybel.readstring('smi', poly_smiles)
    utils.make3D(mol)

    # starting geometry handed to xTB
    mol.write('xyz', 'input/%s.xyz' % (file_name), overwrite=True)

    shell_steps = (
        # scratch directory the optimization runs in
        '(mkdir %s)' % (file_name),
        # geometry optimization, stdout redirected to the output folder
        '(cd %s && /ihome/ghutchison/geoffh/xtb/xtb ../input/%s.xyz --opt >../output/%s.out)'
        % (file_name, file_name, file_name),
        # keep the optimized geometry
        '(cp %s/xtbopt.xyz opt/%s_opt.xyz)' % (file_name, file_name),
        # throw the scratch directory away
        '(rm -r %s)' % (file_name),
    )
    # every step is attempted in order, whatever the previous one returned
    for command in shell_steps:
        subprocess.call(command, shell=True)
def init_gen(pop_size, poly_size, num_mono_species, opt_property, perc,
             smiles_list):
    '''
    Initializes parameters, creates the first population, evaluates it and
    writes the generation-1 output files

    The population is filled with randomly generated polymers until it holds
    pop_size polymers with distinct SMILES strings; the loop does not
    terminate if fewer than pop_size distinct polymers can be generated.

    Files written in the current working directory (each is then copied to
    a '<name>_copy.txt' backup): gens_analysis.txt, gens_population.txt,
    gens_values.txt, gens_spear.txt (single data row, no header), plus
    gens_dip_polar.txt when opt_property is 'dip' or gens_polar_dip.txt when
    it is 'pol'.

    Parameters
    ----------
    pop_size: int
        number of polymers in each generation
    poly_size: int
        number of monomers per polymer
    num_mono_species: int
        number of monomer species in each polymer (e.g. copolymer = 2)
    opt_property: str
        property being optimized: 'mw', 'dip' or 'pol'
    perc: float
        percentage of number of monomers to compare with Spearman calculation
    smiles_list: list
        list of all possible monomer SMILES

    Returns
    -------
    params: list (specific format)
        [pop_size, poly_size, num_mono_species, opt_property, smiles_list,
        sequence_list, mono_list, population, poly_property_list, n,
        gen_counter, spear_counter, prop_value_counter]

    Raises
    ------
    ValueError
        if opt_property is not one of 'mw', 'dip', 'pol'
    '''
    # fail fast: previously an unknown property only printed a message and
    # the function crashed later on an unbound poly_property_list
    if opt_property not in ('mw', 'dip', 'pol'):
        raise ValueError(
            "Error: opt_property not recognized: %r "
            "(expected 'mw', 'dip' or 'pol')" % (opt_property,))

    # create all possible numerical sequences for given number of monomer types
    sequence_list = utils.find_sequences(num_mono_species)

    # number of monomers compared in the Spearman calculation
    n = int(len(smiles_list) * perc)

    # generation counter starts at 1, convergence counters at 0
    gen_counter = 1
    spear_counter = 0
    prop_value_counter = 0

    # monomer frequency list [[mono index 1, frequency], [mono index 2, frequency], ...]
    mono_list = [[mono_idx, 0] for mono_idx in range(len(smiles_list))]

    # create initial population as list of polymers
    population = []
    # SMILES strings already in the population; comparing strings (not the
    # index lists) also catches homopolymer-type duplicates
    seen_smiles = set()
    while len(population) < pop_size:
        # select sequence type for polymer
        poly_seq = sequence_list[random.randint(0, len(sequence_list) - 1)]
        temp_poly = [poly_seq]

        # select monomer types for polymer
        for _ in range(num_mono_species):
            poly_monomer = random.randint(0, len(smiles_list) - 1)
            temp_poly.append(poly_monomer)
            # NOTE(review): the frequency is incremented even when this
            # candidate is rejected as a duplicate below - confirm whether
            # mono_list should only count polymers kept in the population
            mono_list[poly_monomer][1] += 1

        temp_poly_str = utils.make_polymer_str(temp_poly, smiles_list,
                                               poly_size)
        if temp_poly_str not in seen_smiles:
            population.append(temp_poly)
            seen_smiles.add(temp_poly_str)

    # find initial population properties
    secondary_list = None
    if opt_property == 'mw':
        poly_property_list = find_poly_mw(population, poly_size, smiles_list)
    else:
        # electronic properties: index 0 holds dipole moments,
        # index 1 holds polarizabilities
        elec_prop_list = find_elec_prop(population, poly_size, smiles_list)
        if opt_property == 'dip':
            poly_property_list = elec_prop_list[0]
            secondary_list = elec_prop_list[1]
        else:
            poly_property_list = elec_prop_list[1]
            secondary_list = elec_prop_list[0]

    # initial min, max, and avg of the optimized property
    min_test = min(poly_property_list)
    max_test = max(poly_property_list)
    avg_test = mean(poly_property_list)

    # for 'dip'/'pol' an extra file records the best polymer together with
    # its other electronic property
    extra_name = None
    if opt_property == 'dip':
        extra_name = 'gens_dip_polar.txt'
        extra_header = 'compound, gen, dipole, polar \n'
    elif opt_property == 'pol':
        extra_name = 'gens_polar_dip.txt'
        extra_header = 'compound, gen, polar, dip \n'

    if extra_name is not None:
        best_idx = poly_property_list.index(max_test)
        compound = utils.make_file_name(population[best_idx], poly_size)
        secondary_val = secondary_list[best_idx]

    # write the output files; 'with' closes each one even if a write fails
    with open('gens_analysis.txt', 'w+') as analysis_file:
        analysis_file.write('min, max, avg, spearman, \n')
        analysis_file.write('%f, %f, %f, n/a, \n' %
                            (min_test, max_test, avg_test))

    with open('gens_population.txt', 'w+') as population_file:
        population_file.write('polymer populations \n')
        for polymer in population:
            poly_name = utils.make_file_name(polymer, poly_size)
            population_file.write('%s, ' % (poly_name))
        population_file.write('\n')

    with open('gens_values.txt', 'w+') as values_file:
        values_file.write('%s values \n' % (opt_property))
        for value in poly_property_list:
            values_file.write('%f, ' % (value))
        values_file.write('\n')

    if extra_name is not None:
        with open(extra_name, 'w+') as extra_file:
            extra_file.write(extra_header)
            extra_file.write('%s, %d, %f, %f, \n' %
                             (compound, 1, max_test, secondary_val))

    with open('gens_spear.txt', 'w+') as spear_file:
        spear_file.write('1, n/a, n/a, n/a, \n')

    # make backup copies of output files
    output_names = [
        'gens_analysis.txt', 'gens_population.txt', 'gens_values.txt'
    ]
    if extra_name is not None:
        output_names.append(extra_name)
    output_names.append('gens_spear.txt')
    for output_name in output_names:
        shutil.copy(output_name, output_name.replace('.txt', '_copy.txt'))

    params = [
        pop_size, poly_size, num_mono_species, opt_property, smiles_list,
        sequence_list, mono_list, population, poly_property_list, n,
        gen_counter, spear_counter, prop_value_counter
    ]
    return params
def crossover_mutate(parent_list, pop_size, poly_size, num_mono_species,
                     sequence_list, smiles_list, mono_list):
    '''
    Builds a new population from the parents plus mutated crossover children

    The new population starts as a deep copy of parent_list. Children are
    then generated until it holds pop_size polymers; a child whose SMILES
    string is already present is discarded. There is no cap on the number
    of attempts, so the loop only ends once enough unique children exist.

    TODO: fix possible duplication problem after mutation

    Parameters
    ---------
    parent_list: list
        list of parent polymers
    pop_size: int
        number of polymers in each generation
    poly_size: int
        number of monomers per polymer
    num_mono_species: int
        number of monomer species in each polymer (e.g. copolymer = 2)
    sequence_list: list
        list of sequences
    smiles_list: list
        list of all possible monomer SMILES
    mono_list: list
        monomer frequency list, passed through to mutate

    Returns
    -------
    new_pop: list
        population list after crossover and mutation
    '''
    # parents are carried over unchanged (as copies)
    new_pop = deepcopy(parent_list)
    new_pop_str = [
        utils.make_polymer_str(parent, smiles_list, poly_size)
        for parent in new_pop
    ]

    last_parent = len(parent_list) - 1

    while len(new_pop) < pop_size:
        # pick two parents by index
        idx_a = random.randint(0, last_parent)
        idx_b = random.randint(0, last_parent)
        # with more than one parent available, insist on two different ones
        if last_parent > 0:
            while idx_b == idx_a:
                idx_b = random.randint(0, last_parent)

        # number of monomers inherited from parent A
        num_mono_a = random.randint(1, num_mono_species)

        # coin flip: 0 -> sequence of parent A, 1 -> sequence of parent B
        seq_donor = idx_a if random.randint(0, 1) == 0 else idx_b

        # child = [sequence, monomers 1..num_mono_a of A, remaining of B]
        child = [parent_list[seq_donor][0]]
        child.extend(parent_list[idx_a][pos]
                     for pos in range(1, num_mono_a + 1))
        child.extend(parent_list[idx_b][pos]
                     for pos in range(num_mono_a + 1, num_mono_species + 1))

        # give child opportunity for mutation
        child = mutate(child, sequence_list, smiles_list, mono_list)
        child_str = utils.make_polymer_str(child, smiles_list, poly_size)

        # keep only children not already in the population
        if child_str not in new_pop_str:
            new_pop.append(child)
            new_pop_str.append(child_str)

    return new_pop