def transfer_bandit(problem,
                    dims,
                    reps,
                    trans,
                    psize=50,
                    gen=100,
                    src_models=None,
                    time_limits=None,
                    sample_size=None):
    """Evolve a 0/1-initialised population with optional bandit transfer.

    Every repetition starts from a fresh random population.  On generations
    where ``trans['transfer']`` is truthy and the generation index is a
    multiple of ``trans['delta']``, one source model is drawn with
    ``roulette_wheel_selection`` from the current selection probabilities,
    combined with the current population in a ``MixtureModel``, and
    ``sample_size`` offspring are sampled from that mixture.  The reward
    returned by the mixture model then updates the selection probabilities
    through ``EXP3``.  All other generations use ``total_crossover`` followed
    by mutation with rate ``1 / dims``.

    Args:
        problem: Passed unchanged to every ``fitness_calc`` call.
        dims: Forwarded to ``get_pop_init`` and used for the mutation rate
            ``1 / dims`` (presumably the number of decision variables).
        reps: Number of independent repetitions.
        trans: Mapping with key ``'transfer'`` (enables transfer) and, when
            transfer is enabled, ``'delta'`` (transfer interval in
            generations).
        psize: Population size.
        gen: Number of generations per repetition; generation 0 is the
            initial population.
        src_models: Source probabilistic models to choose from.  May be
            omitted or empty only when transfer is disabled.
        time_limits: Optional sequence with one budget per repetition, in
            the units of ``time()`` (presumably seconds).  A repetition stops
            after the first generation at which its accumulated generation
            time exceeds the budget.  ``None`` means no limit.
        sample_size: Number of offspring sampled on a transfer generation.
            Defaults to ``psize``.

    Returns:
        Tuple ``(fitness_hist, alpha, prob, fitness_time)``:

        * ``fitness_hist`` -- array of shape ``(reps, gen, psize)`` holding
          the population fitness after each generation.
        * ``alpha`` -- one list per repetition with the ``mixModel.alpha``
          value of every transfer generation.
        * ``prob`` -- one ``(gen, len(src_models))`` array per repetition
          with the source-selection probabilities.
        * ``fitness_time`` -- array of shape ``(reps, gen)`` with the time
          spent on each generation.

        Rows of generations skipped because of a time limit stay zero.

    Raises:
        ValueError: If ``time_limits`` does not have ``reps`` entries, or if
            transfer is enabled but ``src_models`` is empty.
    """
    # ``None`` replaces the former shared mutable ``[]`` default.
    if src_models is None:
        src_models = []

    # Explicit exception instead of ``assert`` so the check survives ``-O``.
    if time_limits is None:
        time_limits = [float('inf')] * reps
    elif len(time_limits) != reps:
        raise ValueError(
            "time_limits length does not match the repetition numbers")

    if sample_size is None:
        sample_size = psize

    if trans['transfer'] and (not src_models):
        raise ValueError(
            'No probabilistic models stored for transfer optimization.')

    def init_func(n):
        # Random genes rounded to 0.0 / 1.0.
        return np.round(np.random.rand(n))

    model_num = len(src_models)

    fitness_hist = np.zeros([reps, gen, psize])
    fitness_time = np.zeros((reps, gen))
    alpha = []
    prob = []

    for rep in range(reps):
        print('------------------------ rep: {} ---------------------'.format(
            rep))
        start = time()
        alpha_rep = []
        pop = get_pop_init(psize, dims, init_func)
        for i in range(psize):
            pop[i].fitness_calc(problem)

        bestfitness = np.max(pop).fitness
        fitness = Chromosome.fitness_to_numpy(pop)
        fitness_hist[rep, 0, :] = fitness

        prob_rep = np.zeros((gen, model_num))
        # Uniform initial selection probabilities.  Guarded because
        # ``model_num`` is legitimately 0 when transfer is disabled, and
        # ``1 / 0`` used to abort such runs with ZeroDivisionError.
        if model_num:
            prob_rep[0, :] = (1 / model_num) * np.ones(model_num)
        cum_rew = np.zeros(model_num)  # cumulative reward per source model

        fitness_time[rep, 0] = time() - start
        time_passed = fitness_time[rep, 0]
        print('Generation 0 best fitness = %f' % bestfitness)

        for i in range(1, gen):
            start = time()
            if trans['transfer'] and i % trans['delta'] == 0:
                # Pick one source model by roulette wheel over the previous
                # generation's selection probabilities.
                idx = roulette_wheel_selection(prob_rep[i - 1, :])
                sel_model = [src_models[idx]]

                # Fit the mixture to the current population and sample from it.
                mixModel = MixtureModel(sel_model)
                mixModel.createTable(Chromosome.genes_to_numpy(pop), True,
                                     'umd')
                mixModel.EMstacking()
                alpha_rep.append(mixModel.alpha)
                mixModel.mutate(version='bandit')
                offsprings_tmp = mixModel.sample(sample_size)

                offsprings = np.array([
                    Chromosome(offspring_tmp)
                    for offspring_tmp in offsprings_tmp
                ])
                # Sized by ``sample_size`` (not ``psize``) so the fitness
                # vector always matches the number of sampled offspring.
                cfitness = np.zeros(sample_size)
                for j in range(sample_size):
                    cfitness[j] = offsprings[j].fitness_calc(problem)

                # Reward of the chosen model, then EXP3 update of the
                # selection probabilities and cumulative rewards.
                rew = mixModel.reward(model_num, offsprings_tmp, cfitness)
                prob_rep[i, :], cum_rew = EXP3(model_num, rew, idx, cum_rew,
                                               prob_rep[i - 1])
                print('Mixture coefficients: %s' % np.array(mixModel.alpha))
            else:
                # Plain evolutionary step: crossover, mutation, evaluation.
                offsprings = total_crossover(pop)
                for j in range(psize):
                    offsprings[j].mutation(1 / dims)

                cfitness = np.zeros(psize)
                for j in range(psize):
                    cfitness[j] = offsprings[j].fitness_calc(problem)
                # No bandit update on this generation.
                prob_rep[i, :] = prob_rep[i - 1, :]

            # Survivor selection over parents and offspring together.
            pop, fitness = total_selection(np.concatenate((pop, offsprings)),
                                           np.concatenate((fitness, cfitness)),
                                           psize)

            # ``fitness[0]`` is reported as the best value; this assumes
            # ``total_selection`` returns survivors best-first.
            bestfitness = fitness[0]
            fitness_hist[rep, i, :] = fitness
            fitness_time[rep, i] = time() - start
            time_passed += fitness_time[rep, i]
            print('Generation %d best fitness = %f' % (i, bestfitness))
            if time_limits[rep] < time_passed:
                break

        alpha.append(alpha_rep)
        prob.append(prob_rep)

    return fitness_hist, alpha, prob, fitness_time
def transfer_bandit(problem,
                    src_models,
                    n_vars,
                    psize=100,
                    sample_size=100,
                    gen=100,
                    muc=10,
                    mum=10,
                    reps=1,
                    delta=2,
                    build_model=False):
    """Run a real-coded EA whose transfer source is chosen by a bandit.

    Each repetition starts from a population with genes drawn uniformly from
    ``[0, 1)``.  Every ``delta``-th generation one source model is drawn by
    roulette wheel, mixed with the current population in a ``MixtureModel``
    and ``sample_size`` offspring are sampled from it; the mixture's reward
    feeds the ``EXP3`` update of the selection probabilities.  The remaining
    generations pair parents at random, apply ``sbx_crossover`` and mutate
    both children.

    Args:
        problem: Iterable unpacked into every ``fitness_calc(*problem)`` call.
        src_models: Non-empty sequence of source probabilistic models.
        n_vars: Number of genes per individual.
        psize: Population size.
        sample_size: Offspring sampled on a transfer generation.
        gen: Generations per repetition (generation 0 is the initial
            population).
        muc: Forwarded to ``sbx_crossover``.
        mum: Forwarded to each offspring's ``mutation``.
        reps: Number of independent repetitions.
        delta: Transfer interval in generations.
        build_model: Accepted but not used by this function.

    Returns:
        Tuple ``(fitness_hist, alpha, prob, fitness_time)`` where
        ``fitness_hist`` has shape ``(reps, gen, psize)``, ``alpha`` holds one
        list of ``mixModel.alpha`` values per repetition, ``prob`` holds one
        ``(gen, len(src_models))`` probability array per repetition and
        ``fitness_time`` has shape ``(reps, gen)``.

    Raises:
        ValueError: If ``src_models`` is empty.
    """
    if not src_models:
        raise ValueError(
            'No probabilistic models stored for transfer optimization.')

    def random_genes(n):
        # Uniform samples in [0, 1) for a fresh individual.
        return np.random.rand(n)

    n_sources = len(src_models)
    fitness_hist = np.zeros([reps, gen, psize])
    fitness_time = np.zeros((reps, gen))
    alpha = []
    prob = []

    for run in range(reps):
        print('------------------------ rep: {} ---------------------'.format(
            run))
        tic = time()
        run_alpha = []

        # Initial population and its evaluation.
        pop = get_pop_init(psize, n_vars, random_genes, p_type='arm')
        for k in range(psize):
            pop[k].fitness_calc(*problem)

        bestfitness = np.max(pop).fitness
        fitness = Chromosome.fitness_to_numpy(pop)
        fitness_hist[run, 0, :] = fitness

        # Bandit state: uniform selection probabilities, zero rewards.
        run_prob = np.zeros((gen, n_sources))
        run_prob[0, :] = np.full(n_sources, 1 / n_sources)
        cum_rew = np.zeros(n_sources)

        fitness_time[run, 0] = time() - tic
        print('Generation 0 best fitness = %f' % bestfitness)

        for g in range(1, gen):
            tic = time()
            transfer_step = g % delta == 0

            if transfer_step:
                child_fitness = np.zeros(sample_size)

                # Draw a source model and build the mixture around it.
                arm = roulette_wheel_selection(run_prob[g - 1, :])
                mixture = MixtureModel([src_models[arm]])
                mixture.createTable(Chromosome.genes_to_numpy(pop), True,
                                    'mvarnorm')
                mixture.EMstacking()
                run_alpha.append(mixture.alpha)
                mixture.mutate(version='bandit')
                samples = mixture.sample(sample_size)

                # Wrap and evaluate the sampled solutions.
                children = np.array([ChromosomeKA(genes) for genes in samples])
                for k in range(sample_size):
                    child_fitness[k] = children[k].fitness_calc(*problem)

                # Reward of the pulled arm drives the EXP3 update.
                reward = mixture.reward(n_sources, samples, child_fitness)
                run_prob[g, :], cum_rew = EXP3(n_sources, reward, arm,
                                               cum_rew, run_prob[g - 1])
            else:
                # Random pairing, SBX crossover, then mutation of both kids.
                order = np.random.permutation(psize)
                children = np.empty(psize, dtype=object)
                for k in range(0, psize, 2):
                    children[k] = ChromosomeKA(n_vars)
                    children[k + 1] = ChromosomeKA(n_vars)
                    mother = pop[order[k]]
                    father = pop[order[k + 1]]
                    children[k].genes, children[k + 1].genes = sbx_crossover(
                        mother, father, muc, n_vars)
                    children[k].mutation(mum, n_vars)
                    children[k + 1].mutation(mum, n_vars)

                child_fitness = np.zeros(psize)
                for k in range(psize):
                    child_fitness[k] = children[k].fitness_calc(*problem)

                # Probabilities carry over unchanged without a transfer.
                run_prob[g, :] = run_prob[g - 1, :]

            if transfer_step:
                print('cfitness mean: ', np.mean(child_fitness))

            # Survivor selection over parents and offspring together.
            pop, fitness = total_selection(
                np.concatenate((pop, children)),
                np.concatenate((fitness, child_fitness)), psize)

            fitness_hist[run, g, :] = fitness
            fitness_time[run, g] = time() - tic
            if fitness[0] > bestfitness:
                bestfitness = fitness[0]
            print('Generation %d best fitness = %f' % (g, bestfitness))

        alpha.append(run_alpha)
        prob.append(run_prob)

    return fitness_hist, alpha, prob, fitness_time
def transfer_bandit(sLen,
                    src_models,
                    psize=50,
                    gen=100,
                    muc=10,
                    mum=10,
                    reps=1,
                    delta=2,
                    build_model=True):
    """Evolve controller weights with bandit-selected source-model transfer.

    A ``PoledCart(sLen)`` and a ``Net(6, 10, 1)`` are created once; every
    individual is evaluated with ``fitness_calc(net, cart, sLen)``.  Each
    repetition starts from genes drawn uniformly from ``[-6, 6)``.  Every
    ``delta``-th generation one source model is drawn by roulette wheel,
    mixed with the current population in a ``MixtureModel`` and ``psize``
    offspring are sampled from it, after which ``EXP3`` updates the selection
    probabilities.  Other generations use random pairing, ``sbx_crossover``
    and mutation.

    A fitness within ``0.0001`` of ``2000`` (or above) counts as a solution.

    Args:
        sLen: Passed to ``PoledCart`` and to every ``fitness_calc`` call.
        src_models: Non-empty sequence of source probabilistic models.
        psize: Population size.
        gen: Generations per repetition (generation 0 is the initial
            population).
        muc: Forwarded to ``sbx_crossover``.
        mum: Forwarded to each offspring's ``mutation``.
        reps: Number of independent repetitions.
        delta: Transfer interval in generations.
        build_model: When truthy, a repetition stops as soon as the best
            survivor is a solution and the remaining rows of its fitness
            history are filled with that best fitness.

    Returns:
        Tuple ``(fitness_hist, alpha, fitness_time, func_eval_nums, prob)``:
        ``fitness_hist`` has shape ``(reps, gen, psize)``; ``alpha`` holds,
        per repetition, the concatenated ``mixModel.alpha`` values;
        ``fitness_time`` has shape ``(reps, gen)``; ``func_eval_nums`` holds,
        per repetition, the number of evaluations made up to and including
        the first solution, or ``None`` if none was found; ``prob`` holds one
        ``(gen, len(src_models))`` probability array per repetition.

    Raises:
        ValueError: If ``src_models`` is empty.
    """
    if not src_models:
        raise ValueError(
            'No probabilistic models stored for transfer optimization.')

    def random_weights(n):
        # Uniform samples in [-6, 6).
        return 12 * np.random.rand(n) - 6

    # A fitness f is a solution when f - solved_fitness > -tolerance.
    solved_fitness = 2000
    tolerance = 0.0001

    fitness_hist = np.zeros([reps, gen, psize])
    fitness_time = np.zeros((reps, gen))
    alpha = []
    prob = []
    func_eval_nums = []

    cart = PoledCart(sLen)
    net = Net(6, 10, 1)  # 6 inputs, 10 hidden units, 1 output
    n_vars = net.nVariables
    n_sources = len(src_models)

    for run in range(reps):
        print('-------------------- rep: {} -------------------'.format(run))
        tic = time()
        run_alpha = []

        # Bandit state: uniform selection probabilities, zero rewards.
        run_prob = np.zeros((gen, n_sources))
        run_prob[0, :] = 1 / n_sources
        cum_rew = np.zeros(n_sources)

        # Evaluations are only counted until the first solution shows up.
        evals_used = 0
        solved = False

        pop = get_pop_init(psize, n_vars, random_weights,
                           p_type='double_pole')
        for k in range(psize):
            member = pop[k]
            member.fitness_calc(net, cart, sLen)
            if not solved:
                evals_used += 1
                if member.fitness - solved_fitness > -tolerance:
                    solved = True

        bestfitness = np.max(pop).fitness
        fitness = Chromosome.fitness_to_numpy(pop)
        fitness_hist[run, 0, :] = fitness
        fitness_time[run, 0] = time() - tic
        print('Generation 0 best fitness = %f' % bestfitness)

        for g in range(1, gen):
            tic = time()
            transfer_step = g % delta == 0
            child_fitness = np.zeros(psize)

            if transfer_step:
                # Draw a source model and build the mixture around it.
                arm = roulette_wheel_selection(run_prob[g - 1])
                mixture = MixtureModel([src_models[arm]])
                mixture.createTable(Chromosome.genes_to_numpy(pop), True,
                                    'mvarnorm')
                mixture.EMstacking()
                run_alpha = np.concatenate((run_alpha, mixture.alpha), axis=0)
                mixture.mutate()
                samples = mixture.sample(psize)

                children = np.array(
                    [ChromosomePole(genes) for genes in samples])
            else:
                # Random pairing, SBX crossover, then mutation of both kids.
                order = np.random.permutation(psize)
                children = np.empty(psize, dtype=object)
                for k in range(0, psize, 2):
                    children[k] = ChromosomePole(n_vars)
                    children[k + 1] = ChromosomePole(n_vars)
                    mother = pop[order[k]]
                    father = pop[order[k + 1]]
                    children[k].genes, children[k + 1].genes = sbx_crossover(
                        mother, father, muc, n_vars)
                    children[k].mutation(mum, n_vars)
                    children[k + 1].mutation(mum, n_vars)

            # Offspring evaluation is the same for both branches.
            for k in range(psize):
                score = children[k].fitness_calc(net, cart, sLen)
                child_fitness[k] = score
                if not solved:
                    evals_used += 1
                    if child_fitness[k] - solved_fitness > -tolerance:
                        solved = True

            if transfer_step:
                # Reward of the pulled arm drives the EXP3 update.
                reward = mixture.reward(n_sources, samples, child_fitness)
                run_prob[g, :], cum_rew = EXP3(n_sources, reward, arm,
                                               cum_rew, run_prob[g - 1])
                print('cfitness mean: ', np.mean(child_fitness))
            else:
                # Probabilities carry over unchanged without a transfer.
                run_prob[g, :] = run_prob[g - 1, :]

            # Survivor selection over parents and offspring together.
            pop, fitness = total_selection(
                np.concatenate((pop, children)),
                np.concatenate((fitness, child_fitness)), psize)

            fitness_hist[run, g, :] = fitness
            fitness_time[run, g] = time() - tic
            if fitness[0] > bestfitness:
                bestfitness = fitness[0]
            print('Generation %d best fitness = %f' % (g, bestfitness))
            print(fitness[0])

            if build_model and fitness[0] - solved_fitness > -tolerance:
                print('Solution found!')
                # Pad the rest of the history with the solving fitness.
                fitness_hist[run, g:, :] = fitness[0]
                break

        func_eval_nums.append(evals_used if solved else None)
        alpha.append(run_alpha)
        prob.append(run_prob)

    return fitness_hist, alpha, fitness_time, func_eval_nums, prob