def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Populate the module-level training globals and build the ES solver.

    Sets ``population``, the log ``filebase``, the ``game`` config, the
    ``model`` and its ``num_params``, the solver ``es`` (chosen by the
    global ``optimizer`` string), and the worker packet-size constants.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    filebase = ('log/' + gamename + '.' + optimizer + '.'
                + str(num_episode) + '.' + str(population))

    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # Pick the solver; assign straight to the global instead of going
    # through a throwaway local name per branch.
    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        # Any other value falls back to OpenAI-style evolution strategies.
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)

    # Fixed-point scale and packet sizes used by the worker comms layer.
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def test_solver(solver, target_model):
    """Run ``solver`` until its best candidate matches ``target_model``.

    Uses a curriculum on the sequence length ``len_seq``: whenever the
    best loss drops to 1 or below, the sequence is lengthened and the
    solver's stored elites are re-scored under the new length so their
    rewards stay comparable.

    Returns:
        (history, result): the per-iteration best-fitness history and the
        solver's final ``result()`` tuple (best solution, best fitness, ...).
    """
    history = []
    j = 0
    len_seq = 2  # current curriculum sequence length
    while True:
        solutions = solver.ask()
        # Evaluate the whole population in parallel. (The original also
        # pre-allocated an np.zeros array here that was immediately
        # overwritten — removed as a dead store.)
        fitness_list = pool.map(
            fit_func,
            [(model2, target_model, solutions[i], len_seq)
             for i in range(solver.popsize)])
        solver.tell(fitness_list)
        # result[0] is the best solution, result[1] the best fitness.
        result = solver.result()
        history.append(result[1])
        if (j + 1) % 10 == 0:
            print("fitness at iteration", (j + 1), result[1])
        if (j + 1) % 100 == 0:
            evluate_func_test((model2, target_model, result[0]))
        if -result[1] <= 1:
            # Curriculum step: lengthen the sequence and re-score the
            # elites at the new length. (A SimpleGA instance was built
            # here and never used — removed.)
            len_seq += 1
            print('Seq_len:', len_seq)
            solver.elite_rewards = [
                fit_func((model2, target_model, solver.elite_params[kk], len_seq))
                for kk in range(len(solver.elite_rewards))]
        if -result[1] <= 0.0001:
            print("local optimum discovered by solver:\n", result[0])
            print("fitness score at this local optimum:", result[1])
            return history, result
        j += 1
# --- experiment setup (script level) ---

# Persist the parsed command-line arguments next to the logs for reproducibility.
json.dump(args_dict, open(os.path.join(args.log_dir, "training_arguments.json"), "w"), indent=4)

# Snapshot this script into the log dir and create the checkpoint directory.
# NOTE(review): string-built shell commands break on paths with spaces;
# shutil.copy / os.makedirs would be safer — confirm before changing.
os.system("cp " + __file__ + " " + args.log_dir + "/" + __file__)
os.system("mkdir " + args.log_dir + "/models")

NPARAMS = model.param_count  # number of model parameters to optimize
NPOPULATION = args.n_population  # population size
MAX_ITERATION = 4010  # generation budget (appears unused in this chunk)
seed_width = 10  # width of the random-seed range sampled per generation

# defines genetic algorithm solver
ga = SimpleGA(
    NPARAMS,  # number of model parameters
    sigma_init=0.5,  # initial standard deviation
    popsize=NPOPULATION,  # population size
    elite_ratio=0.1,  # percentage of the elites
    forget_best=True,  # forget the historical best elites
    weight_decay=0.00,  # weight decay coefficient
)

print(mp.cpu_count())
pool = mp.Pool(mp.cpu_count())  # process pool for parallel fitness evaluation
fit_func = evluate_func  # fitness function (name is misspelled at its definition)


# defines a function to use solver to solve fit_func
# NOTE(review): this definition is truncated in this chunk of the source —
# the loop body after ``while True:`` is not visible here.
def test_solver(solver):
    history = []
    j = 0
    seed_width = 10
    while True:
def test_solver(solver):
    """Run the solver loop indefinitely, restarting the GA on convergence.

    Each generation evaluates the population on a random seed drawn from
    ``[0, seed_width)``. When the best loss reaches 0.001 the current
    parameters are saved, ``seed_width`` is widened, and a fresh SimpleGA
    (smaller elite ratio, ``forget_best=False``) is seeded with the old
    elites re-scored with the last seed.

    Side effects: prints progress, writes loss plots and parameter JSON
    files under ``args.log_dir``. Never returns.
    """
    history = []
    j = 0
    seed_width = 20  # width of the random-seed range for evaluation
    while True:
        solutions = solver.ask()
        seed = np.random.randint(seed_width)
        # Evaluate the whole population in parallel on the same seed.
        # (Removed a dead np.zeros pre-allocation that was immediately
        # overwritten, plus a large commented-out elite-reset block.)
        fitness_list = pool.map(
            fit_func,
            [(model, solutions[i], seed, 5) for i in range(solver.popsize)])
        solver.tell(fitness_list)
        # result[0] is the best solution, result[1] the best fitness.
        result = solver.result()
        history.append(result[1])
        if (j + 1) % 10 == 0:
            print("fitness at iteration", (j + 1), result[1])
            if args.algo == 'ga':
                print("Best:", solver.elite_rewards[0])
            print('Seed width', seed_width)
        if (j + 1) % 100 == 0:
            evluate_func_test(model, result[0])
            plt.plot(history)
            plt.savefig(args.log_dir + '/loss_plot.png')
            plt.close()
        if (j + 1) % 1000 == 0:
            # Periodic checkpoint of the current best parameters.
            filename = args.log_dir + '/models/model_parameters_' + str(j + 1)
            with open(filename, 'wt') as out:
                # json.dump returns None; the original bound it to an
                # unused ``res`` local — removed.
                json.dump([np.array(result[0]).round(4).tolist()], out,
                          sort_keys=True, indent=2, separators=(',', ': '))
        if -result[1] <= 0.001:
            print("local optimum discovered by solver:\n", result[0])
            print("fitness score at this local optimum:", result[1])
            # save the best result
            filename = args.log_dir + '/models/model_parameters_' + str(j + 1)
            with open(filename, 'wt') as out:
                json.dump([np.array(result[0]).round(4).tolist()], out,
                          sort_keys=True, indent=2, separators=(',', ': '))
            seed_width += 5
            # Restart: carry the elites (re-scored on the last seed) into a
            # fresh GA with a tighter elite ratio.
            new_elite_params = solver.elite_params
            new_elite_rewards = pool.map(
                fit_func,
                [(model, solver.elite_params[kk], seed, 5)
                 for kk in range(len(solver.elite_rewards))])
            solver = SimpleGA(
                NPARAMS,  # number of model parameters
                sigma_init=0.5,  # initial standard deviation
                popsize=NPOPULATION,  # population size
                elite_ratio=0.05,  # percentage of the elites
                forget_best=False,  # forget the historical best elites
                weight_decay=0.00,  # weight decay coefficient
            )
            solver.elite_params = new_elite_params
            solver.elite_rewards = new_elite_rewards
        j += 1
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
    """Populate the module-level training globals and build the ES solver.

    Args:
        sigma_init: initial exploration std-dev for the solver.
        sigma_decay: per-step decay of sigma.
        init_opt: optional path to a pickled solver checkpoint; when
            non-empty, the solver is loaded from it instead of being
            constructed from the ``optimizer`` global.
    """
    global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = './log/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller_filebase = './controller/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    model = make_model()
    num_params = model.param_count
    #print("size of model", num_params)
    if len(init_opt) > 0:
        # Resume from a checkpoint. Use a context manager so the file
        # handle is closed deterministically (the original leaked it).
        # SECURITY: pickle.load executes arbitrary code from the file —
        # only load checkpoints you produced yourself.
        with open(init_opt, 'rb') as f:
            es = pickle.load(f)
    else:
        if optimizer == 'ses':
            ses = PEPG(num_params,
                       sigma_init=sigma_init,
                       sigma_decay=sigma_decay,
                       sigma_alpha=0.2,
                       sigma_limit=0.02,
                       elite_ratio=0.1,
                       weight_decay=0.005,
                       popsize=population)
            es = ses
        elif optimizer == 'ga':
            ga = SimpleGA(num_params,
                          sigma_init=sigma_init,
                          sigma_decay=sigma_decay,
                          sigma_limit=0.02,
                          elite_ratio=0.1,
                          weight_decay=0.005,
                          popsize=population)
            es = ga
        elif optimizer == 'cma':
            cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
            es = cma
        elif optimizer == 'pepg':
            pepg = PEPG(num_params,
                        sigma_init=sigma_init,
                        sigma_decay=sigma_decay,
                        sigma_alpha=0.20,
                        sigma_limit=0.02,
                        learning_rate=0.01,
                        learning_rate_decay=1.0,
                        learning_rate_limit=0.01,
                        weight_decay=0.005,
                        popsize=population)
            es = pepg
        else:
            # Default: OpenAI-style evolution strategies.
            oes = OpenES(num_params,
                         sigma_init=sigma_init,
                         sigma_decay=sigma_decay,
                         sigma_limit=0.02,
                         learning_rate=0.01,
                         learning_rate_decay=1.0,
                         learning_rate_limit=0.01,
                         antithetic=antithetic,
                         weight_decay=0.005,
                         popsize=population)
            es = oes
    # Fixed-point scale and worker packet sizes. NOTE(review): this
    # variant uses (4 + num_params), unlike the (5 + num_params) of the
    # sibling scripts — presumably intentional; confirm against the
    # matching worker code.
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Populate the module-level training globals and build the ES solver.

    Creates the experiment log directory, the controller, and the solver
    ``es`` (selected by the global ``optimizer`` string), plus the worker
    packet-size constants.
    """
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() + makedirs() pair.
    os.makedirs(filedir, exist_ok=True)
    filebase = (filedir + env_name + '.' + optimizer + '.'
                + str(num_episode) + '.' + str(population))
    controller = make_controller(args=config_args)
    num_params = controller.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        # Default: OpenAI-style evolution strategies.
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)
    # Fixed-point scale and packet sizes used by the worker comms layer.
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def test_solver(solver):
    """Run the solver loop on the fixed training batch ``train_X``.

    For the GA, would-be elites are evaluated a second time and every
    non-elite is masked to -inf, so only candidates that score well twice
    can enter the elite set. On convergence (best loss <= 0.001) the
    parameters are saved and the GA is restarted with a tighter elite
    ratio, carrying over the re-scored elites. Never returns.
    """
    history = []
    j = 0
    # NOTE(review): seed_width is only printed and incremented in this
    # variant — evaluation always uses train_X, not a sampled seed.
    seed_width = 100
    while True:
        solutions = solver.ask()
        # Evaluate the whole population in parallel. (Removed dead stores
        # from the original: an np.zeros pre-allocation that was
        # immediately overwritten, an unused ``seed = 1``, and an unused
        # ``res =`` binding of json.dump; also removed the large
        # commented-out elite-reset block.)
        fitness_list = pool.map(
            fit_func,
            [(model, solutions[i], train_X, 10) for i in range(solver.popsize)])
        if args.algo == 'ga':
            # Re-test the provisional elites once more; push everyone else
            # to -inf so elite selection can only pick from the re-checked
            # candidates.
            elite_idxs = np.argsort(fitness_list)[::-1][0:solver.elite_popsize]
            fitness_list_2 = pool.map(
                fit_func,
                [(model, solutions[elite_idx], train_X, 10)
                 for elite_idx in elite_idxs])
            for kk, elite_idx_ in enumerate(elite_idxs):
                fitness_list[elite_idx_] = fitness_list_2[kk]
            for kk in range(solver.popsize):
                if kk not in elite_idxs:
                    fitness_list[kk] = -np.inf
        solver.tell(fitness_list)
        # result[0] is the best solution, result[1] the best fitness.
        result = solver.result()
        history.append(result[1])
        if (j + 1) % 10 == 0:
            print("fitness at iteration", (j + 1), result[1])
            if args.algo == 'ga':
                print("Best:", solver.elite_rewards[0])
            if args.algo == 'oes':
                print('Best:', solver.curr_best_reward)
            print('Seed width', seed_width)
        if (j + 1) % 100 == 0:
            plt.plot(history)
            plt.savefig(args.log_dir + '/loss_plot.png')
            plt.close()
        if (j + 1) % 1000 == 0:
            # Periodic checkpoint of the current best parameters.
            filename = args.log_dir + '/models/model_parameters_' + str(j + 1)
            with open(filename, 'wt') as out:
                json.dump([np.array(result[0]).round(4).tolist()], out,
                          sort_keys=True, indent=2, separators=(',', ': '))
        if -result[1] <= 0.001:
            print("local optimum discovered by solver:\n", result[0])
            print("fitness score at this local optimum:", result[1])
            # save the best result
            filename = args.log_dir + '/models/model_parameters_' + str(j + 1)
            with open(filename, 'wt') as out:
                json.dump([np.array(result[0]).round(4).tolist()], out,
                          sort_keys=True, indent=2, separators=(',', ': '))
            seed_width += 5
            if args.algo == 'ga':
                # Restart the GA with a smaller elite ratio, carrying over
                # the current elites re-scored on train_X.
                new_elite_params = solver.elite_params
                new_elite_rewards = pool.map(
                    fit_func,
                    [(model, solver.elite_params[kk], train_X, 5)
                     for kk in range(len(solver.elite_rewards))])
                solver = SimpleGA(
                    NPARAMS,  # number of model parameters
                    sigma_init=0.5,  # initial standard deviation
                    popsize=NPOPULATION,  # population size
                    elite_ratio=0.05,  # percentage of the elites
                    forget_best=False,  # forget the historical best elites
                    weight_decay=0.00,  # weight decay coefficient
                )
                solver.elite_params = new_elite_params
                solver.elite_rewards = new_elite_rewards
        j += 1
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Populate the module-level training globals and build the ES solver.

    Novelty-search variant: the log filebase gains '.novelty' / mode /
    ns_mode suffixes, and each worker result packet additionally carries a
    behaviour-characterisation (BC) vector of ``BC_SIZE`` floats.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode
    population = num_worker * num_worker_trial
    os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
    filebase = os.path.join(ROOT, 'log', gamename+'.'+optimizer+'.'+ model_name + '.' + str(num_episode)+'.'+str(population)) + '.' + unique_id
    if novelty_search:
        filebase = filebase + '.novelty'
    # Behaviour-characterisation vector size by novelty mode.
    # NOTE(review): this nesting is reconstructed from a collapsed source
    # line — confirm whether the chain below sits inside the
    # ``if novelty_search`` block. It is placed at function level here
    # because RESULT_PACKET_SIZE needs BC_SIZE unconditionally.
    if novelty_mode == 'h':
        BC_SIZE = H_SIZE
    elif novelty_mode == 'z':
        BC_SIZE = Z_SIZE
    elif novelty_mode =='h_concat':
        BC_SIZE = BC_SEQ_LENGTH * H_SIZE
        #NOVELTY_THRESHOLD = 180
    elif novelty_mode == 'z_concat':
        BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
    elif novelty_mode == 'a_concat':
        BC_SIZE = BC_SEQ_LENGTH * A_SIZE
    else:
        BC_SIZE = 9  # dummy bc size, not used because the reward is the distance travelled.
    if novelty_mode:
        filebase = filebase + '.' + novelty_mode
    if ns_mode:
        filebase = filebase + '.' + ns_mode
    model = make_model(model_name, load_model=True)
    num_params = model.param_count
    print("size of model", num_params)
    # Fixed-point scale and worker packet sizes; results also carry the
    # BC vector, hence the (4 + BC_SIZE) factor.
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial
    # Solver selection by the global ``optimizer`` string.
    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=0.005,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=0.005,
                    popsize=population)
        es = pepg
    else:
        # Default: OpenAI-style evolution strategies.
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=0.005,
                     popsize=population)
        es = oes
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, weight_decay=0.005):
    """Populate the module-level training globals and build the solver.

    Extended variant: besides the usual ES solvers it supports PySwarms
    PSO topologies ('global_pso', 'local_pso', 'random_pso') and, as a
    fallback, any optimizer registered in Nevergrad; an unknown name
    raises ValueError. ``weight_decay`` is forwarded to every solver.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params,
                   sigma_init=sigma_init,
                   sigma_decay=sigma_decay,
                   sigma_alpha=0.2,
                   sigma_limit=0.02,
                   elite_ratio=0.1,
                   weight_decay=weight_decay,
                   popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=weight_decay,
                      popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params,
                    sigma_init=sigma_init,
                    popsize=population,
                    weight_decay=weight_decay)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_alpha=0.20,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    weight_decay=weight_decay,
                    popsize=population)
        es = pepg
    elif optimizer == 'oes':
        oes = OpenES(num_params,
                     sigma_init=sigma_init,
                     sigma_decay=sigma_decay,
                     sigma_limit=0.02,
                     learning_rate=0.01,
                     learning_rate_decay=1.0,
                     learning_rate_limit=0.01,
                     antithetic=antithetic,
                     weight_decay=weight_decay,
                     popsize=population)
        es = oes
    # Plain PSO branch kept disabled in the source:
    # elif optimizer == 'pso':
    #     pso = PSO(num_params,
    #               sigma_init=sigma_init,
    #               weight_decay=weight_decay,
    #               popsize=population)
    #     es = pso
    elif optimizer == 'global_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='global')
        es = pso
    elif optimizer == 'local_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='local')
        es = pso
    elif optimizer == 'random_pso':
        pso = Pyswarms(num_params,
                       sigma_init=sigma_init,
                       weight_decay=weight_decay,
                       popsize=population,
                       communication_topology='random')
        es = pso
    else:
        # Fallback: any optimizer name registered in Nevergrad's registry.
        if optimizer in list(sorted(ng.optimizers.registry.keys())):
            ng_optimizer = Nevergrad(optimizer,
                                     num_params,
                                     sigma_init=sigma_init,
                                     popsize=population,
                                     weight_decay=weight_decay)
            es = ng_optimizer
        else:
            raise ValueError('Could not find optimizer!')
    # Fixed-point scale and packet sizes used by the worker comms layer.
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial