def main():
    """Plot the Progressive-PCG Solarfox experiment.

    Reads every run log under the solarfox-ls-pcg-progressive-fixed results
    folder and renders one combined plot of all runs into its plots/ dir.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--smooth', help='How many points to smooth', type=int, default=200)
    parser.add_argument('--font-size', help='Font size on plots', type=int, default=24)
    args = parser.parse_args()

    # Main plot for each experiment (glob matches the single hard-coded folder).
    for experiment_folder in glob.iglob('./results/solarfox-ls-pcg-progressive-fixed/'):
        # Folder name -> human readable title, e.g. "solarfox-ls-..." -> "Solarfox ..."
        title = experiment_folder.split('/')[-2].replace('-', ' ').title()
        title = title.replace('Pcg', 'PCG').replace('Ls ', '')
        path = os.path.join(experiment_folder, 'plots/')
        data = []
        i = 0
        # Collect every run's log data; per-run plotting is disabled below.
        for experiment_log in glob.iglob(os.path.join(experiment_folder, 'logs/*.log')):
            i += 1
            experiment_data = load(experiment_log)
            experiment_title = title + " " + str(i)
            data.append(experiment_data)
            #plot(path, experiment_title, experiment_data, smooth=args.smooth, fontsize=args.font_size, multiple=False)
            plt.clf()
        make_path(path)
        title = "Progressive PCG in Solarfox"
        # Combined plot of all runs; no y-axis limits.
        plot(path, title, data, smooth=args.smooth, fontsize=args.font_size, multiple=True, ymin_lim=None, ymax_lim=None)
        plt.clf()

    # Mixed plot for each experiment
    # NOTE(review): the triple-quoted block below is unterminated in this view —
    # the remainder of the file (presumably commented-out mixed-plot code) is truncated.
    '''
def get_selector(selector_name, game, path, fixed=False):
    """Build a level selector shared across worker processes.

    Registers all selector classes with a multiprocessing BaseManager and
    returns a manager proxy so several environment workers can share one
    selector instance.

    Args:
        selector_name: Selector key, e.g. "random-all", "seq-3",
            "pcg-progressive"; None yields None.
        game: Game name forwarded to the selector.
        path: Directory for generated levels; created if missing.
        fixed: Forwarded only to RandomWithDifPCGSelector.

    Returns:
        A manager proxy for the chosen selector, or None if selector_name
        is None.

    Raises:
        Exception: If selector_name matches no known selector.
    """
    # Register classes for sharing across procs
    for c in [RandomSelector, RandomWithDifSelector, RandomPCGSelector,
              RandomWithDifPCGSelector, ProgressivePCGSelector, SequentialSelector]:
        BaseManager.register(c.__name__, c)
    manager = BaseManager()
    manager.start()
    # Determine selector
    if selector_name is not None:
        make_path(path)
        path = os.path.realpath(path)
        if selector_name == "random-all":
            selector = manager.RandomSelector(path, game, [0, 1, 2, 3, 4])
        elif selector_name == "random-0123":
            selector = manager.RandomSelector(path, game, [0, 1, 2, 3])
        elif selector_name.startswith('random-'):
            # Numeric suffix maps to difficulty in [0, 1]: "random-3" -> 0.3.
            difficulty = float(selector_name.split('random-')[1]) * 0.1
            selector = manager.RandomWithDifSelector(path, game, difficulty)
        elif selector_name.startswith('seq-'):
            difficulty = float(selector_name.split('seq-')[1]) * 0.1
            selector = manager.SequentialSelector(path, game, difficulty)
        elif selector_name == "pcg-random":
            selector = manager.RandomPCGSelector(path, game)
        elif selector_name.startswith('pcg-random-'):
            difficulty = float(selector_name.split('pcg-random-')[1]) * 0.1
            selector = manager.RandomWithDifPCGSelector(path, game, difficulty, fixed=fixed)
        elif selector_name == "pcg-progressive":
            selector = manager.ProgressivePCGSelector(path, game)
        elif selector_name == "pcg-progressive-fixed":
            selector = manager.ProgressivePCGSelector(path, game, upper_limit=False)
        else:
            # Bug fix: the message previously contained a stray "+ " inside
            # the string literal ("Unknown level selector: + ").
            raise Exception("Unknown level selector: " + selector_name)
    else:
        return None
    return selector
def get_selector(selector_name, game, path, fixed=False, max=-1):
    """Build a level selector (shared across processes) from its name.

    Extended variant supporting ab-test, seq-human-N and map-elite selectors
    plus a cap on the number of levels.

    Args:
        selector_name: Selector key (e.g. "ab-test", "random-all",
            "seq-human-2", "pcg-progressive"); None yields None.
        game: Game name forwarded to the selector.
        path: Directory for generated levels; created if missing.
        fixed: Forwarded only to RandomWithDifPCGSelector.
        max: Maximum number of levels; -1 means unlimited. NOTE: the name
            shadows the builtin `max` but is kept for caller compatibility.

    Returns:
        A manager proxy for the chosen selector, or None if selector_name
        is None.

    Raises:
        Exception: If selector_name matches no known selector.
    """
    # Register classes for sharing across procs
    for c in [RandomSelector, RandomWithDifSelector, SequentialHumanLevelSelector,
              RandomPCGSelector, RandomWithDifPCGSelector, ProgressivePCGSelector,
              SequentialSelector, ABTestSelector, MapEliteSelector]:
        BaseManager.register(c.__name__, c)
    manager = BaseManager()
    manager.start()
    # Determine selector
    if selector_name is not None:
        make_path(path)
        path = os.path.realpath(path)
        if selector_name == "ab-test":
            selector = manager.ABTestSelector(path, game, "levels_2", max=max)
        elif selector_name == "random-all":
            selector = manager.RandomSelector(path, game, [0, 1, 2, 3, 4], max=max)
        elif selector_name == "random-0123":
            selector = manager.RandomSelector(path, game, [0, 1, 2, 3], max=max)
        elif selector_name.startswith('random-'):
            # Numeric suffix maps to difficulty in [0, 1]: "random-3" -> 0.3.
            difficulty = float(selector_name.split('random-')[1]) * 0.1
            selector = manager.RandomWithDifSelector(path, game, difficulty, max=max)
        elif selector_name.startswith('seq-human-'):
            level_id = int(selector_name.split('seq-human-')[1])
            selector = manager.SequentialHumanLevelSelector(path, game, level_id, max=max)
        elif selector_name.startswith('seq-'):
            difficulty = float(selector_name.split('seq-')[1]) * 0.1
            selector = manager.SequentialSelector(path, game, difficulty, max=max)
        elif selector_name == "pcg-random":
            selector = manager.RandomPCGSelector(path, game, max=max)
        elif selector_name.startswith('pcg-random-'):
            difficulty = float(selector_name.split('pcg-random-')[1]) * 0.1
            selector = manager.RandomWithDifPCGSelector(path, game, difficulty, fixed=fixed, max=max)
        elif selector_name == "pcg-progressive":
            selector = manager.ProgressivePCGSelector(path, game, max=max)
        elif selector_name == "pcg-progressive-fixed":
            selector = manager.ProgressivePCGSelector(path, game, upper_limit=False, max=max)
        elif selector_name == "map-elite":
            selector = manager.MapEliteSelector(path, game, max=max)
        else:
            # Bug fix: the message previously contained a stray "+ " inside
            # the string literal ("Unknown level selector: + ").
            raise Exception("Unknown level selector: " + selector_name)
    else:
        return None
    return selector
def main():
    """Render per-run, per-experiment, and cross-experiment score plots."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--smooth', help='How many points to smooth', type=int, default=10)
    parser.add_argument('--font-size', help='Font size on plots', type=int, default=14)
    args = parser.parse_args()

    # One plot per run, plus a combined plot, for every experiment folder.
    for folder in glob.iglob('./results/*/'):
        pretty = folder.split('/')[-2].replace('-', ' ').title()
        pretty = pretty.replace('Pcg', 'PCG').replace('Ls ', '')
        plot_dir = os.path.join(folder, 'plots/')
        make_path(plot_dir)
        runs = []
        for run_no, log_name in enumerate(glob.iglob(os.path.join(folder, 'logs/*.log')), start=1):
            run_data = load(log_name)
            runs.append(run_data)
            plot(plot_dir, pretty + " " + str(run_no), run_data, smooth=args.smooth, fontsize=args.font_size, multiple=False)
            plt.clf()
        plot(plot_dir, pretty, runs, smooth=args.smooth, fontsize=args.font_size, multiple=True)
        plt.clf()

    # Mixed plot across all pcg-random experiments.
    titles = []
    datasets = []
    for folder in glob.iglob('./results/*pcg-random*/'):
        pretty = folder.split('/')[-2].replace('-', ' ').title()
        pretty = pretty.replace('Pcg', 'PCG').replace('Ls ', '')
        datasets.append([load(log_name) for log_name in glob.iglob(os.path.join(folder, 'logs/*.log'))])
        titles.append(pretty)
    if titles:
        mixed_dir = './plots/'
        make_path(mixed_dir)
        plot_mixed(mixed_dir, "PCG with Fixed Difficulty", titles, datasets, smooth=args.smooth, fontsize=args.font_size)
        plt.clf()
def save(save_path):
    """Serialize the session's current parameter values to *save_path* (joblib)."""
    parent_dir = osp.dirname(save_path)
    parameter_values = sess.run(params)
    make_path(parent_dir)
    joblib.dump(parameter_values, save_path)
def save(save_path):
    """Serialize the session's current parameter values to *save_path* (joblib).

    Bug fix: make_path was previously called on the full file path, which
    created a *directory* named save_path and made the subsequent
    joblib.dump to that same path fail. Now only the parent directory is
    created, matching the other save() variants in this project.
    """
    import os  # local import: this closure's module may not import os at top level
    ps = sess.run(params)
    parent = os.path.dirname(save_path)
    if parent:  # dirname is '' for a bare filename in the cwd
        make_path(parent)
    joblib.dump(ps, save_path)
def test_on(game, level, selector, experiment_name, experiment_id, policy, num_envs=1, seed=0, runs=100, render=False, record_path=None, save_results=True, model_steps=-1):
    """Evaluate a trained A2C model on one game level (or level selector).

    Loads the newest (or given) checkpoint of `experiment_id`, runs `runs`
    evaluation episodes, prints a summary, and optionally appends the
    results to a score file under ./results/<experiment_name>/eval/.

    NOTE(review): the final `return [wins[0], mean_score, steps]` references
    names (`wins`, `steps`) that are never defined in this function and will
    raise NameError if reached — confirm the intended return fields (perhaps
    values produced by eval() and `model_steps`).
    """
    # Environment name
    env_id = "gvgai-" + game + "-lvl" + str(level) + "-v0"
    # Test name: selector-based or fixed-level.
    test_name = game
    if selector is not None:
        test_name += "-ls-" + selector
    else:
        test_name += "-lvl-" + str(level)
    print("Test name: " + test_name)
    print('Training name: ' + experiment_name)
    print("Training id: " + experiment_id)
    # Folders
    score_path = './results/' + experiment_name + '/eval/' + test_name + '/scores/'
    level_path = './results/' + experiment_name + '/eval/' + test_name + '/levels/'
    make_path(level_path)
    make_path(score_path)
    # Create file and override if necessary (truncates any previous results).
    score_file = score_path + test_name + "_" + experiment_id + ".dat"
    with open(score_file, 'w+') as myfile:
        myfile.write('')
    # Level selector (capped at `runs` levels).
    level_selector = LevelSelector.get_selector(selector, game, level_path, max=runs)
    env = make_gvgai_env(env_id, num_envs, seed, level_selector=level_selector)
    # Main plots per experiment
    mean_scores = []
    std_scores = []
    model_folder = './results/' + experiment_name + '/models/' + experiment_id + "/"
    # Find number of steps for last model: scan *.meta checkpoint files and
    # keep the largest step count embedded in the filename.
    if model_steps < 0:
        for model_meta_name in glob.iglob(model_folder + '*.meta'):
            print(model_meta_name)
            s = int(model_meta_name.split('.meta')[0].split('/')[-1].split("-")[1])
            print(s)
            if s > model_steps:
                model_steps = s
    print(model_steps)
    # Map the policy name to its class; argparse choices elsewhere presumably
    # restrict `policy` to these three values — otherwise policy_fn is unbound.
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = LstmPolicy
    elif policy == 'lnlstm':
        policy_fn = LnLstmPolicy
    tf.reset_default_graph()
    ob_space = env.observation_space
    ac_space = env.action_space
    print("creating model")
    model = Model(policy=policy_fn, ob_space=ob_space, ac_space=ac_space, nenvs=num_envs, nsteps=5)
    print("loading model")
    try:
        model.load(model_folder, model_steps)
    except Exception as e:
        # Best-effort: report the load failure and bail out (returns None).
        print(e)
        env.close()
        return
    if record_path is not None:
        name = "{}/{}_{}_steps_{}m".format(record_path, experiment_name, policy, model_steps/1000000)
    else:
        name = None
    print("evaluate")
    # NOTE(review): `eval` shadows the builtin — presumably a project-level
    # evaluation helper returning a list of episode scores; confirm.
    scores = eval(model, env, runs=runs, render=render, record_name=name, level_selector=level_selector)
    mean_score = np.mean(scores)
    std_score = np.std(scores)
    print("Testing on=" + test_name)
    print("Trained on=" + experiment_name)
    print("Model id=" + experiment_id)
    print("Steps trained=" + str(model_steps))
    print("Runs=" + str(runs))
    print("Mean score=" + str(mean_score))
    print("Std. dev.=" + str(std_score))
    print("All scores=" + str(scores))
    if save_results:
        print("saving results to " + score_file)
        # Save results
        with open(score_file, "a") as myfile:
            line = "Testing on=" + test_name + "\n"
            line += "Trained on=" + experiment_name + "\n"
            line += "Id=" + experiment_id + "\n"
            line += "Steps trained=" + str(model_steps) + "\n"
            line += "Runs=" + str(runs) + "\n"
            line += "Mean score=" + str(mean_score) + "\n"
            line += "Std. dev.=" + str(std_score) + "\n"
            line += "All scores=" + str(scores) + "\n"
            line += "\n"
            myfile.write(line)
    env.close()
    return [wins[0], mean_score, steps]
def save(save_path):
    """Dump the fetched parameter values to *save_path*, creating the parent dir if any."""
    current_values = sess.run(params)
    target_dir = osp.dirname(save_path)
    if target_dir:
        make_path(target_dir)
    joblib.dump(current_values, save_path)
def learn(policy, env, experiment_name, experiment_id, seed=None, nsteps=5, total_timesteps=int(80e6), vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5, lr=7e-4, lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99, save_interval=25000, frame_skip=False, level_selector=None, render=False, diff=0):
    """Train an A2C model on `env`, logging scores and checkpointing models.

    Automatically resumes from the newest checkpoint if models for
    `experiment_id` already exist under ./results/<experiment_name>/models/.

    Args:
        policy: Policy class (e.g. CnnPolicy) passed to Model.
        env: Vectorized environment exposing num_envs workers.
        experiment_name: Used to build result/log/model directory paths.
        experiment_id: Unique run id; keys log and model file names.
        seed: RNG seed; a random one is drawn when None.
        nsteps: Rollout length per update.
        total_timesteps: Total environment steps to train for.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha,
            gamma: A2C hyper-parameters forwarded to Model/Runner.
        save_interval: Checkpoint every this many environment steps.
        frame_skip: If True, each step counts as 4 frames in the logs.
        level_selector: Optional selector whose difficulty info is logged
            (and restored on resume).
        render: Forwarded to the Runner.
        diff: Fixed difficulty to log when no level selector is used.

    Bug fix: removed a leftover debug statement that opened (and leaked) a
    stray "demofile.txt" file handle on every call.
    """
    if seed is None:
        seed = random.randint(0, 10000000)
    tf.reset_default_graph()
    set_global_seeds(seed)

    # Check if experiment_id exists - then continue from its newest checkpoint.
    sep = '\\' if sys.platform.startswith('win') else '/'
    model_path = '.' + sep + 'results' + sep + experiment_name + sep + 'models' + sep
    steps = 0
    resume = False
    for model_folder in glob.iglob(model_path + '*' + sep):
        run_id = model_folder.split(sep)[-2]  # renamed from `id` (shadowed builtin)
        if run_id != experiment_id:
            continue
        # Find the step count of the last saved model from *.meta file names.
        for model_meta_name in glob.iglob(model_folder + sep + '*.meta'):
            resume = True
            s = int(model_meta_name.split('.meta')[0].split(sep)[-1].split("-")[1])
            if s >= steps:
                steps = s
    if resume:
        print("Resuming from step {}".format(steps))

    # Log file path
    log_path = "./results/" + experiment_name + "/logs/"
    make_path(log_path)
    log_file = log_path + experiment_id + ".log"

    # Create log file with a CSV-ish header on a fresh run.
    if not resume:
        with open(log_file, "a") as myfile:
            line = "episodes; steps; frames; mean_score; std_score; min_score; max_score; difficulty; policy_loss; value_loss; episode_length;fps;\n"
            myfile.write(line)

    # Model folder path
    model_path = "./results/" + experiment_name + "/models/" + experiment_id + "/"
    make_path(model_path)

    # Create model
    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    model = Model(policy=policy, ob_space=ob_space, ac_space=ac_space, nenvs=nenvs, nsteps=nsteps, ent_coef=ent_coef, vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr, alpha=alpha, epsilon=epsilon, total_timesteps=total_timesteps, lrschedule=lrschedule)
    if resume:
        try:
            model.load(model_path, steps)
        except Exception as e:
            # Best-effort: report the load failure and bail out.
            print(e)
            env.close()
            return

    # Create parallel runner
    runner = Runner(env, model, nsteps=nsteps, gamma=gamma, render=render, record_name=None)

    # Training loop
    nbatch = nenvs * nsteps
    tstart = time.time()
    next_model_save = steps + save_interval
    start_update = steps // nbatch
    episodes = 0
    if not resume:
        model.save(model_path, 0)
    else:
        # Restore the episode count (and difficulty) from the last valid log line.
        with open(log_file, "r") as myfile:
            last_line = None
            for line in myfile:
                if len(line.strip()) > 0 and '\x00' not in line:
                    last_line = line
            episodes = int(last_line.split(';')[0])
            # NOTE(review): log lines end with ";\n", so split(';')[-1] is "\n",
            # while the difficulty field is at index 7 — this int() call looks
            # like it would raise ValueError on resume. Confirm intended field.
            if level_selector is not None and last_line.split(';')[-1] != '' and hasattr(level_selector, 'difficulty'):
                print("Restoring difficulty to " + last_line.split(';')[-1])
                level_selector.difficulty = int(last_line.split(';')[-1])

    last_frames = 0
    for update in range(start_update, total_timesteps // nbatch + 1):
        obs, states, rewards, masks, actions, values = runner.run()
        policy_loss, value_loss, policy_entropy = model.train(obs, states, rewards, masks, actions, values)
        nseconds = time.time() - tstart
        frame_mult = 4 if frame_skip else 1  # frames per env step (renamed from reused `f`)
        fps = int((update * nbatch * frame_mult) / nseconds)
        steps = update * nbatch
        # Frames equal steps when not frame skipping - both are logged.
        frames = steps * frame_mult

        # If n final results were reported - log the average and std dev.
        if len(runner.final_rewards) >= runner.nenv:
            episodes += runner.nenv
            # Extract and remove a number of final rewards equal to the number of workers.
            final_rewards = runner.final_rewards[:runner.nenv]
            mean_score = np.mean(final_rewards)
            std_score = np.std(final_rewards)
            min_score = np.min(final_rewards)
            max_score = np.max(final_rewards)
            runner.final_rewards = runner.final_rewards[runner.nenv:]
            frames_per_episode = (frames - last_frames) / runner.nenv
            last_frames = frames

            # Log using baselines logger (prints a table to stdout).
            logger.record_tabular("mean_score", mean_score)
            logger.record_tabular("std_score", std_score)
            logger.record_tabular("min_score", min_score)
            logger.record_tabular("max_score", max_score)
            logger.record_tabular("steps", steps)
            logger.record_tabular("frames", frames)
            logger.record_tabular("episodes", episodes)
            logger.record_tabular("fps", fps)
            if level_selector is not None:
                logger.record_tabular("difficulty", str(level_selector.get_info()))
            else:
                logger.record_tabular("difficulty", diff)
            logger.dump_tabular()

            # Log to file
            with open(log_file, "a") as myfile:
                dif = str(level_selector.get_info()) if level_selector is not None else str(diff)
                line = str(episodes) + ";" + str(steps) + ";" + str(frames) + ";" + str(mean_score) + ";" + str(std_score) + ";" + str(min_score) + ";" + str(max_score) + ";" + dif + ";" + str(policy_loss) + ";" + str(value_loss) + ";" + str(frames_per_episode) + ";" + str(fps) + ";" + "\n"
                myfile.write(line)

        # Save model periodically.
        if steps >= next_model_save:
            model.save(model_path, next_model_save)
            next_model_save += save_interval

    # Save model in the end
    model.save(model_path, total_timesteps)
    env.close()
def main():
    """CLI entry point: parse arguments and run A2C training repetitions on a GVGAI game."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--policy', help='Policy architecture', choices=['cnn', 'lstm', 'lnlstm'], default='cnn')
    parser.add_argument('--lrschedule', help='Learning rate schedule', choices=['constant', 'linear'], default='constant')
    parser.add_argument('--num-envs', help='Number of environments/workers to run in parallel (default=12)', type=int, default=12)
    parser.add_argument('--num-timesteps', help='Number of timesteps to train the model', type=int, default=int(20e6))
    parser.add_argument('--game', help='Game name (default=zelda)', default='zelda')
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--save-interval', help='Model saving interval in steps', type=int, default=int(1e6))
    parser.add_argument('--level', help='Level (integer) to train on', type=int, default=0)
    parser.add_argument('--resume', help='The experiment id to resume', default=None)
    parser.add_argument('--repetitions', help='Number of repetitions to run sequentially (default=1)', type=int, default=1)
    parser.add_argument('--selector', help='Level selector to use in training - will ignore the level argument if set (default: None)', choices=[None] + LevelSelector.available, default=None)
    parser.add_argument('--render', action='store_true', default=False, help='Render screen (default: False)')
    parser.add_argument('--version', help='game version', type=int, default=0)
    args = parser.parse_args()

    # Gym environment id, e.g. "gvgai-zelda-lvl0-v0".
    env_id = "gvgai-" + args.game + "-lvl" + str(args.level) + "-v" + str(args.version)

    difficulty = -1
    make_path("./results")
    experiment_name = args.game
    if args.selector is None:
        # Fixed-level training: derive a nominal difficulty from level/version.
        difficulty = math.ceil((args.level + args.version * 5) / 2)
        experiment_name += "-lvl-" + str(args.level) + "-v" + str(args.version)
    else:
        experiment_name += "-ls-" + args.selector + "-v" + str(args.version)
    make_path("./results/" + experiment_name)

    # Policy-name -> class dispatch (argparse choices guarantee membership).
    policies = {'cnn': CnnPolicy, 'lstm': LstmPolicy, 'lnlstm': LnLstmPolicy}

    for rep in range(args.repetitions):
        print("Starting experiment " + str(rep + 1) + " of " + str(args.repetitions))
        # Unique id per repetition, unless resuming a specific experiment.
        experiment_id = args.resume if args.resume is not None else str(uuid.uuid1())
        # Level selector (None when training on a fixed level).
        level_path = './results/' + experiment_name + '/levels/' + experiment_id + '/'
        level_selector = LevelSelector.get_selector(args.selector, args.game, level_path)
        # Make gym environment
        env = make_gvgai_env(env_id=env_id, num_env=args.num_envs, seed=args.seed, level_selector=level_selector)
        learn(policy=policies[args.policy], env=env, experiment_name=experiment_name, experiment_id=experiment_id, seed=args.seed, total_timesteps=args.num_timesteps, lrschedule=args.lrschedule, frame_skip=False, save_interval=args.save_interval, level_selector=level_selector, render=args.render, diff=difficulty)
        env.close()
        print("Experiment DONE")
def save(save_path):
    """Write the session's parameter values to *save_path* via joblib."""
    make_path(os.path.dirname(save_path))
    joblib.dump(sess.run(params), save_path)
def save(save_path):
    """Store the session parameters under the local 'models/' directory.

    The file ends up at 'models/' + save_path.
    """
    target_dir = "models/"
    make_path(target_dir)
    joblib.dump(sess.run(params), target_dir + save_path)
def save(save_path, model_name):
    """Save the session parameters as <save_path>/<model_name>.model."""
    make_path(save_path)
    weights = sess.run(params)
    destination = '{}/{}.model'.format(save_path, model_name)
    joblib.dump(weights, destination)
def save(path, steps):
    """Checkpoint the TF graph as <path>model-<steps> via the instance's Saver."""
    make_path(path)
    checkpoint_prefix = path + 'model'
    self.saver.save(sess, checkpoint_prefix, global_step=steps)
def save(save_path):
    """Fetch the current parameters from the session and joblib-dump them to *save_path*."""
    destination_dir = os.path.dirname(save_path)
    make_path(destination_dir)
    joblib.dump(sess.run(params), save_path)