def run_mslr(model_name, mslr_config=MslrConfig(), training_config=TrainingConfig()):
    exp_name = utils.get_exp_name(model_name, mslr_config.task_name)
    writer = SummaryWriter(os.path.join(training_config.log_dir, exp_name))
    mslr = MSLR(mslr_config.train_file, mslr_config.dev_file, mslr_config.test_file,
                mslr_config.batch_size)
    train_loader, train_df, dev_loader, dev_df, test_loader, test_df = mslr.load_data()
    model, model_inference = get_train_inference_model(model_name, train_loader.num_features)
    device = ml_utils.get_device()
    model.to(device)
    model_inference.to(device)
    model.apply(ml_utils.init_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=training_config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=training_config.step_size, gamma=training_config.gamma)
    loss_op = torch.nn.BCELoss().to(device)
    losses = []

    print("Start training")
    for epoch in range(training_config.num_epochs):
        model.zero_grad()
        model.train()
        train_ce_loss = pairwise_train_fn(model, loss_op, optimizer, train_loader, device)
        # Step the LR scheduler once per epoch, after the optimizer updates
        # (calling it before training would skip the initial learning rate).
        scheduler.step()
        print('Finished training for epoch {}'.format(epoch))
        train_results = {"loss": train_ce_loss}
        writer.add_scalars("train", train_results, epoch)
        print(train_results)
        losses.append(train_ce_loss)

        # Run evaluation (and save a checkpoint) every `eval_and_save_every` epochs.
        if epoch % training_config.eval_and_save_every == 0:
            dev_ce_loss, dev_ndcg_results = eval_model_fn(
                model_inference, device, dev_df, dev_loader, training_config.ndcg_k_list)
            eval_results = {"loss": dev_ce_loss}
            print("Validation at epoch {}".format(epoch))
            for k in dev_ndcg_results:
                ndcg_at_str = "NDCG@{}".format(k)
                eval_results[ndcg_at_str] = dev_ndcg_results[k]
            print(eval_results)
            writer.add_scalars("eval", eval_results, epoch)

    print("Training finished, start testing...")
    test_ce_loss, test_ndcg_results = eval_model_fn(
        model_inference, device, test_df, test_loader, training_config.ndcg_k_list)
    print("Testing loss: {}".format(test_ce_loss))
    for k in test_ndcg_results:
        print("NDCG@{}: {:.5f}".format(k, test_ndcg_results[k]))
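# A minimal usage sketch for run_mslr (not part of the original script). It assumes
# MslrConfig / TrainingConfig are simple config objects whose fields match the
# attributes read above (lr, num_epochs, eval_and_save_every, ...); the model name
# "pairwise" is purely illustrative.
def _example_run_mslr():
    train_cfg = TrainingConfig()
    train_cfg.lr = 1e-4        # smaller LR for a quick sanity run
    train_cfg.num_epochs = 10
    run_mslr("pairwise", training_config=train_cfg)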
def main(game, representation, experiment, steps, n_cpu, render, logging, **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)

    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap

    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10

    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    model = None  # only loaded when resuming
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)

    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    # Only pass a monitor directory to the environments when logging is enabled.
    used_dir = log_dir
    if not logging:
        used_dir = None
    env = make_vec_envs(env_name, representation, used_dir, n_cpu, **kwargs)

    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)

    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name, callback=callback)
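# The `callback` passed to model.learn above is defined elsewhere in the repo. The
# sketch below is purely illustrative, in the stable-baselines (v2) callback style:
# it periodically saves the current model into `log_dir`; the repo's real callback
# may behave differently.
_n_callback_steps = 0

def callback(_locals, _globals):
    # Hypothetical periodic-checkpoint callback; `_locals['self']` is the model.
    global _n_callback_steps
    _n_callback_steps += 1
    if _n_callback_steps % 1000 == 0:
        _locals['self'].save(os.path.join(log_dir, 'latest_model.pkl'))
    return True  # returning False would stop training early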
def save_sorted_loss_plot(sorted_losses, path):
    n = 1.0 * len(sorted_losses)
    i_elbow = get_elbow_index(sorted_losses)
    elbow_tile = (i_elbow / n * 100)
    elbow_tile_m = ((i_elbow - 1) / n * 100)
    elbow_tile_p = ((i_elbow + 1) / n * 100)
    plt.rcParams["figure.figsize"] = 15, 15
    x_scale = list(100 * np.arange(1.0, n + 1.0) / n)
    fig = plt.figure()
    fig.suptitle('Val Losses: %s' % utils.get_exp_name(), fontsize=14, fontweight='bold')
    ax = fig.add_subplot(111)
    ax.plot(x_scale, sorted_losses)
    ax.plot([elbow_tile], [sorted_losses[i_elbow]], 'o')
    ax.plot([elbow_tile_m], [sorted_losses[i_elbow - 1]], 'o')
    ax.plot([elbow_tile_p], [sorted_losses[i_elbow + 1]], 'o')
    font_size = 14
    ax.set_xlabel('%-tile')
    ax.set_ylabel('Loss')
    ax.text(5, .950, '# Examples: %d' % int(n), fontsize=font_size)
    ax.text(5, .925, 'Elbow Index : %d' % i_elbow, fontsize=font_size)
    ax.text(5, .900, 'Elbow %%tile: %.4f' % elbow_tile, fontsize=font_size)
    ax.text(5, .850, 'Min Loss: %.4f' % sorted_losses[0], fontsize=font_size)
    ax.text(5, .825, 'Max Loss: %.4f' % sorted_losses[-1], fontsize=font_size)
    ax.text(5, .800, 'Loss at elbow index: %.4f' % sorted_losses[i_elbow], fontsize=font_size)
    for i, p in enumerate(np.arange(0.8, 1, 0.02)):
        text = 'LP %5.2f %.4f' % (100 * p, get_tp_value(sorted_losses, p))
        ax.text(5, 0.750 - i * 0.02, text, fontsize=font_size)
    ax.axis([0, 105, 0, 1])
    plt.savefig(path)
    plt.close()
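# The helpers used above are defined elsewhere. The sketches below are assumptions
# about their behavior, not the original implementations: get_tp_value(sorted_losses, p)
# is read as "loss value at fraction p of the sorted array", and get_elbow_index as a
# standard farthest-point-from-the-chord elbow heuristic on the sorted loss curve.
def get_tp_value(sorted_losses, p):
    # Loss value at the p-th fraction (0 <= p < 1) of the sorted array.
    idx = min(int(p * len(sorted_losses)), len(sorted_losses) - 1)
    return sorted_losses[idx]

def get_elbow_index(sorted_losses):
    # Index of the point farthest from the straight line joining the first and last
    # losses; the constant denominator of the point-line distance is dropped since
    # it does not affect the argmax.
    y = np.asarray(sorted_losses, dtype=float)
    x = np.linspace(0.0, 1.0, len(y))
    x0, y0, x1, y1 = x[0], y[0], x[-1], y[-1]
    dists = np.abs((y1 - y0) * x - (x1 - x0) * y + x1 * y0 - y1 * x0)
    return int(np.argmax(dists))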
def main(game, representation, experiment, steps, n_cpu, render, logging, **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)

    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap

    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10

    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)

    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None
    env = make_env(env_name, representation, 0, None, **kwargs)()
    print(env.action_space)
def compile_results(settings_list):
    batch_exp_name = settings_list[0]["experiment_id"]
    # if batch_exp_name == "2":
    RL_DIR = "rl_runs"
    # elif batch_exp_name == "1":
    #     EVO_DIR = "evo_runs_06-13"
    #     RL_DIR = "evo_runs_06-14"
    #     ignored_keys = set(
    #         (
    #             "exp_name",
    #             "evaluate",
    #             "show_vis",
    #             "visualize",
    #             "render_levels",
    #             "multi_thread",
    #             "play_level",
    #             "evaluate",
    #             "save_levels",
    #             "cascade_reward",
    #             "model",
    #             "n_generations",
    #             "render",
    #             "infer",
    #         )
    #     )
    #     keys = []
    #     for k in settings_list[0].keys():
    #         if k not in ignored_keys:
    #             keys.append(k)
    keys = [
        "problem",
        "representation",
        "conditionals",
        "alp_gmm",
        "change_percentage",
    ]
    columns = None
    data = []
    vals = []

    for i, settings in enumerate(settings_list):
        val_lst = []
        controllable = False
        for k in keys:
            v = settings[k]
            if k == 'conditionals':
                if v != ['NONE']:
                    controllable = True
                if isinstance(settings[k], list):
                    if len(settings[k]) < 2:
                        val_lst.append("-".join(settings[k]))
                    else:
                        val_lst.append(newline(settings[k][0] + '-', v[1]))
            elif k == 'alp_gmm':
                if not controllable:
                    v = ''
                elif v:
                    v = 'learning'
                else:
                    v = 'random'
                val_lst.append(v)
            else:
                val_lst.append(v)

        args = parse_args(load_args=settings)
        arg_dict = vars(args)
        # FIXME: awkward key renaming
        arg_dict["cond_metrics"] = arg_dict.pop("conditionals")
        exp_name = get_exp_name(
            arg_dict.pop("problem"), arg_dict.pop("representation"), **arg_dict
        ) + "_{}_log".format(batch_exp_name)
        # NOTE: For now, we run this locally in a special directory, to which we have
        # copied the eval results of the relevant experiments.
        exp_name = os.path.join(RL_DIR, exp_name)
        stats_f = os.path.join(exp_name, "eval", "scores_ctrlTrgs.json")
        fixTrgs_stats_f = os.path.join(exp_name, "eval", "scores_fixTrgs.json")

        if not (os.path.isfile(stats_f) and os.path.isfile(fixTrgs_stats_f)):
            print(stats_f)
            print(
                "skipping evaluation of experiment due to missing stats file(s): {}".format(
                    exp_name
                )
            )
            continue
        vals.append(tuple(val_lst))
        data.append([])
        stats = json.load(open(stats_f, "r"))
        fixLvl_stats = json.load(open(fixTrgs_stats_f, "r"))
        flat_stats = flatten_stats(fixLvl_stats)
        flat_stats.update(flatten_stats(stats, controllable=True))

        if columns is None:
            columns = list(flat_stats.keys())
        for j, c in enumerate(columns):
            if c not in flat_stats:
                data[-1].append("N/A")
            else:
                data[-1].append(flat_stats[c])

    tuples = vals
    # Rename headers
    new_keys = []
    for k in keys:
        if k in header_text:
            new_keys.append(header_text[k])
        else:
            new_keys.append(k)
    for (i, lst) in enumerate(tuples):
        new_lst = []
        for v in lst:
            if v in header_text:
                new_lst.append(header_text[v])
            else:
                new_lst.append(v)
        tuples[i] = new_lst

    index = pd.MultiIndex.from_tuples(tuples, names=new_keys)
    # df = index.sort_values().to_frame(index=True)
    df = pd.DataFrame(data=data, index=index, columns=columns).sort_values(by=new_keys)
    # print(index)
    csv_name = r"{}/cross_eval_{}.csv".format(RL_DIR, batch_exp_name)
    html_name = r"{}/cross_eval_{}.html".format(RL_DIR, batch_exp_name)
    df.to_csv(csv_name)
    df.to_html(html_name)
    print(df)

    # tex_name = r"{}/zelda_empty-path_cell_{}.tex".format(OVERLEAF_DIR, batch_exp_name)
    # FIXME: round the dataframe before export.
    # df = df.round(2)

    for p in ["binary", "zelda", "sokoban"]:
        tex_name = "{}/{}_{}.tex".format(RL_DIR, p, batch_exp_name)
        df_tex = df.loc[p, "narrow"]
        p_name = p + '_ctrl'
        lcl_conds = ['None'] + ['-'.join(pi) if len(pi) < 2 else newline(pi[0] + '-', pi[1])
                                for pi in local_controls[p_name]]
        print(lcl_conds)
        df_tex = df_tex.loc[lcl_conds]
        # df_tex = df_tex.sort_values(by=['ALP GMM'])
        z_cols = [
            header_text["net_score (mean)"],
            header_text["diversity_score (mean)"],
            header_text["(controls) net_score (mean)"],
            # header_text["(controls) ctrl_score (mean)"],
            # header_text["(controls) fixed_score (mean)"],
            header_text["(controls) diversity_score (mean)"],
        ]
        # df_tex = df.drop(columns=z_cols)
        df_tex = df_tex.loc[:, z_cols]
        df_tex = df_tex * 100
        df_tex = df_tex.round(0)
        dual_conds = ['None', lcl_conds[1]]
        for k in z_cols:
            if k in df_tex:
                # df_tex.loc[dual_conds][k] = df_tex.loc[dual_conds][k].apply(
                #     lambda data: bold_extreme_values(data, data_max=df_tex.loc[dual_conds][k].max())
                # )
                df_tex[k] = df_tex[k].apply(
                    lambda data: bold_extreme_values(data, data_max=df_tex[k].max())
                )
        # df_tex = df_tex.round(2)
        # df_tex.reset_index(level=0, inplace=True)
        print(df_tex)

        with open(tex_name, "w") as tex_f:
            col_widths = "p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.8cm}p{0.8cm}p{0.8cm}"
            df_tex.to_latex(
                tex_f,
                index=True,
                columns=z_cols,
                multirow=True,
                # column_format=col_widths,
                escape=False,
                caption=("Performance of controllable {}-generating agents under "
                         "learning-progress-informed and uniform-random control regimes, "
                         "and of baseline (single-objective) agents, across various "
                         "change-percentage allowances.".format(p)),
                label="tbl:{}".format(p),
            )
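# `newline` and `bold_extreme_values` are defined elsewhere in the repo; the
# definitions below are plausible stand-ins (assumptions, not the repo's code)
# showing the behavior the table-building code above relies on.
def newline(top, bottom):
    # Stack two strings vertically inside a single LaTeX table cell.
    return "\\shortstack{%s \\\\ %s}" % (top, bottom)

def bold_extreme_values(data, data_max=-1):
    # Bold a cell that attains the column maximum; otherwise just format the value.
    if data == data_max:
        return "\\textbf{%d}" % int(data)
    return "%d" % int(data)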
def infer(game, representation, infer_kwargs, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {**infer_kwargs, "inference": True, "render": True}
    max_trials = kwargs.get("max_trials", -1)
    # n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    map_width = infer_kwargs.get("map_width")
    env_name = get_env_name(game, representation)
    exp_name = get_exp_name(game, representation, **infer_kwargs)
    # if n is None:
    #     if EXPERIMENT_ID is None:
    #         n = max_exp_idx(exp_name)
    #     else:
    #         n = EXPERIMENT_ID
    # if n == 0:
    #     raise Exception(
    #         "Did not find ranked saved model of experiment: {}".format(exp_name)
    #     )
    crop_size = infer_kwargs.get("cropped_size")
    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
    # log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, n)
    log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    env, dummy_action_space, n_tools = make_vec_envs(
        env_name, representation, None, **infer_kwargs
    )
    print("loading model at {}".format(log_dir))
    model = load_model(
        log_dir, load_best=infer_kwargs.get("load_best"), n_tools=n_tools
    )
    if model is None:
        raise Exception("No model loaded")
    # model.set_env(env)
    env.action_space = dummy_action_space
    obs = env.reset()
    # Record final values of each trial
    # if 'binary' in env_name:
    #     path_lengths = []
    #     changes = []
    #     regions = []
    #     infer_info = {
    #         'path_lengths': [],
    #         'changes': [],
    #         'regions': [],
    #     }
    n_trials = 0
    n_step = 0

    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        # print('reward: {}'.format(rewards))
        # reward = rewards[0]
        # n_regions = info[0]['regions']
        # readouts = []
        # if 'binary' in env_name:
        #     curr_path_length = info[0]['path-length']
        #     readouts.append('path length: {}'.format(curr_path_length))
        #     path_lengths.append(curr_path_length)
        #     changes.append(info[0]['changes'])
        #     regions.append(info[0]['regions'])
        # readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        # y0, dy = 50, 40
        # img = np.zeros((256, 512, 3), np.uint8)
        # scale_percent = 60  # percent of original size
        # width = int(img.shape[1] * scale_percent / 100)
        # height = int(img.shape[0] * scale_percent / 100)
        # dim = (width, height)
        # for i, line in enumerate(readouts):
        #     y = y0 + i * dy
        #     cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
        # # Display the image
        # resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        # cv2.imshow("img", resized)
        # cv2.waitKey(1)
        # for p, v in model.get_parameters().items():
        #     print(p, v.shape)
        n_step += 1

        if dones:
            env.reset()
            # show_state(env, path_lengths, changes, regions, n_step)
            # if 'binary' in env_name:
            #     infer_info['path_lengths'] = path_lengths[-1]
            #     infer_info['changes'] = changes[-1]
            #     infer_info['regions'] = regions[-1]
            n_step = 0
            n_trials += 1
def main(game, representation, n_frames, n_cpu, render, logging, **kwargs):
    if game not in [
        "binary_ctrl", "sokoban_ctrl", "zelda_ctrl", "smb_ctrl", "MicropolisEnv", "RCT"
    ]:
        raise Exception(
            "Not a controllable environment. Maybe add '_ctrl' to the end of the name? "
            "E.g. 'sokoban_ctrl'"
        )
    kwargs['n_cpu'] = n_cpu
    env_name = get_env_name(game, representation)
    print('env name: ', env_name)
    exp_name = get_exp_name(game, representation, **kwargs)
    resume = kwargs.get('resume', False)
    ca_action = kwargs.get('ca_action')

    if representation == 'wide' and not ('RCT' in game or 'Micropolis' in game):
        if ca_action:
            raise Exception()
            # policy = CApolicy
        else:
            policy = FullyConvPolicyBigMap
            # policy = WidePolicy
        if game == "sokoban" or game == "sokoban_ctrl":
            # T()
            policy = FullyConvPolicySmallMap
    else:
        # policy = ActorCriticCnnPolicy
        policy = CustomPolicyBigMap
        if game == "sokoban" or game == "sokoban_ctrl":
            # T()
            policy = CustomPolicySmallMap

    crop_size = kwargs.get('cropped_size')
    if crop_size == -1:
        kwargs['cropped_size'] = get_crop_size(game)
    exp_id = kwargs.get('experiment_id')
    # n = kwargs.get('experiment_id')
    # if n is None:
    #     n = max_exp_idx(exp_name)
    #     if not resume:
    #         n += 1
    global log_dir
    exp_name_id = '{}_{}'.format(exp_name, exp_id)
    # log_dir = 'rl_runs/{}_{}_log'.format(exp_name, n)
    log_dir = 'rl_runs/{}_log'.format(exp_name_id)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    # if not resume:
    try:
        os.mkdir(log_dir)
        print("Log directory does not exist, starting a fresh run.")
        resume = False
    except Exception:
        print("Log directory already exists; will try to load a saved model.")
    try:
        env, dummy_action_space, n_tools = make_vec_envs(
            env_name, representation, log_dir, **kwargs)
    except Exception as e:
        # if this is a new experiment, clean up the logging directory if we fail to start up
        # if not resume:
        #     os.rmdir(log_dir)
        raise e

    with open(os.path.join(log_dir, 'settings.json'), 'w', encoding='utf-8') as f:
        json.dump(kwargs, f, ensure_ascii=False, indent=4)
        # pass

    model = None
    if resume:
        model = load_model(log_dir, n_tools=n_tools)
    if representation == 'wide':
        # policy_kwargs = {'n_tools': n_tools}
        policy_kwargs = {}
        if ca_action:
            # FIXME: there should be a better way to do this
            env.action_space = dummy_action_space
            # more frequent updates, for debugging... or because our action space is huge?
            # n_steps = 512
        else:
            pass
            # n_steps = 2048
    else:
        policy_kwargs = {}
        # the default for SB3 PPO
        # n_steps = 2048

    if not resume or model is None:
        # model = PPO(policy, env, verbose=1, n_steps=n_steps,
        #             tensorboard_log="./runs", policy_kwargs=policy_kwargs)
        model = PPO2(policy, env, verbose=1, tensorboard_log="./rl_runs",
                     policy_kwargs=policy_kwargs)
    # else:
    model.set_env(env)
    # model.policy = model.policy.to('cuda:0')
    # if torch.cuda.is_available():
    #     model.policy = model.policy.cuda()
    tb_log_name = '{}_tb'.format(exp_name_id)

    if not logging:
        model.learn(total_timesteps=n_frames, tb_log_name=tb_log_name)
    else:
        model.learn(total_timesteps=n_frames, tb_log_name=tb_log_name, callback=callback)
def infer(game, representation, experiment, infer_kwargs, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {
        **infer_kwargs,
        'inference': True,
        'render': True,
    }
    max_trials = kwargs.get('max_trials', -1)
    n = kwargs.get('n', None)
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    if n is None:
        n = max_exp_idx(exp_name)
    if n == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    model = load_model(log_dir)
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get('n_cpu', 12)
    env = make_vec_envs(env_name, representation, None, n_cpu, **infer_kwargs)
    obs = env.reset()
    # Record final values of each trial.
    infer_info = {
        'path_lengths': [],
        'changes': [],
        'regions': [],
    }
    if 'binary' in env_name:
        path_lengths = []
        changes = []
        regions = []
    n_trials = 0

    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        reward = rewards[0]
        n_regions = info[0]['regions']
        readouts = []
        if 'binary' in env_name:
            curr_path_length = info[0]['path-length']
            readouts.append('path length: {}'.format(curr_path_length))
            path_lengths.append(curr_path_length)
            changes.append(info[0]['changes'])
            regions.append(info[0]['regions'])
        readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]

        # Draw the readouts onto a blank image with OpenCV and display it.
        y0, dy = 50, 40
        img = np.zeros((256, 512, 3), np.uint8)
        scale_percent = 60  # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)
        for i, line in enumerate(readouts):
            y = y0 + i * dy
            cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
        resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        cv2.imshow("img", resized)
        cv2.waitKey(1)
        # for p, v in model.get_parameters().items():
        #     print(p, v.shape)

        if dones:
            # show_state(env, path_lengths, changes, regions, n_step)
            if 'binary' in env_name:
                infer_info['path_lengths'] = path_lengths[-1]
                infer_info['changes'] = changes[-1]
                infer_info['regions'] = regions[-1]
            n_trials += 1
    return infer_info
    args.num_classes = 4
    in_channel = 3
elif args.dataset == "TinyImageNet":
    load_dataset = TinyImageNet_Dataset()
    args.num_classes = 20
    in_channel = 3
elif args.dataset == "CIFAR100":
    load_dataset = CIFAR100_Dataset()
    args.num_classes = 15
    in_channel = 3
elif args.dataset == "CIFARAddN":
    load_dataset = CIFARAddN_Dataset()
    args.num_classes = 4
    in_channel = 3

exp_name = utils.get_exp_name(args)
print("Experiment: %s" % exp_name)

### run experiment 1/5 times
for run_idx in range(args.exp, args.exp + 1):
    print("Begin to Run Exp %s..." % run_idx)
    args.run_idx = run_idx
    seed_sampler = int(args.seed_sampler.split(' ')[run_idx])
    # seed_sampler = None
    save_path = 'results/%s/%s/%s' % (args.dataset, exp_name, str(run_idx))
    args.save_path = save_path
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    latent_dim = 32
    if args.encode_z:
def evaluate(game, representation, infer_kwargs, fix_trgs=False, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    global N_BINS
    global N_MAPS
    global N_TRIALS
    infer_kwargs = {**infer_kwargs, "inference": True, "evaluate": True}
    # max_trials = kwargs.get("max_trials", -1)
    # n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    map_width = infer_kwargs.get("map_width")
    max_steps = infer_kwargs.get("max_step")
    eval_controls = infer_kwargs.get("eval_controls")
    env_name = get_env_name(game, representation)
    # env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, **kwargs)
    levels_im_name = "{}_{}-bins_levels.png"
    # if n is None:
    #     if EXPERIMENT_ID is None:
    #         n = max_exp_idx(exp_name)
    #         print(
    #             "Experiment index not specified, setting index automatically to {}".format(n)
    #         )
    #     else:
    #         n = EXPERIMENT_ID
    # if n == 0:
    #     raise Exception(
    #         "Did not find ranked saved model of experiment: {}".format(exp_name)
    #     )
    crop_size = infer_kwargs.get("cropped_size")
    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
    log_dir = os.path.join(EXPERIMENT_DIR, '{}_{}_log'.format(exp_name, exp_id))
    eval_dir = os.path.join(log_dir, 'eval')
    if not os.path.isdir(eval_dir):
        os.mkdir(eval_dir)
    # log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    data_path = os.path.join(eval_dir, "{}_eval_data".format(N_BINS))
    data_path_levels = os.path.join(eval_dir, "{}_eval_data_levels".format(N_BINS))
    if fix_trgs:
        data_path += "_fixTrgs"
        data_path_levels += "_fixTrgs"
    data_path += ".pkl"
    data_path_levels += ".pkl"

    if VIS_ONLY:
        # if RENDER_LEVELS:
        #     eval_data_levels = pickle.load(open(data_path_levels, "rb"))
        #     eval_data_levels.render_levels()
        #     return
        eval_data = pickle.load(open(data_path, "rb"))
        # FIXME: just for backward compatibility
        eval_data.eval_dir = eval_dir
        eval_data.render_levels()
        eval_data.visualize_data(eval_dir, fix_trgs)
        # eval_data.hamming_heatmap(None, eval_data.div_scores)
        return

    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    infer_kwargs['render_path'] = True
    env, dummy_action_space, n_tools = make_vec_envs(env_name, representation, None, **infer_kwargs)
    model = load_model(log_dir, load_best=infer_kwargs.get("load_best"), n_tools=n_tools)
    # model.set_env(env)
    env.action_space = dummy_action_space
    env = env.envs[0]
    # Record final values of each trial
    # if 'binary' in env_name:
    #     path_lengths = []
    #     changes = []
    #     regions = []
    #     infer_info = {
    #         'path_lengths': [],
    #         'changes': [],
    #         'regions': [],
    #     }

    if n_cpu == 1:
        # control_bounds = env.envs[0].get_control_bounds()
        control_bounds = env.get_control_bounds()
    elif n_cpu > 1:
        raise Exception("multi-process evaluation is not supported")
        # supply args and kwargs
        env.remotes[0].send(("env_method", ("get_control_bounds", [], {})))
        control_bounds = env.remotes[0].recv()

    if not eval_controls:
        eval_controls = control_bounds.keys()
    if len(control_bounds) == 0:
        # Then this is a non-controllable agent.
        # Can't we just do this in all cases though?
        control_bounds = env.cond_bounds
    ctrl_bounds = [(k, control_bounds[k]) for k in eval_controls]

    # if len(ctrl_bounds) == 0 and DIVERSITY_EVAL:
    #     N_MAPS = 100
    #     N_TRIALS = 1

    # Hackish way to get initial states
    init_states = []
    for i in range(N_MAPS):
        env.reset()
        # TODO: set initial states in either of these domains?
        if not (RCT or SC):
            init_states.append(env.unwrapped._rep._map)

    N_EVALS = N_TRIALS * N_MAPS

    def eval_static_trgs():
        '''Run an evaluation on the default values for all level metrics. For both
        controllable and vanilla agents. The latter's "home turf."'''
        N_BINS = None
        level_images = []
        cell_scores = np.zeros(shape=(1, 1, N_EVALS))
        div_scores = np.zeros(shape=(1, 1))
        cell_static_scores = np.zeros(shape=(1, 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(1, 1, N_EVALS))
        level_tokens = None
        # if DIVERSITY_EVAL:
        #     n_row = 1
        #     n_col = 1
        # else:
        n_row = 2
        n_col = 5

        for i in range(n_row):
            level_images_y = []
            for j in range(n_col):
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    env.unwrapped._prob.static_trgs,
                    max_steps,
                )
                level_images_y.append(level_image)
                cell_scores[0, 0, :] = net_score
                div_score = np.sum([np.sum(a != b) for a in tokens for b in tokens]) / (
                    len(tokens) * (len(tokens) - 1))
                div_score = div_score / (map_width * map_width)
                div_scores[0, 0] = div_score
            level_images.append(np.hstack(level_images_y))

        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

        return cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, level_tokens, image

    if len(ctrl_bounds) == 0:
        # If we didn't train with controls, we'll evaluate inside a grid of targets (on the
        # controllable agents' turf) and record scores for the cell corresponding to the default
        # static targets (on the vanilla turf), depending on the value of fix_trgs.
        ctrl_names = prob_cond_metrics[problem]
        ctrl_bounds = [(k, env.cond_bounds[k]) for k in ctrl_names]

    if fix_trgs:
        ctrl_names = None
        ctrl_ranges = None
        cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, level_tokens, image = \
            eval_static_trgs()

    elif len(ctrl_bounds) == 1:
        ctrl_name = ctrl_bounds[0][0]
        bounds = ctrl_bounds[0][1]
        step_size = max((bounds[1] - bounds[0]) / (N_BINS[0] - 1), 1)
        eval_trgs = np.arange(bounds[0], bounds[1] + 1, step_size)
        level_images = []
        cell_scores = np.zeros((len(eval_trgs), 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        cell_static_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        level_tokens = []
        div_scores = np.zeros((len(eval_trgs), 1))

        for i, trg in enumerate(eval_trgs):
            trg_dict = {ctrl_name: trg}
            print("evaluating control targets: {}".format(trg_dict))
            # set_ctrl_trgs(env, {ctrl_name: trg})
            net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                model, env, N_EVALS, n_cpu, init_states, eval_dir, trg_dict, max_steps)
            div_score = div_calc(tokens)
            div_scores[i, 0] = div_score
            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(level_image)
            cell_scores[i, :, :] = net_score
            cell_ctrl_scores[i, :, :] = ctrl_score
            cell_static_scores[i, :, :] = static_score
            level_tokens.append(tokens)

        ctrl_names = (ctrl_name, None)
        ctrl_ranges = (eval_trgs, None)
        # if "regions" in ctrl_ranges:
        #     # hack it to ensure our default static trgs are in the heatmap,
        #     # so we can compare on baseline's turf
        #     ctrl_ranges["regions"][0] = 1
        ims = np.hstack(level_images)
        image = Image.fromarray(ims)
        image.save(os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

    elif len(ctrl_bounds) >= 2:
        ctrl_0, ctrl_1 = ctrl_bounds[0][0], ctrl_bounds[1][0]
        b0, b1 = ctrl_bounds[0][1], ctrl_bounds[1][1]
        step_0 = max((b0[1] - b0[0]) / (N_BINS[0] - 1), 1)
        step_1 = max((b1[1] - b1[0]) / (N_BINS[-1] - 1), 1)
        trgs_0 = np.arange(b0[0], b0[1] + 0.5, step_0)
        trgs_1 = np.arange(b1[0], b1[1] + 0.5, step_1)
        cell_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        div_scores = np.zeros(shape=(len(trgs_0), len(trgs_1)))
        cell_ctrl_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        cell_static_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        level_tokens = [[None] * len(trgs_0)] * len(trgs_1)  # Wait what?
        trg_dict = env.static_trgs
        trg_dict = dict([(k, min(v)) if isinstance(v, tuple) else (k, v)
                         for (k, v) in trg_dict.items()])
        level_images = []

        for i, t0 in enumerate(trgs_0):
            level_images_y = []
            for j, t1 in enumerate(trgs_1):
                ctrl_trg_dict = {ctrl_0: t0, ctrl_1: t1}
                trg_dict.update(ctrl_trg_dict)
                print("evaluating control targets: {}".format(trg_dict))
                # set_ctrl_trgs(env, {ctrl_name: trg})
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    trg_dict,
                    max_steps,
                )
                if j % LVL_RENDER_INTERVAL == 0:
                    level_images_y.append(level_image)
                cell_scores[i, j, :] = net_score
                cell_ctrl_scores[i, j, :] = ctrl_score
                cell_static_scores[i, j, :] = static_score
                div_score = div_calc(tokens)
                div_scores[i, j] = div_score
                # level_tokens[j][i] = tokens
            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(np.hstack(level_images_y))
            # level_tokens.append(tokens)

        ctrl_names = (ctrl_0, ctrl_1)
        ctrl_ranges = (trgs_0, trgs_1)
        image = None
        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

    levels_im_path = os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS))
    eval_data = EvalData(
        ctrl_names,
        ctrl_ranges,
        cell_scores,
        cell_ctrl_scores,
        cell_static_scores,
        div_scores=div_scores,
        eval_dir=eval_dir,
        levels_image=image,
        levels_im_path=levels_im_path,
    )
    pickle.dump(eval_data, open(data_path, "wb"))
    eval_data.visualize_data(eval_dir, fix_trgs)
    # else:
    #     levels_im_path = os.path.join(
    #         eval_dir, levels_im_name.format(ctrl_names, N_BINS)
    #     )
    #     eval_data_levels = EvalData(
    #         ctrl_names,
    #         ctrl_ranges,
    #         cell_scores,
    #         cell_ctrl_scores,
    #         cell_static_scores,
    #         div_scores=div_scores,
    #         eval_dir=eval_dir,
    #         levels_image=image,
    #         levels_im_path=levels_im_path,
    #     )
    #     pickle.dump(eval_data_levels, open(data_path_levels, "wb"))

    if not fix_trgs:
        eval_data.render_levels()

    if DIVERSITY_EVAL:
        # eval_data = eval_data
        if fix_trgs:
            eval_data.save_stats(div_scores=div_scores, fix_trgs=fix_trgs)
        else:
            pass
            # eval_data.hamming_heatmap(level_tokens, div_scores=div_scores)

    env.close()
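# `div_calc` is defined elsewhere in the repo; below is a sketch consistent with the
# inline computation in eval_static_trgs above: the mean pairwise Hamming distance
# between generated levels, normalized by map area. It assumes a `map_width` variable
# is in scope, which is an assumption of this sketch.
def div_calc(tokens):
    div_score = np.sum([np.sum(a != b) for a in tokens for b in tokens]) / (
        len(tokens) * (len(tokens) - 1))
    return div_score / (map_width * map_width)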
        z_end = -z_start
        imgs = model.generate_sk_seq_from_z(z_start, z_end, num_steps)
        save_gif(imgs, 'diagonal_random_%d.gif' % i)

        x_batch = data_loader.get_test_data_batch(1024, 0, norm=True)
        x_start = x_batch[np.random.randint(0, x_batch.shape[0])]
        x_end = x_batch[np.random.randint(0, x_batch.shape[0])]
        imgs = model.generate_sk_seq_from_x(x_start, x_end, num_steps)
        save_gif(imgs, 'random_x_walk_%d.gif' % i)

    if 'eval' in tasks:
        ## Evaluating losses
        loss_x, loss_z, loss_c, loss_g = evaluate_losses(n_iter=20)
        exp_name = utils.get_exp_name()
        print('\nLosses:')
        print(exp_name, 'X_recon_loss: %.05f iteration: %d' % (loss_x, model.iter_number))
        print(exp_name, 'Z_recon_loss: %.05f iteration: %d' % (loss_z, model.iter_number))
        print(exp_name, 'C_recon_loss: %.05f iteration: %d' % (loss_c, model.iter_number))
        print(exp_name, 'A_gener_loss: %.05f iteration: %d' % (loss_g, model.iter_number))
        print('')

    if 'zdist' in tasks:
        print('\nPlotting distribution for Z')
        z_dist_batch_size = 1024