Example #1
def run_mslr(model_name,
             mslr_config=MslrConfig(),
             training_config=TrainingConfig()):
    exp_name = utils.get_exp_name(model_name, mslr_config.task_name)
    writer = SummaryWriter(os.path.join(training_config.log_dir, exp_name))

    mslr = MSLR(mslr_config.train_file, mslr_config.dev_file,
                mslr_config.test_file, mslr_config.batch_size)
    train_loader, train_df, dev_loader, dev_df, test_loader, test_df = \
        mslr.load_data()
    model, model_inference = get_train_inference_model(
        model_name, train_loader.num_features)
    device = ml_utils.get_device()
    model.to(device)
    model_inference.to(device)
    model.apply(ml_utils.init_weights)
    optimizer = torch.optim.Adam(model.parameters(), lr=training_config.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=training_config.step_size,
        gamma=training_config.gamma)
    loss_op = torch.nn.BCELoss().to(device)
    losses = []

    print("Start training")
    for epoch in range(training_config.num_epochs):

        model.zero_grad()
        model.train()

        train_ce_loss = pairwise_train_fn(model, loss_op, optimizer,
                                          train_loader, device)
        # Step the LR scheduler after the epoch's optimizer updates
        # (stepping it first skips the initial LR in PyTorch >= 1.1).
        scheduler.step()
        print('Finished training for epoch {}'.format(epoch))
        train_results = {"loss": train_ce_loss}
        writer.add_scalars("train", train_results, epoch)
        print(train_results)
        losses.append(train_ce_loss)

        # Evaluate (and checkpoint) every eval_and_save_every epochs
        if epoch % training_config.eval_and_save_every == 0:
            dev_ce_loss, dev_ndcg_results = eval_model_fn(
                model_inference, device, dev_df, dev_loader,
                training_config.ndcg_k_list)
            eval_results = {"loss": dev_ce_loss}
            print("Validation at epoch {}".format(epoch))
            for k in dev_ndcg_results:
                ndcg_at_str = "NDCG@{}".format(k)
                eval_results[ndcg_at_str] = dev_ndcg_results[k]
            print(eval_results)
            writer.add_scalars("eval", eval_results, epoch)

    print("Training finished, start testing...")
    test_ce_loss, test_ndcg_results = eval_model_fn(
        model_inference, device, test_df, test_loader,
        training_config.ndcg_k_list)
    print("Testing loss: {}".format(test_ce_loss))
    for k in test_ndcg_results:
        print("NDCG@{}: {:.5f}".format(k, test_ndcg_results[k]))
Example #2
def main(game, representation, experiment, steps, n_cpu, render, logging,
         **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None
    # Pass used_dir (None when logging is disabled) rather than log_dir,
    # which is what the preceding lines prepare it for.
    env = make_vec_envs(env_name, representation, used_dir, n_cpu, **kwargs)
    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)
    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps),
                    tb_log_name=exp_name,
                    callback=callback)
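A hedged sketch of driving this trainer directly; all argument values are illustrative, with 'binary' and 'narrow' chosen from the games and representations the function branches on above:

# Illustrative call only; resume is read out of **kwargs.
main('binary', 'narrow', 'test_run', steps=1e6, n_cpu=12,
     render=False, logging=True, resume=False)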
Example #3
def save_sorted_loss_plot(sorted_losses, path):
    n = 1.0 * len(sorted_losses)
    i_elbow = get_elbow_index(sorted_losses)

    elbow_tile = (i_elbow / n * 100)
    elbow_tile_m = ((i_elbow - 1) / n * 100)
    elbow_tile_p = ((i_elbow + 1) / n * 100)

    plt.rcParams["figure.figsize"] = 15, 15
    x_scale = list(100 * np.arange(1.0, n + 1.0) / n)

    fig = plt.figure()
    fig.suptitle('Val Losses: %s' % utils.get_exp_name(),
                 fontsize=14,
                 fontweight='bold')

    ax = fig.add_subplot(111)
    ax.plot(x_scale, sorted_losses)

    ax.plot([elbow_tile], [sorted_losses[i_elbow]], 'o')
    ax.plot([elbow_tile_m], [sorted_losses[i_elbow - 1]], 'o')
    ax.plot([elbow_tile_p], [sorted_losses[i_elbow + 1]], 'o')

    font_size = 14

    ax.set_xlabel('%-tile')
    ax.set_ylabel('Loss')

    ax.text(5, .950, '# Examples: %d' % int(n), fontsize=font_size)
    ax.text(5, .925, 'Elbow Index : %d' % i_elbow, fontsize=font_size)
    ax.text(5, .900, 'Elbow %%tile: %.4f' % elbow_tile, fontsize=font_size)

    ax.text(5, .850, 'Min Loss: %.4f' % sorted_losses[0], fontsize=font_size)
    ax.text(5, .825, 'Max Loss: %.4f' % sorted_losses[-1], fontsize=font_size)
    ax.text(5,
            .800,
            'Loss at elbow index: %.4f' % sorted_losses[i_elbow],
            fontsize=font_size)

    for i, p in enumerate(np.arange(0.8, 1, 0.02)):
        text = 'LP %5.2f %.4f' % (100 * p, get_tp_value(sorted_losses, p))
        ax.text(5, 0.750 - i * 0.02, text, fontsize=font_size)

    ax.axis([0, 105, 0, 1])

    plt.savefig(path)
    plt.close()
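A self-contained sketch of feeding the plotter, assuming get_elbow_index, get_tp_value, and utils.get_exp_name are importable from the surrounding module as above; the losses here are synthetic:

import numpy as np

# Synthetic per-example validation losses, sorted ascending as the function
# expects (it treats sorted_losses[0]/[-1] as the min/max loss, and its axes
# assume losses in [0, 1]).
sorted_losses = sorted(np.random.rand(1000).tolist())
save_sorted_loss_plot(sorted_losses, 'val_losses_sorted.png')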
Example #4
def main(game, representation, experiment, steps, n_cpu, render, logging,
         **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    used_dir = log_dir
    if not logging:
        used_dir = None

    env = make_env(env_name, representation, 0, None, **kwargs)()
    print(env.action_space)
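This variant shares Example #2's setup verbatim but, instead of training, constructs a single non-vectorized environment via make_env (passing None as the log directory) and prints its action space, which is useful for sanity-checking a configuration before launching a run.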
Example #5
def compile_results(settings_list):
    batch_exp_name = settings_list[0]["experiment_id"]
    #   if batch_exp_name == "2":
    RL_DIR = "rl_runs"
    #   elif batch_exp_name == "1":
    #       EVO_DIR = "evo_runs_06-13"
    #       RL_DIR = "evo_runs_06-14"
    #   ignored_keys = set(
    #       (
    #           "exp_name",
    #           "evaluate",
    #           "show_vis",
    #           "visualize",
    #           "render_levels",
    #           "multi_thread",
    #           "play_level",
    #           "evaluate",
    #           "save_levels",
    #           "cascade_reward",
    #           "model",
    #           "n_generations",
    #           "render",
    #           "infer",
    #       )
    #   )
    #   keys = []

    #   for k in settings_list[0].keys():
    #       if k not in ignored_keys:
    #           keys.append(k)
    keys = [
        "problem",
        "representation",
        "conditionals",
        "alp_gmm",
        "change_percentage",
    ]
    columns = None
    data = []
    vals = []

    for i, settings in enumerate(settings_list):
        val_lst = []

        controllable = False
        for k in keys:
            v = settings[k]
            if k == 'conditionals':
                if v != ['NONE']:
                    controllable = True
            if isinstance(settings[k], list):
                if len(settings[k]) < 2:
                    val_lst.append("-".join(settings[k]))
                else:
                    val_lst.append(newline(settings[k][0]+'-', v[1]))
            elif k == 'alp_gmm':
                if not controllable:
                    v = ''
                elif v:
                    v = 'learning'
                else:
                    v = 'random'
                val_lst.append(v)
            else:
                val_lst.append(v)
        args = parse_args(load_args=settings)
        arg_dict = vars(args)
        # parse_args expects the key "cond_metrics" where our settings use
        # "conditionals", so rename it before passing the dict along.
        arg_dict["cond_metrics"] = arg_dict.pop("conditionals")
        exp_name = get_exp_name(
            arg_dict.pop("problem"), arg_dict.pop("representation"), **arg_dict
        ) + "_{}_log".format(batch_exp_name)
        # NOTE: For now, we run this locally in a special directory, to which we have copied the results of eval on
        # relevant experiments.
        exp_name = os.path.join(RL_DIR, exp_name)
        stats_f = os.path.join(exp_name, "eval", "scores_ctrlTrgs.json")
        fixTrgs_stats_f = os.path.join(exp_name, "eval", "scores_fixTrgs.json")

        if not (os.path.isfile(stats_f) and os.path.isfile(fixTrgs_stats_f)):
            print(stats_f)
            print(
                "skipping evaluation of experiment due to missing stats file(s): {}".format(
                    exp_name
                )
            )

            continue
        vals.append(tuple(val_lst))
        data.append([])
        stats = json.load(open(stats_f, "r"))
        fixLvl_stats = json.load(open(fixTrgs_stats_f, "r"))
        flat_stats = flatten_stats(fixLvl_stats)
        flat_stats.update(flatten_stats(stats, controllable=True))

        if columns is None:
            columns = list(flat_stats.keys())

        for j, c in enumerate(columns):
            if c not in flat_stats:
                data[-1].append("N/A")
            else:
                data[-1].append(flat_stats[c])

    tuples = vals
    # Rename headers
    new_keys = []

    for k in keys:
        if k in header_text:
            new_keys.append(header_text[k])
        else:
            new_keys.append(k)
    for (i, lst) in enumerate(tuples):
        new_lst = []
        for v in lst:
            if v in header_text:
                new_lst.append(header_text[v])
            else:
                new_lst.append(v)
        tuples[i] = new_lst

    index = pd.MultiIndex.from_tuples(tuples, names=new_keys)
    #   df = index.sort_values().to_frame(index=True)
    df = pd.DataFrame(data=data, index=index, columns=columns).sort_values(by=new_keys)
    #   print(index)

    csv_name = r"{}/cross_eval_{}.csv".format(RL_DIR, batch_exp_name)
    html_name = r"{}/cross_eval_{}.html".format(RL_DIR, batch_exp_name)
    df.to_csv(csv_name)
    df.to_html(html_name)
    print(df)

    #   tex_name = r"{}/zelda_empty-path_cell_{}.tex".format(OVERLEAF_DIR, batch_exp_name)
    # FIXME: round the full table in one place; for now, rounding is applied
    # per-problem further below.
#   df = df.round(2)
    for p in ["binary", "zelda", "sokoban"]:
        tex_name = "{}/{}_{}.tex".format(RL_DIR, p, batch_exp_name)
        df_tex = df.loc[p, "narrow"]
        p_name = p + '_ctrl'
        lcl_conds = ['None'] + ['-'.join(pi) if len(pi) < 2 else newline(pi[0]+'-',pi[1]) for pi in local_controls[p_name]]
        print(lcl_conds)
        df_tex = df_tex.loc[lcl_conds]
#       df_tex = df_tex.sort_values(by=['ALP GMM'])
        z_cols = [
            header_text["net_score (mean)"],
            header_text["diversity_score (mean)"],
            header_text["(controls) net_score (mean)"],
#           header_text["(controls) ctrl_score (mean)"],
#           header_text["(controls) fixed_score (mean)"],
            header_text["(controls) diversity_score (mean)"],
        ]
        #   df_tex = df.drop(columns=z_cols)
        df_tex = df_tex.loc[:, z_cols]
        df_tex = df_tex * 100
        df_tex = df_tex.round(0)
        dual_conds = ['None', lcl_conds[1]]
        for k in z_cols:
            if k in df_tex:
#               df_tex.loc[dual_conds][k] = df_tex.loc[dual_conds][k].apply(
#                   lambda data: bold_extreme_values(data, data_max=df_tex.loc[dual_conds][k].max())
#               )
                df_tex[k] = df_tex[k].apply(
                    lambda data: bold_extreme_values(data, data_max=df_tex[k].max())
                )
#       df_tex = df_tex.round(2)
#       df_tex.reset_index(level=0, inplace=True)
        print(df_tex)

        with open(tex_name, "w") as tex_f:
            col_widths = "p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.5cm}p{0.8cm}p{0.8cm}p{0.8cm}"
            df_tex.to_latex(
                tex_f,
                index=True,
                columns=z_cols,
                multirow=True,
    #           column_format=col_widths,
                escape=False,
                caption=("Performance of controllable {}-generating agents with learning-progress-informed and uniform-random control regimes and baseline (single-objective) agents with various change percentage allowances.".format(p)),
                label={"tbl:{}".format(p)},
            )
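The table assembly above leans on pandas' MultiIndex; a self-contained sketch of the same slice-and-export pattern, with made-up data:

import pandas as pd

tuples = [('binary', 'narrow', 'None'), ('binary', 'narrow', 'path-length')]
index = pd.MultiIndex.from_tuples(
    tuples, names=['problem', 'representation', 'conditionals'])
df = pd.DataFrame({'score': [71.0, 64.0]}, index=index)
# Select one (problem, representation) block, as compile_results does with
# df.loc[p, "narrow"], then export it to LaTeX.
print(df.loc[('binary', 'narrow')].to_latex(escape=False))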
Example #6
def infer(game, representation, infer_kwargs, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {**infer_kwargs, "inference": True, "render": True}
    max_trials = kwargs.get("max_trials", -1)
#   n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    map_width = infer_kwargs.get("map_width")
    env_name = get_env_name(game, representation)
    exp_name = get_exp_name(game, representation, **infer_kwargs)

#   if n is None:
#       if EXPERIMENT_ID is None:
#           n = max_exp_idx(exp_name)
#       else:
#           n = EXPERIMENT_ID

#   if n == 0:
#       raise Exception(
#           "Did not find ranked saved model of experiment: {}".format(exp_name)
#       )
    crop_size = infer_kwargs.get("cropped_size")

    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
#   log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, n)
    log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    env, dummy_action_space, n_tools = make_vec_envs(
        env_name, representation, None, **infer_kwargs
    )
    print("loading model at {}".format(log_dir))
    model = load_model(
        log_dir, load_best=infer_kwargs.get("load_best"), n_tools=n_tools
    )
    if model is None:
        raise Exception("No model loaded")
    #   model.set_env(env)
    env.action_space = dummy_action_space
    obs = env.reset()
    # Record final values of each trial
    #   if 'binary' in env_name:
    #       path_lengths = []
    #       changes = []
    #       regions = []
    #       infer_info = {
    #           'path_lengths': [],
    #           'changes': [],
    #           'regions': [],
    #           }
    n_trials = 0
    n_step = 0

    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        #       print('reward: {}'.format(rewards))
        #       reward = rewards[0]
        #       n_regions = info[0]['regions']
        #       readouts = []
        #       if 'binary' in env_name:
        #           curr_path_length = info[0]['path-length']
        #           readouts.append('path length: {}'.format(curr_path_length) )
        #           path_lengths.append(curr_path_length)
        #           changes.append(info[0]['changes'])
        #           regions.append(info[0]['regions'])

        #       readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        #       stringexec = ""
        #       m=0
        #       y0, dy = 50, 40
        #       img = np.zeros((256,512,3), np.uint8)
        #       scale_percent = 60 # percent of original size
        #       width = int(img.shape[1] * scale_percent / 100)
        #       height = int(img.shape[0] * scale_percent / 100)
        #       dim = (width, height)
        #       # resize image
        #       for i, line in enumerate(readouts):
        #           y = y0 + i*dy
        #           cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
        #          #stringexec ="cv2.putText(img, TextList[" + str(TextList.index(i))+"], (100, 100+"+str(m)+"), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 100, 100), 1, cv2.LINE_AA)\n"
        #          #m += 100
        #       #cv2.putText(
        #       #    img,readout,
        #       #    topLeftCornerOfText,
        #       #    font,
        #       #    fontScale,
        #       #    fontColor,
        #       #    lineType)
        #       #Display the image
        #       resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
        #       cv2.imshow("img",resized)
        #       cv2.waitKey(1)
        #      #for p, v in model.get_parameters().items():
        #      #    print(p, v.shape)
        n_step += 1

        if dones:
            env.reset()
            #          #show_state(env, path_lengths, changes, regions, n_step)
            #           if 'binary' in env_name:
            #               infer_info['path_lengths'] = path_lengths[-1]
            #               infer_info['changes'] = changes[-1]
            #               infer_info['regions'] = regions[-1]
            n_step = 0
            n_trials += 1
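Note that max_trials defaults to -1 here, so the trial loop (while n_trials != max_trials, with n_trials counting up from 0) runs indefinitely unless a positive max_trials is passed through kwargs; the same convention appears in Example #8 below.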
Example #7
def main(game, representation, n_frames, n_cpu, render, logging, **kwargs):
    if game not in [
            "binary_ctrl", "sokoban_ctrl", "zelda_ctrl", "smb_ctrl",
            "MicropolisEnv", "RCT"
    ]:
        raise Exception(
            "Not a controllable environment. Maybe add '_ctrl' to the end of the name? E.g. 'sokoban_ctrl'"
        )
    kwargs['n_cpu'] = n_cpu
    env_name = get_env_name(game, representation)
    print('env name: ', env_name)
    exp_name = get_exp_name(game, representation, **kwargs)

    resume = kwargs.get('resume', False)
    ca_action = kwargs.get('ca_action')

    if representation == 'wide' and not ('RCT' in game
                                         or 'Micropolis' in game):
        if ca_action:
            raise Exception(
                "ca_action is not supported with the 'wide' representation")
#           policy = CApolicy
        else:
            policy = FullyConvPolicyBigMap
#           policy = WidePolicy

        if game == "sokoban" or game == "sokoban_ctrl":
            #           T()
            policy = FullyConvPolicySmallMap
    else:
        #       policy = ActorCriticCnnPolicy
        policy = CustomPolicyBigMap

        if game == "sokoban" or game == "sokoban_ctrl":
            #           T()
            policy = CustomPolicySmallMap
    crop_size = kwargs.get('cropped_size')

    if crop_size == -1:
        kwargs['cropped_size'] = get_crop_size(game)

    exp_id = kwargs.get('experiment_id')
    #   n = kwargs.get('experiment_id')

    #   if n is None:
    #       n = max_exp_idx(exp_name)
    #       if not resume:
    #           n += 1
    global log_dir

    exp_name_id = '{}_{}'.format(exp_name, exp_id)
    #   log_dir = 'rl_runs/{}_{}_log'.format(exp_name, n)
    log_dir = 'rl_runs/{}_log'.format(exp_name_id)

    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }

    #   if not resume:
    try:
        os.mkdir(log_dir)
        print("Log directory did not exist; starting a new experiment.")
        resume = False
    except Exception:
        print("Log directory exists; continuing. Will try to load a saved model.")
    try:
        env, dummy_action_space, n_tools = make_vec_envs(
            env_name, representation, log_dir, **kwargs)
    except Exception as e:
        # if this is a new experiment, clean up the logging directory if we fail to start up

        #       if not resume:
        #           os.rmdir(log_dir)
        raise e

    with open(os.path.join(log_dir, 'settings.json'), 'w',
              encoding='utf-8') as f:
        json.dump(kwargs, f, ensure_ascii=False, indent=4)

#       pass
    if resume:
        model = load_model(log_dir, n_tools=n_tools)

    if representation == 'wide':
        #       policy_kwargs = {'n_tools': n_tools}
        policy_kwargs = {}

        if ca_action:
            # FIXME: there should be a better way to restore the action space
            env.action_space = dummy_action_space
            # more frequent updates, for debugging... or because our action space is huge?
#           n_steps = 512
        else:
            pass
#           n_steps = 2048
    else:
        policy_kwargs = {}
        # the default for SB3 PPO
#       n_steps = 2048

    if not resume or model is None:
        # model = PPO(policy, env, verbose=1, n_steps=n_steps,
        #             tensorboard_log="./runs", policy_kwargs=policy_kwargs)
        model = PPO2(policy,
                     env,
                     verbose=1,
                     tensorboard_log="./rl_runs",
                     policy_kwargs=policy_kwargs)

    # Attach the environment whether the model is new or was loaded above.
    model.set_env(env)

    #model.policy = model.policy.to('cuda:0')
    #   if torch.cuda.is_available():
    #       model.policy = model.policy.cuda()
    tb_log_name = '{}_tb'.format(exp_name_id)
    if not logging:
        model.learn(total_timesteps=n_frames, tb_log_name=tb_log_name)
    else:
        model.learn(total_timesteps=n_frames,
                    tb_log_name=tb_log_name,
                    callback=callback)
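Note: the commented-out constructor above targets PPO from stable-baselines3, while the live code instantiates PPO2 from the original TensorFlow-based stable-baselines; the n_steps value of 2048 mentioned in the comments is the SB3 PPO default.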
Example #8
def infer(game, representation, experiment, infer_kwargs, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {
            **infer_kwargs,
            'inference': True,
            'render': True,
            }
    max_trials = kwargs.get('max_trials', -1)
    n = kwargs.get('n', None)
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    if n is None:
        n = max_exp_idx(exp_name)
    if n == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    model = load_model(log_dir)
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get('n_cpu', 12)
    env = make_vec_envs(env_name, representation, None, n_cpu, **infer_kwargs)
    obs = env.reset()
    # Record final values of each trial
    if 'binary' in env_name:
        path_lengths = []
        changes = []
        regions = []
        infer_info = {
            'path_lengths': [],
            'changes': [],
            'regions': [],
            }
    n_trials = 0
    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        reward = rewards[0]
        n_regions = info[0]['regions']
        readouts = []
        if 'binary' in env_name:
            curr_path_length = info[0]['path-length']
            readouts.append('path length: {}'.format(curr_path_length))
            path_lengths.append(curr_path_length)
            changes.append(info[0]['changes'])
            regions.append(info[0]['regions'])

        readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        # Render the readouts onto a blank image with OpenCV.
        y0, dy = 50, 40
        img = np.zeros((256, 512, 3), np.uint8)
        scale_percent = 60  # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)
        for i, line in enumerate(readouts):
            y = y0 + i * dy
            cv2.putText(img, line, (50, y), font, fontScale, fontColor,
                        lineType)
        # Resize and display the image.
        resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        cv2.imshow("img", resized)
        cv2.waitKey(1)
        # for p, v in model.get_parameters().items():
        #     print(p, v.shape)
        if dones:
           #show_state(env, path_lengths, changes, regions, n_step)
            if 'binary' in env_name:
                infer_info['path_lengths'] = path_lengths[-1]
                infer_info['changes'] = changes[-1]
                infer_info['regions'] = regions[-1]
            n_trials += 1
    return infer_info
Example #9
        args.num_classes = 4
        in_channel = 3
    elif args.dataset == "TinyImageNet":
        load_dataset = TinyImageNet_Dataset()
        args.num_classes = 20
        in_channel = 3
    elif args.dataset == "CIFAR100":
        load_dataset = CIFAR100_Dataset()
        args.num_classes = 15
        in_channel = 3
    elif args.dataset == "CIFARAddN":
        load_dataset = CIFARAddN_Dataset()
        args.num_classes = 4
        in_channel = 3

    exp_name = utils.get_exp_name(args)
    print("Experiment: %s" % exp_name)

    ### run experiment 1/5 times
    for run_idx in range(args.exp, args.exp + 1):
        print("Begin to Run Exp %s..." % run_idx)
        args.run_idx = run_idx
        seed_sampler = int(args.seed_sampler.split(' ')[run_idx])
        # seed_sampler = None
        save_path = 'results/%s/%s/%s' % (args.dataset, exp_name, str(run_idx))
        args.save_path = save_path
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        latent_dim = 32
        if args.encode_z:
Example #10
def evaluate(game, representation, infer_kwargs, fix_trgs=False, **kwargs):
    """
     - max_trials: The number of trials per evaluation.
     - infer_kwargs: Args to pass to the environment.
    """
    global N_BINS
    global N_MAPS
    global N_TRIALS

    infer_kwargs = {**infer_kwargs, "inference": True, "evaluate": True}
    #   max_trials = kwargs.get("max_trials", -1)
    #   n = kwargs.get("n", None)
    exp_id = infer_kwargs.get('experiment_id')
    map_width = infer_kwargs.get("map_width")  # needed below for the diversity normalization
    max_steps = infer_kwargs.get("max_step")
    eval_controls = infer_kwargs.get("eval_controls")
    env_name = get_env_name(game, representation)
    #   env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, **kwargs)
    levels_im_name = "{}_{}-bins_levels.png"

    #   if n is None:
    #       if EXPERIMENT_ID is None:
    #           n = max_exp_idx(exp_name)
    #           print(
    #               "Experiment index not specified, setting index automatically to {}".format(
    #                   n
    #               )
    #           )
    #       else:
    #           n = EXPERIMENT_ID

    #   if n == 0:
    #       raise Exception(
    #           "Did not find ranked saved model of experiment: {}".format(exp_name)
    #       )
    crop_size = infer_kwargs.get("cropped_size")

    if crop_size == -1:
        infer_kwargs["cropped_size"] = get_crop_size(game)
    log_dir = os.path.join(EXPERIMENT_DIR,
                           '{}_{}_log'.format(exp_name, exp_id))
    eval_dir = os.path.join(log_dir, 'eval')
    if not os.path.isdir(eval_dir):
        os.mkdir(eval_dir)
#   log_dir = "{}/{}_{}_log".format(EXPERIMENT_DIR, exp_name, exp_id)
    data_path = os.path.join(eval_dir, "{}_eval_data".format(N_BINS))
    data_path_levels = os.path.join(eval_dir,
                                    "{}_eval_data_levels".format(N_BINS))
    if fix_trgs:
        data_path += "_fixTrgs"
        data_path_levels += "_fixTrgs"
    data_path += ".pkl"
    data_path_levels += ".pkl"

    if VIS_ONLY:
        #       if RENDER_LEVELS:
        #           eval_data_levels = pickle.load(open(data_path_levels, "rb"))
        #           eval_data_levels.render_levels()

        #           return
        eval_data = pickle.load(open(data_path, "rb"))
        # FIXME: just for backward compatibility
        eval_data.eval_dir = eval_dir
        eval_data.render_levels()
        eval_data.visualize_data(eval_dir, fix_trgs)
        #       eval_data.hamming_heatmap(None, eval_data.div_scores)

        return
    # no log dir, 1 parallel environment
    n_cpu = infer_kwargs.get("n_cpu")
    infer_kwargs['render_path'] = True
    env, dummy_action_space, n_tools = make_vec_envs(env_name, representation,
                                                     None, **infer_kwargs)
    model = load_model(log_dir,
                       load_best=infer_kwargs.get("load_best"),
                       n_tools=n_tools)
    #   model.set_env(env)
    env.action_space = dummy_action_space
    env = env.envs[0]
    # Record final values of each trial
    #   if 'binary' in env_name:
    #       path_lengths = []
    #       changes = []
    #       regions = []
    #       infer_info = {
    #           'path_lengths': [],
    #           'changes': [],
    #           'regions': [],
    #           }

    if n_cpu == 1:
        #       control_bounds = env.envs[0].get_control_bounds()
        control_bounds = env.get_control_bounds()
    elif n_cpu > 1:
        raise Exception("no homie, no")
        # supply args and kwargs
        env.remotes[0].send(("env_method", ("get_control_bounds", [], {})))
        control_bounds = env.remotes[0].recv()

    if not eval_controls:
        eval_controls = control_bounds.keys()
    if len(control_bounds) == 0:
        # Then this is a non-controllable agent.
        # Can't we just do this in all cases though?
        control_bounds = env.cond_bounds
    ctrl_bounds = [(k, control_bounds[k]) for k in eval_controls]

    #   if len(ctrl_bounds) == 0 and DIVERSITY_EVAL:
    #       N_MAPS = 100
    #       N_TRIALS = 1
    # Hackishly collect initial states by resetting the environment N_MAPS times
    init_states = []

    for i in range(N_MAPS):
        env.reset()
        # TODO: set initial states in either of these domains?

        if not (RCT or SC):
            init_states.append(env.unwrapped._rep._map)
    N_EVALS = N_TRIALS * N_MAPS

    def eval_static_trgs():
        '''Run an evaluation using the default values for all level metrics,
        for both controllable and vanilla agents (the latter's "home turf").'''
        N_BINS = None
        level_images = []
        cell_scores = np.zeros(shape=(1, 1, N_EVALS))
        div_scores = np.zeros(shape=(1, 1))
        cell_static_scores = np.zeros(shape=(1, 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(1, 1, N_EVALS))
        level_tokens = None

        #       if DIVERSITY_EVAL:
        #           n_row = 1
        #           n_col = 1
        #       else:
        n_row = 2
        n_col = 5

        for i in range(n_row):
            level_images_y = []

            for j in range(n_col):
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    env.unwrapped._prob.static_trgs,
                    max_steps,
                )
                level_images_y.append(level_image)
                cell_scores[0, 0, :] = net_score
                div_score = np.sum(
                    [np.sum(a != b) for a in tokens
                     for b in tokens]) / (len(tokens) * (len(tokens) - 1))
                div_score = div_score / (map_width * map_width)
                div_scores[0, 0] = div_score

            level_images.append(np.hstack(level_images_y))

        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

        return cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, level_tokens, image

    if len(ctrl_bounds) == 0:
        # If we didn't train with controls, we'll evaluate inside a grid of targets (on the controllable agents' turf)
        # and record scores for the cell corresponding to the default static targets (on the vanilla turf),
        # depending on the value of fix_trgs.
        ctrl_names = prob_cond_metrics[problem]
        ctrl_bounds = [(k, env.cond_bounds[k]) for k in ctrl_names]

    if fix_trgs:
        ctrl_names = None
        ctrl_ranges = None
        cell_scores, cell_static_scores, cell_ctrl_scores, div_scores, \
            level_tokens, image = eval_static_trgs()

    elif len(ctrl_bounds) == 1:
        ctrl_name = ctrl_bounds[0][0]
        bounds = ctrl_bounds[0][1]
        step_size = max((bounds[1] - bounds[0]) / (N_BINS[0] - 1), 1)
        eval_trgs = np.arange(bounds[0], bounds[1] + 1, step_size)
        level_images = []
        cell_scores = np.zeros((len(eval_trgs), 1, N_EVALS))
        cell_ctrl_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        cell_static_scores = np.zeros(shape=(len(eval_trgs), 1, N_EVALS))
        level_tokens = []
        div_scores = np.zeros((len(eval_trgs), 1))

        for i, trg in enumerate(eval_trgs):
            trg_dict = {ctrl_name: trg}
            print("evaluating control targets: {}".format(trg_dict))
            #           set_ctrl_trgs(env, {ctrl_name: trg})
            net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                model, env, N_EVALS, n_cpu, init_states, eval_dir, trg_dict,
                max_steps)
            div_score = div_calc(tokens)
            div_scores[i, 0] = div_score
            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(level_image)
            cell_scores[i, :, :] = net_score
            cell_ctrl_scores[i, :, :] = ctrl_score
            cell_static_scores[i, :, :] = static_score
            level_tokens.append(tokens)
        ctrl_names = (ctrl_name, None)
        ctrl_ranges = (eval_trgs, None)
        #       if "regions" in ctrl_ranges:
        #           # hack it to ensure our default static trgs are in the heatmap, so we can compare on baseline's turf
        #           ctrl_ranges["regions"][0] = 1

        ims = np.hstack(level_images)
        image = Image.fromarray(ims)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))

    elif len(ctrl_bounds) >= 2:
        ctrl_0, ctrl_1 = ctrl_bounds[0][0], ctrl_bounds[1][0]
        b0, b1 = ctrl_bounds[0][1], ctrl_bounds[1][1]
        step_0 = max((b0[1] - b0[0]) / (N_BINS[0] - 1), 1)
        step_1 = max((b1[1] - b1[0]) / (N_BINS[-1] - 1), 1)
        trgs_0 = np.arange(b0[0], b0[1] + 0.5, step_0)
        trgs_1 = np.arange(b1[0], b1[1] + 0.5, step_1)
        cell_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        div_scores = np.zeros(shape=(len(trgs_0), len(trgs_1)))
        cell_ctrl_scores = np.zeros(shape=(len(trgs_0), len(trgs_1), N_EVALS))
        cell_static_scores = np.zeros(shape=(len(trgs_0), len(trgs_1),
                                             N_EVALS))
        level_tokens = [[None] * len(trgs_0)
                        for _ in range(len(trgs_1))]  # independent rows, not aliased copies of one list
        trg_dict = env.static_trgs
        trg_dict = dict([(k, min(v)) if isinstance(v, tuple) else (k, v)
                         for (k, v) in trg_dict.items()])
        level_images = []

        for i, t0 in enumerate(trgs_0):
            level_images_y = []

            for j, t1 in enumerate(trgs_1):
                ctrl_trg_dict = {ctrl_0: t0, ctrl_1: t1}
                trg_dict.update(ctrl_trg_dict)
                print("evaluating control targets: {}".format(trg_dict))
                #           set_ctrl_trgs(env, {ctrl_name: trg})
                net_score, ctrl_score, static_score, level_image, tokens = eval_episodes(
                    model,
                    env,
                    N_EVALS,
                    n_cpu,
                    init_states,
                    eval_dir,
                    trg_dict,
                    max_steps,
                )

                if j % LVL_RENDER_INTERVAL == 0:
                    level_images_y.append(level_image)
                cell_scores[i, j, :] = net_score
                cell_ctrl_scores[i, j, :] = ctrl_score
                cell_static_scores[i, j, :] = static_score
                div_score = div_calc(tokens)
                div_scores[i, j] = div_score
            #               level_tokens[j][i] = tokens

            if i % LVL_RENDER_INTERVAL == 0:
                level_images.append(np.hstack(level_images_y))

        #           level_tokens.append(tokens)
        ctrl_names = (ctrl_0, ctrl_1)
        ctrl_ranges = (trgs_0, trgs_1)
        image = None

        image = np.vstack(level_images[::-1])
        image = Image.fromarray(image)
        image.save(
            os.path.join(eval_dir, levels_im_name.format(ctrl_names, N_BINS)))
    levels_im_path = os.path.join(eval_dir,
                                  levels_im_name.format(ctrl_names, N_BINS))

    eval_data = EvalData(
        ctrl_names,
        ctrl_ranges,
        cell_scores,
        cell_ctrl_scores,
        cell_static_scores,
        div_scores=div_scores,
        eval_dir=eval_dir,
        levels_image=image,
        levels_im_path=levels_im_path,
    )
    pickle.dump(eval_data, open(data_path, "wb"))
    eval_data.visualize_data(eval_dir, fix_trgs)

    #   else:
    #       levels_im_path = os.path.join(
    #           eval_dir, levels_im_name.format(ctrl_names, N_BINS)
    #       )
    #       eval_data_levels = EvalData(
    #           ctrl_names,
    #           ctrl_ranges,
    #           cell_scores,
    #           cell_ctrl_scores,
    #           cell_static_scores,
    #           div_scores=div_scores,
    #           eval_dir=eval_dir,
    #           levels_image=image,
    #           levels_im_path=levels_im_path,
    #       )
    #       pickle.dump(eval_data_levels, open(data_path_levels, "wb"))
    if not fix_trgs:
        eval_data.render_levels()

    if DIVERSITY_EVAL:
        if fix_trgs:
            eval_data.save_stats(div_scores=div_scores, fix_trgs=fix_trgs)
#       else:
#           eval_data.hamming_heatmap(level_tokens, div_scores=div_scores)

    env.close()
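The diversity score computed inline in eval_static_trgs above is a mean pairwise Hamming distance between generated levels, normalized by map area; div_calc is not shown here but presumably does the same. A self-contained sketch of that arithmetic with synthetic levels (all names here are illustrative):

import numpy as np

def mean_pairwise_hamming(tokens, map_width):
    # Mismatch count over all ordered pairs (self-pairs contribute 0),
    # averaged over the n*(n-1) distinct ordered pairs, then divided by
    # the number of tiles per level, mirroring the inline computation above.
    n = len(tokens)
    total = np.sum([np.sum(a != b) for a in tokens for b in tokens])
    return total / (n * (n - 1)) / (map_width * map_width)

levels = [np.random.randint(0, 3, size=(16, 16)) for _ in range(5)]
print(mean_pairwise_hamming(levels, map_width=16))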
Example #11
        z_end = -z_start
        imgs = model.generate_sk_seq_from_z(z_start, z_end, num_steps)
        save_gif(imgs, 'diagonal_random_%d.gif' % i)

        x_batch = data_loader.get_test_data_batch(1024, 0, norm=True)

        x_start = x_batch[np.random.randint(0, x_batch.shape[0])]
        x_end = x_batch[np.random.randint(0, x_batch.shape[0])]
        imgs = model.generate_sk_seq_from_x(x_start, x_end, num_steps)
        save_gif(imgs, 'random_x_walk_%d.gif' % i)

if 'eval' in tasks:
    ## Evaluating Losses
    loss_x, loss_z, loss_c, loss_g = evaluate_losses(n_iter=20)

    exp_name = utils.get_exp_name()

    print('\n Losses:')
    print(exp_name, 'X_recon_loss: %.05f iteration: %d' % (loss_x,
                                                           model.iter_number))
    print(exp_name, 'Z_recon_loss: %.05f iteration: %d' % (loss_z,
                                                           model.iter_number))
    print(exp_name, 'C_recon_loss: %.05f iteration: %d' % (loss_c,
                                                           model.iter_number))
    print(exp_name, 'A_gener_loss: %.05f iteration: %d' % (loss_g,
                                                           model.iter_number))
    print('')

if 'zdist' in tasks:
    print('\nPlotting distribution for Z')
    z_dist_batch_size = 1024