Ejemplo n.º 1
0
def _run(resolution=16,
         score_objects=True,
         mean_repeat=20,
         explorer='repeated',
         seen_weight=0.0,
         seen_power=1.0,
         chosen_weight=0.0,
         chosen_power=1.0,
         action_weight=0.0,
         action_power=1.0,
         horiz_weight=0.3,
         vert_weight=0.1,
         low_score_weight=0.5,
         high_score_weight=10.0,
         explore_steps=100,
         ignore_death=1,
         x_repeat=2,
         show=False,
         seed_path=None,
         base_path='./results/',
         clear_old_checkpoints=True,
         game="montezuma",
         chosen_since_new_weight=0,
         chosen_since_new_power=1,
         warn_delete=True,
         low_level_weight=0.1,
         objects_from_pixels=True,
         objects_remember_rooms=True,
         only_keys=True,
         optimize_score=True,
         use_real_pos=True,
         target_shape=(6, 6),
         max_pix_value=255,
         prob_override=0.0,
         reset_pool=False,
         pool_class='py',
         start_method='fork',
         path_postfix='',
         n_cpus=None,
         save_prob_pictures=False,
         save_item_pictures=False,
         keep_prob_pictures=False,
         keep_item_pictures=False,
         batch_size=100,
         reset_cell_on_update=False,
         actors=1,
         nexp=None,
         lr=1.0e-03,
         lr_decay=0.99999,
         cliprange=0.1,
         cl_decay=0.99999,
         n_tr_epochs=2,
         mbatch=4,
         gamma=0.99,
         lam=0.95,
         log_path="log",
         nsubs=8,
         timedialation=20,
         master_lr=0.01,
         lr_decay_master=0.99999,
         master_cl=0.1,
         cl_decay_master=0.99999,
         warmup=20,
         train=40,
         retrain_N=None,
         with_domain=False,
         load_model=None,
         reward_function='clip',
         ent_mas=0.01,
         ent_sub=0.01,
         pacmanScoreRes=None,
         render=None,
         render_frameskip=4,
         clean_up_grid=False):
    """Build an explorer, a cell selector and an ``Explore`` driver, then run it.

    The function repeatedly calls ``expl.run_cycle()`` until one of the
    module-level limits (``MAX_TIME``, ``MAX_FRAMES``, ``MAX_FRAMES_COMPUTE``,
    ``MAX_ITERATIONS``, ``MAX_LEVEL``) is reached.  After every cycle it
    writes a summary to a ``summary.FileWriter``; on selected cycles it also
    writes a CSV line, pickled grid checkpoints and (optionally) pictures
    under ``base_path``.

    Besides the limits above, the body reads the module-level names
    ``THRESH_TRUE``, ``THRESH_COMPUTE``, ``TERM_CONDITION``, ``PROFILER`` and
    ``SAVE_MODEL``, and it assigns the global ``LOG_DIR``.

    Parameters (grouped by where the visible code forwards them):
        explorer: ``'ppo'``, ``'mlsh'`` or ``'repeated'`` select
            ``PPOExplorer``, ``MlshExplorer`` or
            ``RepeatedRandomExplorer(mean_repeat)``; any other value selects
            ``RandomExplorer``.  Ignored when ``game == "robot"``, which
            always uses ``RepeatedRandomExplorerRobot``.
        game: ``"montezuma"``, ``"pitfall"``, ``"nchain"`` or ``"pacman"``
            choose the game class, its constructor arguments and the grid
            dimensions.  Note that ``"robot"`` only has an explorer branch:
            it still falls through to the "unknown game" error below.
        resolution: resolution of the ``x``/``y`` grid dimensions; also
            passed to ``render_with_known`` and embedded in the log path.
        target_shape, max_pix_value: stored on the game class as
            ``TARGET_SHAPE`` / ``MAX_PIX_VALUE``.
        score_objects, x_repeat, objects_from_pixels,
        objects_remember_rooms, only_keys, render, render_frameskip:
            game constructor arguments (which ones apply depends on
            ``game``).
        pacmanScoreRes: when not ``None``, adds a ``score`` grid dimension
            with this resolution for ``"pacman"``.
        seen_*, chosen_*, action_*, chosen_since_new_*: wrapped in
            ``Weight(weight, power)`` and passed to the selector.
        horiz_weight, vert_weight, low_score_weight, high_score_weight:
            passed to ``DirWeights`` for the selector.
        low_level_weight: passed to the selector unchanged.
        explore_steps, ignore_death, optimize_score, use_real_pos,
        prob_override, reset_pool, n_cpus, batch_size,
        reset_cell_on_update, with_domain, load_model: forwarded to
            ``Explore``.  ``use_real_pos`` is additionally passed to
            ``NChainSelector`` as ``with_domain`` and gates which pictures
            are rendered.
        clean_up_grid: forwarded to ``Explore`` as ``reduce_grid``; when
            true, cells more than two levels below the highest level are
            dropped at each checkpoint.
        pool_class, start_method: ``'torch'`` / ``'loky'`` select
            ``torch.multiprocessing.Pool`` / ``LPool``; otherwise the pool
            comes from ``multiprocessing.get_context(start_method)``.
        actors, nexp, lr, lr_decay, cliprange, cl_decay, n_tr_epochs,
        mbatch, gamma, lam, ent_sub: PPO / MLSH sub-policy settings.
            ``nexp`` defaults to ``explore_steps`` when ``None``.
        nsubs, timedialation, master_lr, lr_decay_master, master_cl,
        cl_decay_master, warmup, train, retrain_N, ent_mas: MLSH-only
            settings.
        seed_path: optional lzma-compressed pickle loaded into
            ``expl.grid`` before exploring.
        base_path: prefix for checkpoint, CSV, picture, profiler and model
            files.
        log_path: root of the summary log directory.
        show, save_item_pictures, save_prob_pictures, keep_item_pictures,
        keep_prob_pictures, clear_old_checkpoints: control which pictures
            are rendered and which files from the previous checkpoint are
            deleted.
        warn_delete, path_postfix, reward_function: accepted but not used
            by this function.

    Returns:
        None.

    Raises:
        NotImplementedError: if ``game`` is not one of the supported names.
    """

    def _enter_tf_session():
        # Shared by the 'ppo' and 'mlsh' explorers, which need an entered
        # TensorFlow session before they are constructed.
        ncpu = multiprocessing.cpu_count()
        if sys.platform == 'darwin':
            ncpu //= 2
        config = ConfigProto(allow_soft_placement=True,
                             intra_op_parallelism_threads=ncpu,
                             inter_op_parallelism_threads=ncpu)
        config.gpu_options.allow_growth = True  # pylint: disable=E1101
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        return Session(config=config).__enter__()

    # --- Explorer -----------------------------------------------------------
    sess = None
    if game == "robot":
        explorer = RepeatedRandomExplorerRobot()
    elif explorer == "ppo":
        sess = _enter_tf_session()
        if nexp is None:
            nexp = explore_steps
        explorer = PPOExplorer(actors=actors,
                               nexp=nexp,
                               lr=lr,
                               lr_decay=lr_decay,
                               cliprange=cliprange,
                               cl_decay=cl_decay,
                               n_tr_epochs=n_tr_epochs,
                               nminibatches=mbatch,
                               gamma=gamma,
                               lam=lam,
                               ent_coef=ent_sub)
        # if game == 'nchain':
        #     explorer.init_model(env="NChain-v0", policy=MlpPolicy)
        # else:
        #     explorer.init_model(env="MontezumaRevengeDeterministic-v4", policy=CnnPolicy)
    elif explorer == 'mlsh':
        sess = _enter_tf_session()
        if nexp is None:
            nexp = explore_steps
        explorer = MlshExplorer(nsubs=nsubs,
                                timedialation=timedialation,
                                warmup_T=nexp * warmup,
                                train_T=nexp * train,
                                actors=actors,
                                nexp=nexp // timedialation,
                                lr_mas=master_lr,
                                lr_sub=lr,
                                lr_decay=lr_decay_master,
                                lr_decay_sub=lr_decay,
                                cl_decay=cl_decay_master,
                                cl_decay_sub=cl_decay,
                                n_tr_epochs=n_tr_epochs,
                                nminibatches=mbatch,
                                gamma=gamma,
                                lam=lam,
                                cliprange_mas=master_cl,
                                cliprange_sub=cliprange,
                                retrain_N=retrain_N,
                                ent_m=ent_mas,
                                ent_s=ent_sub)
    elif explorer == 'repeated':
        explorer = RepeatedRandomExplorer(mean_repeat)
    else:
        explorer = RandomExplorer()

    # --- Game class, constructor arguments and grid layout ------------------
    if game == "montezuma":
        game_class = MyMontezuma
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(score_objects=score_objects,
                         x_repeat=x_repeat,
                         objects_from_pixels=objects_from_pixels,
                         objects_remember_rooms=objects_remember_rooms,
                         only_keys=only_keys,
                         unprocessed_state=True)
        grid_resolution = (GridDimension('level', 1),
                           GridDimension('score', 1),
                           GridDimension('room', 1),
                           GridDimension('x', resolution),
                           GridDimension('y', resolution))
    elif game == "pitfall":
        game_class = pitfall_env.MyPitfall
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(score_objects=score_objects, x_repeat=x_repeat)
        grid_resolution = (GridDimension('level', 1),
                           GridDimension('score', 1),
                           GridDimension('room', 1),
                           GridDimension('x', resolution),
                           GridDimension('y', resolution))
    elif game == "nchain":
        game_class = MyNChain
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(N=10000)
        grid_resolution = (GridDimension('state', 1), )
    elif game == "pacman":
        game_class = MyMsPacman
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(x_repeat=x_repeat,
                         unprocessed_state=True,
                         render=render,
                         frameskip=render_frameskip)
        if pacmanScoreRes is None:
            grid_resolution = (GridDimension('level', 1),
                               GridDimension('x', resolution),
                               GridDimension('y', resolution))
        else:
            grid_resolution = (GridDimension('level', 1),
                               GridDimension('score', pacmanScoreRes),
                               GridDimension('x', resolution),
                               GridDimension('y', resolution))
    else:
        raise NotImplementedError("Unknown game: " + game)

    # --- Selector -----------------------------------------------------------
    # All selector classes take the same weighting arguments; only the
    # NChain selector additionally receives `with_domain`.
    selector_kwargs = dict(
        seen=Weight(seen_weight, seen_power),
        chosen=Weight(chosen_weight, chosen_power),
        action=Weight(action_weight, action_power),
        room_cells=Weight(0.0),
        dir_weights=DirWeights(horiz_weight, vert_weight, low_score_weight,
                               high_score_weight),
        chosen_since_new_weight=Weight(chosen_since_new_weight,
                                       chosen_since_new_power),
        low_level_weight=low_level_weight)
    if game == "nchain":
        selector = NChainSelector(game_class,
                                  with_domain=use_real_pos,
                                  **selector_kwargs)
    elif game == "pacman":
        selector = PacmanSelector(game_class, **selector_kwargs)
    else:
        selector = WeightedSelector(game_class, **selector_kwargs)

    # --- Worker pool --------------------------------------------------------
    pool_cls = multiprocessing.get_context(start_method).Pool
    if pool_class == 'torch':
        pool_cls = torch.multiprocessing.Pool
    elif pool_class == 'loky':
        pool_cls = LPool

    expl = Explore(explorer,
                   selector, (game_class, game_args),
                   grid_resolution,
                   explore_steps=explore_steps,
                   ignore_death=ignore_death,
                   optimize_score=optimize_score,
                   use_real_pos=use_real_pos,
                   prob_override=prob_override,
                   reset_pool=reset_pool,
                   pool_class=pool_cls,
                   n_cpus=n_cpus,
                   batch_size=batch_size,
                   reset_cell_on_update=reset_cell_on_update,
                   with_domain=with_domain,
                   load_model=load_model,
                   reduce_grid=clean_up_grid)

    if seed_path is not None:
        # NOTE(security): pickle.load can execute arbitrary code; only seed
        # from files produced by a trusted run.
        with lzma.open(seed_path, 'rb') as seed_file:
            expl.grid = pickle.load(seed_file)
        print(random.sample(list(expl.grid.keys()), 10))
        print('Number at level > 0: ',
              len([e for e in expl.grid.keys() if e.level > 0]))

    # Width of the zero-padded frame counters in checkpoint file names.
    n_digits = 12

    # Running totals of the counters read from `expl` before each cycle.
    # NOTE(review): presumably Explore resets frames_true/frames_compute per
    # cycle, which is why they are accumulated here - confirm.
    old = 0
    old_compute = 0

    with tqdm(desc='Time (seconds)', smoothing=0,
              total=MAX_TIME) as t_time, tqdm(
                  desc='Iterations', total=MAX_ITERATIONS) as t_iter, tqdm(
                      desc='Compute steps',
                      total=MAX_FRAMES_COMPUTE) as t_compute, tqdm(
                          desc='Game step', total=MAX_FRAMES) as t:
        start_time = time.time()
        last_time = np.round(start_time)
        # TODO: make this more generic for each level switch
        seen_level_1 = False
        n_iters = 0
        prev_checkpoint = None

        def should_continue():
            """Return False as soon as any configured run limit is reached."""
            if MAX_TIME is not None and time.time() - start_time >= MAX_TIME:
                return False
            if MAX_FRAMES is not None and expl.frames_true + old >= MAX_FRAMES:
                return False
            if MAX_FRAMES_COMPUTE is not None and expl.frames_compute + old_compute >= MAX_FRAMES_COMPUTE:
                return False
            if MAX_ITERATIONS is not None and n_iters >= MAX_ITERATIONS:
                return False
            if MAX_LEVEL is not None and len(
                    Counter(e.level for e in expl.grid).keys()) > MAX_LEVEL:
                return False
            # NOTE(review): `and False` makes this branch unreachable, so
            # TERM_CONDITION currently has no effect - confirm this is
            # intentional before removing it.
            if TERM_CONDITION and False:
                return False
            return True

        # Log directory name encodes the main hyper-parameters of the run.
        explorer_name = explorer.__repr__()
        logDir = (f'{log_path}/{game}_{explorer_name}/res_{resolution}'
                  f'_explStep_{explore_steps}_cellbatch_{batch_size}')
        if explorer_name == 'ppo':
            logDir = (
                f'{logDir}_actors_{actors}_exp_{nexp}_lr_{lr}_lrDec_{lr_decay}'
                f'_cl_{cliprange}_clDec_{cl_decay}'
                f'_mbatch_{mbatch}_trainEpochs_{n_tr_epochs}'
                f'_gamma_{gamma}_lam_{lam}')
        if explorer_name == 'mlsh':
            logDir = (
                f'{logDir}_subs_{nsubs}_td_{timedialation}_WU_{warmup}'
                f'_tr_{train}_exp_{nexp}'
                f'_lrM_{master_lr}_lrDM_{lr_decay_master}_clM_{master_cl}'
                f'_clDM_{cl_decay_master}_lrS_{lr}_lrDS_{lr_decay}'
                f'_clS_{cliprange}_clDS_{cl_decay}'
                f'_rt_{retrain_N}'
                f'_mb_{mbatch}_trEp_{n_tr_epochs}_gam_{gamma}_lam_{lam}')
        # The timestamp suffix keeps repeated runs in separate directories.
        logDir = f'{logDir}_{time.time()}'
        global LOG_DIR
        LOG_DIR = logDir
        summaryWriter = summary.FileWriter(logdir=logDir, flush_secs=20)
        if sess is not None:
            summaryWriter.add_graph(graph=sess.graph)
        # Distinct `score` values seen so far, in first-seen order; passed to
        # makeHistProto as `keys`.
        keys_found = []
        # Cells dropped by clean_up_grid; added back to the 'Cells' summary.
        removed_cells = 0
        try:
            while should_continue():
                # Run one iteration
                old += expl.frames_true
                old_compute += expl.frames_compute

                expl.run_cycle()

                t.update(expl.frames_true)  #- old)
                t_compute.update(expl.frames_compute)  #- old_compute)
                t_iter.update(1)
                cur_time = np.round(time.time())
                t_time.update(int(cur_time - last_time))
                last_time = cur_time
                n_iters += 1

                # --- Per-iteration summary ---------------------------------
                if game == 'pacman':
                    rooms_found = max(e.level for e in expl.grid)
                else:
                    rooms_found = len(
                        Counter((e.room, e.level)
                                for e in expl.grid).keys())
                entry = [
                    summary.Summary.Value(tag='Rooms_Found',
                                          simple_value=rooms_found)
                ]
                entry.append(
                    summary.Summary.Value(tag='Cells',
                                          simple_value=len(expl.grid) +
                                          removed_cells))
                entry.append(
                    summary.Summary.Value(
                        tag='Top_score',
                        simple_value=max(e.score for e in expl.grid.values())))
                if game == "montezuma":
                    dist = Counter(e.score for e in expl.real_grid)
                    for key in dist.keys():
                        if key not in keys_found:
                            keys_found.append(key)
                    hist = makeHistProto(dist, bins=30, keys=keys_found)
                    entry.append(
                        summary.Summary.Value(tag="Key_dist", histo=hist))
                    leveldist = Counter(e.level for e in expl.real_grid)
                    histlvl = makeHistProto(leveldist, bins=5)
                    entry.append(
                        summary.Summary.Value(tag="Level_dist", histo=histlvl))

                entry.append(
                    summary.Summary.Value(
                        tag="Avg traj-len",
                        simple_value=(expl.frames_compute / batch_size) /
                        explore_steps))
                if sess is not None:
                    max_bytes_in_use = sess.run(
                        tf.contrib.memory_stats.MaxBytesInUse())
                    entry.append(
                        summary.Summary.Value(tag="Memory Use",
                                              simple_value=max_bytes_in_use))

                entry.extend(expl.summary)
                summaryWriter.add_summary(summary=summary.Summary(value=entry),
                                          global_step=expl.frames_compute +
                                          old_compute)

                # summaryWriter.add_run_metadata(expl.explorer.master.metadata, 'master_metadata', global_step=expl.frames_compute + old_compute)
                # for sub in expl.explorer.subs:
                #     summaryWriter.add_run_metadata(sub.model.metadata, f'{sub}_metadata',
                #                                    global_step=expl.frames_compute + old_compute)
                expl.summary = []

                # In some circumstances (see comments), save a checkpoint and some pictures
                if ((not seen_level_1 and expl.seen_level_1)
                        or  # We have solved level 1
                        old == 0 or  # It is the first iteration
                        old // THRESH_TRUE != expl.frames_true // THRESH_TRUE
                        or  # We just passed the THRESH_TRUE threshold
                        old_compute // THRESH_COMPUTE !=
                        expl.frames_compute // THRESH_COMPUTE
                        or  # We just passed the THRESH_COMPUTE threshold
                        not should_continue()):  # This is the last iteration

                    # Remove old grid entries: anything more than two levels
                    # below the highest level currently in the grid.
                    if clean_up_grid:
                        max_level = max(e.level for e in expl.grid)
                        # Collect first, delete afterwards: the containers
                        # must not change size while being iterated.
                        stale_cells = {
                            cell_key
                            for cell_key in expl.grid
                            if max_level - cell_key.level > 2
                        }
                        for cell_key in stale_cells:
                            del expl.grid[cell_key]
                        removed_cells += len(stale_cells)

                        stale_real_cells = {
                            cell_key
                            for cell_key in expl.real_grid
                            if max_level - cell_key.level > 2
                        }
                        for cell_key in stale_real_cells:
                            expl.real_grid.remove(cell_key)

                    # Quick bookkeeping, printing update
                    seen_level_1 = expl.seen_level_1
                    filename = f'{base_path}/{expl.frames_true:0{n_digits}}_{expl.frames_compute:0{n_digits}}'

                    tqdm.write(
                        f'Cells at levels: {dict(Counter(e.level for e in expl.real_grid))}'
                    )
                    tqdm.write(
                        f'Cells at objects: {dict(Counter(e.score for e in expl.real_grid))}'
                    )
                    tqdm.write(
                        f'Max score: {max(e.score for e in expl.grid.values())}'
                    )
                    tqdm.write(f'Compute cells: {len(expl.grid)}')

                    # Save pictures
                    if show or save_item_pictures or save_prob_pictures:
                        # Show normal grid
                        if show or save_item_pictures:
                            get_env().render_with_known(
                                list(expl.real_grid),
                                resolution,
                                show=False,
                                filename=filename + '.png',
                                get_val=lambda x: 1,
                                combine_val=lambda x, y: x + y)

                        if not use_real_pos:
                            # One picture per distinct score value of the
                            # cells' `real_cell`.
                            object_combinations = sorted(
                                set(e.real_cell.score
                                    for e in expl.grid.values()
                                    if e.real_cell is not None))
                            for obj in object_combinations:
                                grid_at_obj = [
                                    e.real_cell for e in expl.grid.values()
                                    if e.real_cell is not None
                                    and e.real_cell.score == obj
                                ]
                                get_env().render_with_known(
                                    grid_at_obj,
                                    resolution,
                                    show=False,
                                    filename=filename + f'_object_{obj}.png',
                                    get_val=lambda x: 1,
                                    combine_val=lambda x, y: x + y)

                        # Show probability grid
                        if (use_real_pos and show) or save_prob_pictures:
                            expl.selector.set_ranges(list(expl.grid.keys()))
                            possible_scores = sorted(
                                set(e.score for e in expl.grid))
                            # Sum of all selector weights, used to normalise
                            # each cell's weight in the picture.
                            total = np.sum([
                                expl.selector.get_weight(
                                    x, expl.grid[x], possible_scores,
                                    expl.grid) for x in expl.grid
                            ])
                            get_env().render_with_known(
                                list(expl.grid.keys()),
                                resolution,
                                show=False,
                                filename=filename + '_prob.PNG',
                                combine_val=lambda x, y: x + y,
                                get_val=lambda x: expl.selector.get_weight(
                                    x, expl.grid[x], possible_scores, expl.grid
                                ) / total,
                            )
                        if prev_checkpoint and clear_old_checkpoints:
                            if not keep_item_pictures:
                                try:
                                    os.remove(prev_checkpoint + '.png')
                                except FileNotFoundError:
                                    # If it doesn't exist, we don't need to remove it.
                                    pass
                            if use_real_pos and not keep_prob_pictures:
                                try:
                                    os.remove(prev_checkpoint + '_prob.PNG')
                                except FileNotFoundError:
                                    # If it doesn't exist, we don't need to remove it.
                                    pass

                    # One-line CSV: "<number of cells>, <top score>".
                    top_score = max([a.score for a in expl.grid.values()])
                    with open(filename + ".csv", 'w') as f:
                        f.write(f"{len(expl.grid)!s}, {top_score!s}\n")

                    # Save checkpoints
                    grid_copy = dict(expl.grid.items())
                    # TODO: is 7z still necessary now that there are other ways to reduce space?
                    try:
                        # `with` guarantees the compressed stream is flushed
                        # and closed instead of relying on garbage collection.
                        with lzma.open(filename + '.7z', 'wb',
                                       preset=0) as checkpoint_file:
                            pickle.dump(grid_copy, checkpoint_file)
                    except MemoryError:
                        print('MemoryError when saving grid checkpoint')
                    # Clean up previous checkpoint.
                    if prev_checkpoint and clear_old_checkpoints:
                        try:
                            os.remove(prev_checkpoint + '.7z')
                        except FileNotFoundError:
                            pass
                    prev_checkpoint = filename

                    # A much smaller file that should be sufficient for view folder, but not for restoring
                    # the demonstrations. Should make view folder much faster.
                    grid_set = {k: v.score for k, v in expl.grid.items()}
                    try:
                        with lzma.open(filename + '_set.7z', 'wb',
                                       preset=0) as set_file:
                            pickle.dump(grid_set, set_file)
                        with lzma.open(filename + '_set_real.7z',
                                       'wb',
                                       preset=0) as real_set_file:
                            pickle.dump(expl.real_grid, real_set_file)
                    except MemoryError:
                        print(
                            'MemroyError when saving  set and real_set checkpoint'
                        )

                    if PROFILER:
                        print("ITERATION:", n_iters)
                        # Pause profiling while the stats are written out.
                        PROFILER.disable()
                        PROFILER.dump_stats(filename + '.stats')
                        # PROFILER.print_stats()
                        PROFILER.enable()
                    # Save a bit of memory by freeing our copies.
                    grid_copy = None
                    grid_set = None
        finally:
            # TODO Insert model save here
            if SAVE_MODEL and isinstance(expl.explorer, MlshExplorer):
                expl.explorer.master.save(f'{base_path}/master')
                expl.explorer.master.save(f'{logDir}/master')
                for sub in expl.explorer.subs:
                    sub.save(f'{base_path}/{sub}')
                    sub.save(f'{logDir}/{sub}')
            #print(expl.explorer.__repr__())
            if sess is not None:
                # Leave the session entered during explorer setup and drop
                # the default graph.
                sess.__exit__(None, None, None)
                tf.reset_default_graph()
            else:
                print('did not clear graph')
Ejemplo n.º 2
0
 def add(self, step, key, value):
     """Write one scalar to the underlying summary writer.

     Builds a ``tb_summary.Summary`` holding a single value whose ``tag`` is
     ``key`` and whose ``simple_value`` is ``value``, then hands it to
     ``self._writer.add_summary`` with ``global_step=step``.
     """
     record = tb_summary.Summary()
     # `value` is a repeated message field; add() appends a new element and
     # initialises it from the keyword arguments.
     record.value.add(tag=key, simple_value=value)
     self._writer.add_summary(record, global_step=step)