def _open_tf_session():
    """Create a TensorFlow session sized from the CPU count and enter it.

    The thread count is ``multiprocessing.cpu_count()``, halved when
    ``sys.platform == 'darwin'``. ``TF_CPP_MIN_LOG_LEVEL`` is set to ``'2'``
    in the process environment as a side effect.

    Returns:
        The object returned by ``Session(config=config).__enter__()``; the
        caller is responsible for calling ``__exit__`` on it.
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    config = ConfigProto(allow_soft_placement=True,
                         intra_op_parallelism_threads=ncpu,
                         inter_op_parallelism_threads=ncpu)
    config.gpu_options.allow_growth = True  # pylint: disable=E1101
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    return Session(config=config).__enter__()


def _pickle_to_lzma(obj, path):
    """Pickle *obj* into an LZMA file at *path* (preset 0) and close the file."""
    with lzma.open(path, 'wb', preset=0) as out_file:
        pickle.dump(obj, out_file)


def _run(resolution=16, score_objects=True, mean_repeat=20, explorer='repeated',
         seen_weight=0.0, seen_power=1.0, chosen_weight=0.0, chosen_power=1.0,
         action_weight=0.0, action_power=1.0, horiz_weight=0.3, vert_weight=0.1,
         low_score_weight=0.5, high_score_weight=10.0, explore_steps=100,
         ignore_death=1, x_repeat=2, show=False, seed_path=None,
         base_path='./results/', clear_old_checkpoints=True, game="montezuma",
         chosen_since_new_weight=0, chosen_since_new_power=1, warn_delete=True,
         low_level_weight=0.1, objects_from_pixels=True,
         objects_remember_rooms=True, only_keys=True, optimize_score=True,
         use_real_pos=True, target_shape=(6, 6), max_pix_value=255,
         prob_override=0.0, reset_pool=False, pool_class='py',
         start_method='fork', path_postfix='', n_cpus=None,
         save_prob_pictures=False, save_item_pictures=False,
         keep_prob_pictures=False, keep_item_pictures=False, batch_size=100,
         reset_cell_on_update=False, actors=1, nexp=None, lr=1.0e-03,
         lr_decay=0.99999, cliprange=0.1, cl_decay=0.99999, n_tr_epochs=2,
         mbatch=4, gamma=0.99, lam=0.95, log_path="log", nsubs=8,
         timedialation=20, master_lr=0.01, lr_decay_master=0.99999,
         master_cl=0.1, cl_decay_master=0.99999, warmup=20, train=40,
         retrain_N=None, with_domain=False, load_model=None,
         reward_function='clip', ent_mas=0.01, ent_sub=0.01,
         pacmanScoreRes=None, render=None, render_frameskip=4,
         clean_up_grid=False):
    """Run one exploration experiment: build the explorer/game/selector, then
    loop ``Explore.run_cycle()`` while logging summaries and writing checkpoints.

    Explorer selection (``explorer`` is a string on entry and is rebound to the
    explorer object):
        * ``game == "robot"`` -> ``RepeatedRandomExplorerRobot`` (checked first).
          Note that no game branch exists for ``"robot"`` below, so this path
          currently ends in ``NotImplementedError``.
        * ``"ppo"`` -> ``PPOExplorer`` (opens a TensorFlow session).
        * ``"mlsh"`` -> ``MlshExplorer`` (opens a TensorFlow session).
        * ``"repeated"`` -> ``RepeatedRandomExplorer(mean_repeat)``.
        * anything else -> ``RandomExplorer``.
      For ``ppo``/``mlsh``, ``nexp`` defaults to ``explore_steps`` when ``None``.

    Game selection (``game``): ``"montezuma"``, ``"pitfall"``, ``"nchain"`` or
    ``"pacman"``. ``target_shape`` and ``max_pix_value`` are written onto the
    chosen game class as ``TARGET_SHAPE`` / ``MAX_PIX_VALUE``. ``resolution``
    (and ``pacmanScoreRes`` for pacman) set the grid dimensions.

    Selector weights: ``seen_*``, ``chosen_*``, ``action_*``,
    ``chosen_since_new_*``, ``horiz_weight``, ``vert_weight``,
    ``low_score_weight``, ``high_score_weight`` and ``low_level_weight`` are
    forwarded to the selector; the nchain selector additionally receives
    ``with_domain=use_real_pos``.

    Pool: ``pool_class`` of ``'torch'`` or ``'loky'`` picks the matching pool
    class; otherwise ``multiprocessing.get_context(start_method).Pool`` is used.

    Output:
        * Summaries go to a ``summary.FileWriter`` in a directory derived from
          ``log_path``, the game, the explorer and the hyper-parameters, with
          the current time appended. The directory is also stored in the
          module-level ``LOG_DIR``.
        * Checkpoints (``.csv``, ``.7z``, ``_set.7z``, ``_set_real.7z`` and
          optionally pictures / profiler stats) are written under
          ``base_path``. With ``clear_old_checkpoints`` the previous ``.7z``
          (and, unless ``keep_*_pictures``, previous pictures) are deleted.
        * ``clean_up_grid`` removes cells more than two levels below the
          current maximum level at each checkpoint.

    The loop is bounded by the module-level ``MAX_TIME``, ``MAX_FRAMES``,
    ``MAX_FRAMES_COMPUTE``, ``MAX_ITERATIONS`` and ``MAX_LEVEL``; checkpoint
    cadence uses ``THRESH_TRUE`` / ``THRESH_COMPUTE``; ``PROFILER`` and
    ``SAVE_MODEL`` are consulted as well.

    ``warn_delete``, ``path_postfix`` and ``reward_function`` are accepted but
    not used inside this function.

    Args:
        seed_path: Optional path to an LZMA-compressed pickle used to
            initialise ``expl.grid``. Pickle can execute arbitrary code on
            load, so only pass trusted files.

    Raises:
        NotImplementedError: if ``game`` is not one of the supported names.
    """
    global LOG_DIR

    # ------------------------------------------------------------------
    # Explorer
    # ------------------------------------------------------------------
    sess = None
    if game == "robot":
        explorer = RepeatedRandomExplorerRobot()
    elif explorer == "ppo":
        sess = _open_tf_session()
        if nexp is None:
            nexp = explore_steps
        explorer = PPOExplorer(actors=actors, nexp=nexp, lr=lr,
                               lr_decay=lr_decay, cliprange=cliprange,
                               cl_decay=cl_decay, n_tr_epochs=n_tr_epochs,
                               nminibatches=mbatch, gamma=gamma, lam=lam,
                               ent_coef=ent_sub)
    elif explorer == 'mlsh':
        sess = _open_tf_session()
        if nexp is None:
            nexp = explore_steps
        explorer = MlshExplorer(nsubs=nsubs, timedialation=timedialation,
                                warmup_T=nexp * warmup, train_T=nexp * train,
                                actors=actors, nexp=nexp // timedialation,
                                lr_mas=master_lr, lr_sub=lr,
                                lr_decay=lr_decay_master,
                                lr_decay_sub=lr_decay,
                                cl_decay=cl_decay_master,
                                cl_decay_sub=cl_decay,
                                n_tr_epochs=n_tr_epochs, nminibatches=mbatch,
                                gamma=gamma, lam=lam, cliprange_mas=master_cl,
                                cliprange_sub=cliprange, retrain_N=retrain_N,
                                ent_m=ent_mas, ent_s=ent_sub)
    elif explorer == 'repeated':
        explorer = RepeatedRandomExplorer(mean_repeat)
    else:
        explorer = RandomExplorer()

    # ------------------------------------------------------------------
    # Game class, game arguments and grid resolution
    # ------------------------------------------------------------------
    if game == "montezuma":
        game_class = MyMontezuma
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(score_objects=score_objects, x_repeat=x_repeat,
                         objects_from_pixels=objects_from_pixels,
                         objects_remember_rooms=objects_remember_rooms,
                         only_keys=only_keys, unprocessed_state=True)
        grid_resolution = (GridDimension('level', 1),
                           GridDimension('score', 1),
                           GridDimension('room', 1),
                           GridDimension('x', resolution),
                           GridDimension('y', resolution))
    elif game == "pitfall":
        game_class = pitfall_env.MyPitfall
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(score_objects=score_objects, x_repeat=x_repeat)
        grid_resolution = (GridDimension('level', 1),
                           GridDimension('score', 1),
                           GridDimension('room', 1),
                           GridDimension('x', resolution),
                           GridDimension('y', resolution))
    elif game == "nchain":
        game_class = MyNChain
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(N=10000)
        grid_resolution = (GridDimension('state', 1), )
    elif game == "pacman":
        game_class = MyMsPacman
        game_class.TARGET_SHAPE = target_shape
        game_class.MAX_PIX_VALUE = max_pix_value
        game_args = dict(x_repeat=x_repeat, unprocessed_state=True,
                         render=render, frameskip=render_frameskip)
        if pacmanScoreRes is None:
            grid_resolution = (GridDimension('level', 1),
                               GridDimension('x', resolution),
                               GridDimension('y', resolution))
        else:
            grid_resolution = (GridDimension('level', 1),
                               GridDimension('score', pacmanScoreRes),
                               GridDimension('x', resolution),
                               GridDimension('y', resolution))
    else:
        raise NotImplementedError("Unknown game: " + game)

    # ------------------------------------------------------------------
    # Selector: all three selector classes share these keyword arguments.
    # ------------------------------------------------------------------
    selector_kwargs = dict(
        seen=Weight(seen_weight, seen_power),
        chosen=Weight(chosen_weight, chosen_power),
        action=Weight(action_weight, action_power),
        room_cells=Weight(0.0),
        dir_weights=DirWeights(horiz_weight, vert_weight, low_score_weight,
                               high_score_weight),
        chosen_since_new_weight=Weight(chosen_since_new_weight,
                                       chosen_since_new_power),
        low_level_weight=low_level_weight)
    if game == "nchain":
        selector = NChainSelector(game_class, with_domain=use_real_pos,
                                  **selector_kwargs)
    elif game == "pacman":
        selector = PacmanSelector(game_class, **selector_kwargs)
    else:
        selector = WeightedSelector(game_class, **selector_kwargs)

    # ------------------------------------------------------------------
    # Worker pool and the Explore driver
    # ------------------------------------------------------------------
    pool_cls = multiprocessing.get_context(start_method).Pool
    if pool_class == 'torch':
        pool_cls = torch.multiprocessing.Pool
    elif pool_class == 'loky':
        pool_cls = LPool

    expl = Explore(explorer, selector, (game_class, game_args),
                   grid_resolution,
                   explore_steps=explore_steps,
                   ignore_death=ignore_death,
                   optimize_score=optimize_score,
                   use_real_pos=use_real_pos,
                   prob_override=prob_override,
                   reset_pool=reset_pool,
                   pool_class=pool_cls,
                   n_cpus=n_cpus,
                   batch_size=batch_size,
                   reset_cell_on_update=reset_cell_on_update,
                   with_domain=with_domain,
                   load_model=load_model,
                   reduce_grid=clean_up_grid)

    if seed_path is not None:
        # SECURITY: unpickling runs arbitrary code; seed files must be trusted.
        with lzma.open(seed_path, 'rb') as seed_file:
            expl.grid = pickle.load(seed_file)
        seed_keys = list(expl.grid.keys())
        # Clamp the sample size so a seed grid with < 10 cells does not raise.
        print(random.sample(seed_keys, min(10, len(seed_keys))))
        print('Number at level > 0: ',
              len([e for e in seed_keys if e.level > 0]))

    n_digits = 12
    old = 0
    old_compute = 0

    with tqdm(desc='Time (seconds)', smoothing=0, total=MAX_TIME) as t_time, \
            tqdm(desc='Iterations', total=MAX_ITERATIONS) as t_iter, \
            tqdm(desc='Compute steps', total=MAX_FRAMES_COMPUTE) as t_compute, \
            tqdm(desc='Game step', total=MAX_FRAMES) as t:
        start_time = time.time()
        last_time = np.round(start_time)
        # TODO: make this more generic for each level switch
        seen_level_1 = False
        n_iters = 0
        prev_checkpoint = None

        def should_continue():
            """Return False as soon as any configured run limit is reached."""
            if MAX_TIME is not None and time.time() - start_time >= MAX_TIME:
                return False
            if MAX_FRAMES is not None and expl.frames_true + old >= MAX_FRAMES:
                return False
            if (MAX_FRAMES_COMPUTE is not None
                    and expl.frames_compute + old_compute >= MAX_FRAMES_COMPUTE):
                return False
            if MAX_ITERATIONS is not None and n_iters >= MAX_ITERATIONS:
                return False
            if (MAX_LEVEL is not None
                    and len({e.level for e in expl.grid}) > MAX_LEVEL):
                return False
            # NOTE(review): `and False` makes this check never fire; it looks
            # deliberately disabled, so it is kept verbatim.
            if TERM_CONDITION and False:
                return False
            return True

        # Log directory name encodes the experiment's hyper-parameters.
        explorer_name = repr(explorer)
        log_dir = (f'{log_path}/{game}_{explorer_name}/res_{resolution}_explStep_{explore_steps}'
                   f'_cellbatch_{batch_size}')
        if explorer_name == 'ppo':
            log_dir = (f'{log_dir}_actors_{actors}_exp_{nexp}_lr_{lr}_lrDec_{lr_decay}_cl_{cliprange}_clDec_{cl_decay}'
                       f'_mbatch_{mbatch}_trainEpochs_{n_tr_epochs}_gamma_{gamma}_lam_{lam}')
        elif explorer_name == 'mlsh':
            log_dir = (f'{log_dir}_subs_{nsubs}_td_{timedialation}_WU_{warmup}_tr_{train}_exp_{nexp}'
                       f'_lrM_{master_lr}_lrDM_{lr_decay_master}_clM_{master_cl}'
                       f'_clDM_{cl_decay_master}_lrS_{lr}_lrDS_{lr_decay}_clS_{cliprange}_clDS_{cl_decay}'
                       f'_rt_{retrain_N}'
                       f'_mb_{mbatch}_trEp_{n_tr_epochs}_gam_{gamma}_lam_{lam}')
        log_dir = f'{log_dir}_{time.time()}'
        LOG_DIR = log_dir

        summary_writer = summary.FileWriter(logdir=log_dir, flush_secs=20)
        if sess is not None:
            summary_writer.add_graph(graph=sess.graph)

        keys_found = []
        removed_cells = 0
        try:
            while should_continue():
                # Run one iteration
                old += expl.frames_true
                old_compute += expl.frames_compute

                expl.run_cycle()

                t.update(expl.frames_true)
                t_compute.update(expl.frames_compute)
                t_iter.update(1)
                cur_time = np.round(time.time())
                t_time.update(int(cur_time - last_time))
                last_time = cur_time
                n_iters += 1

                # ---- Per-iteration summaries -------------------------
                if game == 'pacman':
                    rooms_found = max(e.level for e in expl.grid)
                else:
                    rooms_found = len({(e.room, e.level) for e in expl.grid})
                entry = [
                    summary.Summary.Value(tag='Rooms_Found',
                                          simple_value=rooms_found),
                    summary.Summary.Value(
                        tag='Cells',
                        simple_value=len(expl.grid) + removed_cells),
                    summary.Summary.Value(
                        tag='Top_score',
                        simple_value=max(e.score
                                         for e in expl.grid.values())),
                ]
                if game == "montezuma":
                    dist = Counter(e.score for e in expl.real_grid)
                    # keys_found keeps first-seen order across iterations.
                    for key in dist:
                        if key not in keys_found:
                            keys_found.append(key)
                    hist = makeHistProto(dist, bins=30, keys=keys_found)
                    entry.append(
                        summary.Summary.Value(tag="Key_dist", histo=hist))
                    # NOTE(review): nesting of the level histogram under the
                    # montezuma branch is inferred from statement order.
                    leveldist = Counter(e.level for e in expl.real_grid)
                    histlvl = makeHistProto(leveldist, bins=5)
                    entry.append(
                        summary.Summary.Value(tag="Level_dist",
                                              histo=histlvl))
                entry.append(
                    summary.Summary.Value(
                        tag="Avg traj-len",
                        simple_value=(expl.frames_compute / batch_size) /
                        explore_steps))
                if sess is not None:
                    bytes_in_use = sess.run(
                        tf.contrib.memory_stats.MaxBytesInUse())
                    entry.append(
                        summary.Summary.Value(tag="Memory Use",
                                              simple_value=bytes_in_use))

                entry.extend(expl.summary)
                summary_writer.add_summary(
                    summary=summary.Summary(value=entry),
                    global_step=expl.frames_compute + old_compute)
                expl.summary = []

                # ---- Decide whether to checkpoint --------------------
                checkpoint_due = (
                    (not seen_level_1 and expl.seen_level_1)  # We have solved level 1
                    or old == 0  # It is the first iteration
                    # We just passed the THRESH_TRUE threshold
                    or old // THRESH_TRUE != expl.frames_true // THRESH_TRUE
                    # We just passed the THRESH_COMPUTE threshold
                    or old_compute // THRESH_COMPUTE != expl.frames_compute // THRESH_COMPUTE
                    or not should_continue())  # This is the last iteration
                if not checkpoint_due:
                    continue

                # Remove old grid entries: cells more than two levels behind.
                if clean_up_grid:
                    max_level = max(e.level for e in expl.grid)
                    stale = [k for k in expl.grid
                             if max_level - k.level > 2]
                    for cell_key in stale:
                        del expl.grid[cell_key]
                    removed_cells += len(stale)

                    stale_real = [k for k in expl.real_grid
                                  if max_level - k.level > 2]
                    for cell_key in stale_real:
                        expl.real_grid.remove(cell_key)

                # Quick bookkeeping, printing update
                seen_level_1 = expl.seen_level_1
                filename = f'{base_path}/{expl.frames_true:0{n_digits}}_{expl.frames_compute:0{n_digits}}'

                tqdm.write(
                    f'Cells at levels: {dict(Counter(e.level for e in expl.real_grid))}'
                )
                tqdm.write(
                    f'Cells at objects: {dict(Counter(e.score for e in expl.real_grid))}'
                )
                tqdm.write(
                    f'Max score: {max(e.score for e in expl.grid.values())}'
                )
                tqdm.write(f'Compute cells: {len(expl.grid)}')

                # Save pictures
                if show or save_item_pictures or save_prob_pictures:
                    # Show normal grid
                    if show or save_item_pictures:
                        get_env().render_with_known(
                            list(expl.real_grid),
                            resolution,
                            show=False,
                            filename=filename + '.png',
                            get_val=lambda x: 1,
                            combine_val=lambda x, y: x + y)

                    if not use_real_pos:
                        object_combinations = sorted(
                            set(e.real_cell.score
                                for e in expl.grid.values()
                                if e.real_cell is not None))
                        for obj in object_combinations:
                            grid_at_obj = [
                                e.real_cell for e in expl.grid.values()
                                if e.real_cell is not None
                                and e.real_cell.score == obj
                            ]
                            get_env().render_with_known(
                                grid_at_obj,
                                resolution,
                                show=False,
                                filename=filename + f'_object_{obj}.png',
                                get_val=lambda x: 1,
                                combine_val=lambda x, y: x + y)

                    # Show probability grid
                    if (use_real_pos and show) or save_prob_pictures:
                        expl.selector.set_ranges(list(expl.grid.keys()))
                        possible_scores = sorted(
                            set(e.score for e in expl.grid))
                        total = np.sum([
                            expl.selector.get_weight(x, expl.grid[x],
                                                     possible_scores,
                                                     expl.grid)
                            for x in expl.grid
                        ])
                        get_env().render_with_known(
                            list(expl.grid.keys()),
                            resolution,
                            show=False,
                            filename=filename + '_prob.PNG',
                            combine_val=lambda x, y: x + y,
                            get_val=lambda x: expl.selector.get_weight(
                                x, expl.grid[x], possible_scores, expl.grid
                            ) / total,
                        )

                    if prev_checkpoint and clear_old_checkpoints:
                        if not keep_item_pictures:
                            try:
                                os.remove(prev_checkpoint + '.png')
                            except FileNotFoundError:
                                # If it doesn't exist, nothing to remove.
                                pass
                        if use_real_pos and not keep_prob_pictures:
                            try:
                                os.remove(prev_checkpoint + '_prob.PNG')
                            except FileNotFoundError:
                                # If it doesn't exist, nothing to remove.
                                pass

                with open(filename + ".csv", 'w') as f:
                    f.write(str(len(expl.grid)))
                    f.write(", ")
                    f.write(str(max([a.score for a in expl.grid.values()])))
                    f.write("\n")

                # Save checkpoints (shallow copy of the grid mapping).
                grid_copy = dict(expl.grid.items())
                # TODO: is 7z still necessary now that there are other ways to reduce space?
                try:
                    _pickle_to_lzma(grid_copy, filename + '.7z')
                except MemoryError:
                    print('MemoryError when saving grid checkpoint')

                # Clean up previous checkpoint.
                if prev_checkpoint and clear_old_checkpoints:
                    try:
                        os.remove(prev_checkpoint + '.7z')
                    except FileNotFoundError:
                        pass
                prev_checkpoint = filename

                # A much smaller file that should be sufficient for view folder, but not for restoring
                # the demonstrations. Should make view folder much faster.
                grid_set = {k: v.score for k, v in expl.grid.items()}
                try:
                    _pickle_to_lzma(grid_set, filename + '_set.7z')
                    _pickle_to_lzma(expl.real_grid,
                                    filename + '_set_real.7z')
                except MemoryError:
                    print(
                        'MemroyError when saving set and real_set checkpoint'
                    )

                if PROFILER:
                    print("ITERATION:", n_iters)
                    PROFILER.disable()
                    PROFILER.dump_stats(filename + '.stats')
                    PROFILER.enable()

                # Save a bit of memory by freeing our copies.
                grid_copy = None
                grid_set = None
        finally:
            # TODO Insert model save here
            if SAVE_MODEL and isinstance(expl.explorer, MlshExplorer):
                expl.explorer.master.save(f'{base_path}/master')
                expl.explorer.master.save(f'{log_dir}/master')
                for sub in expl.explorer.subs:
                    sub.save(f'{base_path}/{sub}')
                    sub.save(f'{log_dir}/{sub}')

            # Flush and close the event file so the last summaries are not
            # lost if the process exits before the periodic flush.
            summary_writer.close()

            # NOTE(review): session teardown is kept inside `finally` so the
            # session is released even when the loop raises.
            if sess is not None:
                sess.__exit__(None, None, None)
                tf.reset_default_graph()
            else:
                print('did not clear graph')
def add(self, step, key, value):
    """Write one scalar summary to ``self._writer``.

    Args:
        step: Passed to the writer as ``global_step``.
        key: Tag assigned to the summary value.
        value: Stored as the value's ``simple_value``.
    """
    record = tb_summary.Summary()
    # `value.add()` appends a new element to the summary and returns it.
    scalar = record.value.add()
    scalar.tag = key
    scalar.simple_value = value
    self._writer.add_summary(record, global_step=step)