def gather(self):
    for i in range(self.levelCount):
        level_name = self.levels[i]
        print('gathering from ' + level_name)
        delay = self.gather_delay[i]
        args = copy.deepcopy(self.args)
        args.memory_delay = delay
        args.directory = 'gather_' + level_name
        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
            args, agent_a3c.Agent, self.state_dim, self.action_dim)
        while True:
            hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
            if hasSavedMemory:
                break
        agent.metrics.save(agent.results_location, 'metrics')  # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs')
        agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
        agent.brain.metrics = agent.metrics
        print('switching levels')  # Switch to next level
        self.env.env.env.press('right_arrow')
        time.sleep(0.1)
        self.env.env.env.release('right_arrow')
        time.sleep(0.1)
    print('all done')
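# --- Hypothetical refactor sketch (not part of the original code) ---
# The press/sleep/release sequence above recurs wherever the harness advances
# the in-game level selector. A helper like the one below could replace those
# blocks; `env` is assumed to be the same triply-wrapped environment whose
# innermost wrapper exposes press()/release(), and `time` is already imported.
def switch_level_right(env, presses=1, key_delay=0.1):
    """Advance the level selector by tapping right_arrow `presses` times."""
    for _ in range(presses):
        env.env.env.press('right_arrow')
        time.sleep(key_delay)
        env.env.env.release('right_arrow')
        time.sleep(key_delay)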
def incremental_learn(self, args, levels_list):
    args = copy.deepcopy(args)
    agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
        args, agent_a3c.Agent, self.state_dim, self.action_dim)
    length = len(levels_list)
    idxs = []
    idx_diff = []
    for i in range(length):
        idxs.append(self.levels.index(levels_list[i]))
    idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
    for i in range(1, length):
        idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)
    print(idxs)
    print(idx_diff)
    print('Switching Levels Initial')
    for i in range(idx_diff[0]):
        self.env.env.env.press('right_arrow')
        time.sleep(0.1)
        self.env.env.env.release('right_arrow')
        time.sleep(0.1)
    self.curlevel = idxs[0]
    for i in range(length):
        time_start = time.time()
        while True:
            hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
            if time.time() - time_start > break_time:
                break
        if i != length - 1:
            print('Switching Levels')
            for j in range(idx_diff[i + 1]):
                self.env.env.env.press('right_arrow')
                time.sleep(0.1)
                self.env.env.env.release('right_arrow')
                time.sleep(0.1)
            self.curlevel = idxs[i + 1]
        save_weights(agent, 'id_' + str(i))  # Save weights
        agent.metrics.save(agent.results_location, 'metrics_id_' + str(i))  # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs_id_' + str(i))
        agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
        agent.brain.metrics = agent.metrics
        agent.brain.init_vars()  # Reset network
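# --- Worked example (illustrative only, values assumed) ---
# incremental_learn() turns absolute level indices into numbers of right_arrow
# presses using modular differences, so the selector can wrap around the level
# list. For example, with levelCount = 5, curlevel = 3 and target levels at
# indices [1, 4, 0]:
#   idx_diff[0] = (1 - 3) % 5 = 3   # three presses to reach the first level
#   idx_diff[1] = (4 - 1) % 5 = 3   # three more to reach the second
#   idx_diff[2] = (0 - 4) % 5 = 1   # one more to reach the third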
def run_job(self, job):
    args = job.args
    level = job.level
    self.controller.switch(level)
    agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
        args, agent_a3c.Agent, self.state_dim, self.action_dim)
    time_start = time.time()
    while True:
        hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
            agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
        if agent.args.mode == 'gather':
            if hasSavedMemory:
                break
        elif time.time() - time_start > self.break_time:
            break
    save_weights(agent, 'end')  # Save weights
    agent.metrics.save(agent.results_location, 'metrics_end')  # Save metrics
    agent.metrics.runs.graph(agent.results_location, 'runs_end')
    agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
    agent.brain.metrics = agent.metrics
    agent.brain.init_vars()  # Reset network
    if agent.args.mode != 'gather':
        # Copy the checkpoint with the largest frame count to model_max.h5
        directory = agent.results_location
        allfiles = listdir(directory)
        onlyfiles = [f for f in allfiles if isfile(join(directory, f))]
        frame = [f for f in onlyfiles if 'model_frame' in f and '.h5' in f]
        frame_time = [int(f[12:-3]) for f in frame]  # Frame number from 'model_frame_<N>.h5'
        if frame_time == []:
            max_file_name = 'model_end.h5'
        else:
            max_file_name = frame[np.argmax(frame_time)]
        max_file = join(directory, max_file_name)
        src = max_file
        dst = join(directory, 'model_max.h5')
        shutil.copyfile(src, dst)
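# --- Equivalent sketch of the model_max.h5 selection (illustrative only) ---
# run_job() keeps the checkpoint with the largest frame number. Assuming the
# checkpoints are named 'model_frame_<N>.h5', the same selection could be
# written with glob/regex instead of string slicing:
import re
import shutil
from glob import glob
from os.path import join

def copy_latest_checkpoint(directory):
    """Copy the highest-numbered model_frame_<N>.h5 (or model_end.h5) to model_max.h5."""
    candidates = []
    for path in glob(join(directory, 'model_frame*.h5')):
        match = re.search(r'(\d+)\.h5$', path)
        if match:
            candidates.append((int(match.group(1)), path))
    src = max(candidates)[1] if candidates else join(directory, 'model_end.h5')
    shutil.copyfile(src, join(directory, 'model_max.h5'))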
def __init__(self, args, state_dim, action_dim, modelFunc=None):
    print(state_dim)
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.h = args.hyper
    self.metrics = Metrics()
    self.memory = Memory(self.h.memory_size, self.state_dim, 1)
    self.brain = Brain(self, modelFunc)
    self.args = args
    self.epsilon = self.h.epsilon_init
    self.run_count = -1
    self.replay_count = -1
    self.save_iterator = -1
    self.update_iterator = -1
    self.mode = 'observe'
    load_weights(self)
    self.brain.update_target_model()
def __init__(self, args, state_dim, action_dim):
    self.h = args.hyper
    self.mode = 'observe'
    self.args = args
    self.metrics = Metrics()
    self.action_dim = action_dim
    self.state_dim = state_dim
    self.memory = Memory(self.h.memory_size)
    self.run_count = -1
    self.replay_count = -1
    self.save_iterator = -1
    self.update_iterator = -1
    if self.args.directory == 'default':
        self.args.directory = G.CUR_FOLDER
    results_location = G.RESULT_FOLDER_FULL + '/' + self.args.directory
    data_location = G.DATA_FOLDER_FULL + '/' + self.args.directory
    os.makedirs(results_location, exist_ok=True)  # Generates results folder
    os.makedirs(data_location, exist_ok=True)  # Generates data folder
    self.results_location = results_location + '/'
    self.data_location = data_location + '/'
def __init__(self, args, state_dim, action_dim, model_func=None, visualization=False, brain=None, idx=0):
    self.idx = idx
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.args = args
    self.h = self.args.hyper
    self.epsilon = self.h.epsilon_init
    self.h.gamma_n = self.h.gamma ** self.h.memory_size
    self.run_count = -1
    self.replay_count = -1
    self.save_iterator = -1
    self.update_iterator = -1
    self.mode = 'train'
    self.R = 0
    self.visualization = visualization
    self.metrics = Metrics()
    self.memory = Memory(self.h.memory_size, self.state_dim, 1)
    if not brain:
        self.brain = Brain(self, model_func)
    else:
        self.brain = brain
    self.brain.init_model()
    load_weights(self)
    if self.args.env.problem != 'Hexagon':
        self.brain.finalize_model()
    save_class(self.args, self.data_location + 'args')
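# --- Note on gamma_n (illustrative sketch, based on the assignment above) ---
# gamma_n = gamma ** memory_size looks like the discount applied to the
# bootstrap value in an n-step return, with n equal to the rollout/memory
# length:
#   R = r_0 + gamma*r_1 + ... + gamma**(n-1)*r_{n-1} + gamma_n * V(s_n)
# Minimal sketch, assuming a list of n rewards and a bootstrap state value:
def n_step_return(rewards, bootstrap_value, gamma):
    """Compute the discounted n-step return R used by an A3C-style update."""
    R = bootstrap_value * gamma ** len(rewards)
    for k, r in enumerate(rewards):
        R += (gamma ** k) * r
    return R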
def gather(self):
    idxs = []
    idx_diff = []
    for i in range(self.levelCount):
        idxs.append(self.levels.index(self.levelpairs[i][1]))
    idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
    for i in range(1, self.levelCount):
        idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)
    print(idxs)
    print(idx_diff)
    print('Switching Levels Initial')
    for i in range(idx_diff[0]):
        self.env.env.env.press('right_arrow')
        time.sleep(0.1)
        self.env.env.env.release('right_arrow')
        time.sleep(0.1)
    self.curlevel = idxs[0]
    for i in range(self.levelCount):
        # level_name = self.levels[i]
        trained_index = self.levels.index(self.levelpairs[i][0])
        training_index = self.levels.index(self.levelpairs[i][1])
        level_name = self.levels[training_index]
        print('gathering from ' + level_name)
        delay = self.gather_delay[training_index]
        args = copy.deepcopy(self.args)
        args.memory_delay = delay
        args.directory = self.directory[0]  # self.directory[trained_index]
        args.weight_override = self.weights[i]
        agent, hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental_init(
            args, agent_a3c.Agent, self.state_dim, self.action_dim)
        while True:
            hasSavedMemory, max_frame_saved = play_game_real_a3c_incremental(
                agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
            if hasSavedMemory:
                break
        agent.metrics.save(agent.results_location, 'metrics')  # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs')
        agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
        agent.brain.metrics = agent.metrics
        # print('switching levels')  # Switch to next level
        # self.env.env.env.press('right_arrow')
        # time.sleep(0.1)
        # self.env.env.env.release('right_arrow')
        # time.sleep(0.1)
        if i != self.levelCount - 1:
            print('Switching Levels')
            for j in range(idx_diff[i + 1]):
                self.env.env.env.press('right_arrow')
                time.sleep(0.1)
                self.env.env.env.release('right_arrow')
                time.sleep(0.1)
            self.curlevel = idxs[i + 1]
    print('all done')