def playGameGym_ddqn(args, agent_func):
    if args.hyper.img_channels > 1:
        env = Environment_gym_rgb(args.env)
        state_dim = env.env.state_dim + [args.hyper.img_channels]
    else:
        env = Environment_gym(args.env)
        state_dim = env.env.state_dim
    action_dim = env.env.action_dim

    agent = agent_func(args, state_dim, action_dim, getattr(models, args.model))

    iteration = 0
    while True:
        iteration += 1
        R, useRate = env.run(agent)

        if agent.memory.total_saved > agent.h.extra.observe:
            if agent.mode == 'observe':
                agent.mode = 'train'
                print('Training...')
                time.sleep(0.5)

        if agent.mode == 'train':
            if iteration % 10 == 0:
                print("Step:", agent.memory.total_saved, ", Total reward:", R)
                #agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent)
def playGameReal_a3c(args, agent_func, screen_number=0, screen_id=-1):
    img_channels = args.hyper.img_channels
    env = Environment_realtime_a3c(args.env)
    action_dim = env.env.action_dim()
    state_dim = list(env.env.state_dim()) + [img_channels]
    #env = Environment_realtime_a3c(emulator, img_channels)
    print(state_dim)
    print(action_dim)

    agent = agent_func(args, state_dim, action_dim, getattr(models, args.model))

    hasSavedMemory = False
    max_frame_saved = 300
    while True:
        # Remember where this episode starts and ends in the replay buffer
        pointer_start = agent.brain.brain_memory.curIndex + 0
        frame, useRate, frame_saved = env.run(agent)
        pointer_end = agent.brain.brain_memory.curIndex + 0

        agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

        # New longest episode: save its transitions and a weight snapshot
        if frame_saved > max_frame_saved:
            print('New max time!')
            max_frame_saved = frame_saved
            save_memory_subset(agent, pointer_start, pointer_end, frame_saved, skip=8)
            save_weights(agent, 'frame_' + str(frame_saved))

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent, agent.run_count)
            agent.metrics.save(agent.results_location, 'metrics')
            agent.metrics.runs.graph(agent.results_location)
            #agent.metrics.save_metrics_v(agent.results_location)
            #agent.metrics.a3c.graph_all(agent.results_location)
            #agent.metrics.save_metrics_training(agent.results_location)

        if agent.brain.brain_memory.isFull and not hasSavedMemory:
            hasSavedMemory = True
            saveMemory_v2(agent)
            if agent.args.mode == 'gather':
                print('Finished Gathering Data')
                break

        # Scale the number of training batches with episode length, clamped to [60, 400]
        frame_saved = int(frame_saved / 4)
        if frame_saved > 400:
            frame_saved = 400
        if frame_saved < 60:
            frame_saved = 60

        if agent.brain.brain_memory.isFull:
            agent.brain.optimize_batch(frame_saved)
def playGameReal_ddqn(args, agent_func, screen_number=0, screen_id=-1):
    img_channels = args.hyper.img_channels
    env = Environment_realtime(args.env)
    action_dim = env.env.action_dim()
    state_dim = list(env.env.state_dim()) + [img_channels]

    agent = agent_func(args, state_dim, action_dim, getattr(models, args.hyper.model))

    if args.data:  # Load Memory
        loadMemory_v2(agent, args.data)
        agent.mode = 'train'

        loaded_replays = int(agent.memory.size)
        print('Running', loaded_replays, 'replays')

        # Train on loaded memory before going live
        for i in range(loaded_replays):
            agent.update_agent()
            if i % 1000 == 0:
                print(i, '/', loaded_replays, 'replays learned')
            if i % 100 == 0:
                agent.replay(debug=True)
            else:
                agent.replay(debug=False)

        agent.save_weights()
        time.sleep(1)

    while True:
        frame, useRate, frame_saved = env.run(agent)

        agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

        if agent.mode == 'train':
            print('Running', frame_saved, 'replays')
            for i in range(frame_saved):
                if i % 100 == 0:
                    agent.replay(debug=True)
                else:
                    agent.replay(debug=False)

        if agent.h.save_rate < agent.save_iterator:
            agent.save_iterator -= agent.h.save_rate
            save_weights(agent)
            if agent.mode == 'train':  # Fix this later, not correct
                agent.metrics.save(agent.results_location, 'metrics')
                agent.metrics.runs.graph(agent.results_location)
                agent.metrics.save_metrics_training(agent.results_location)
def incremental_learn(self, args, levels_list):
    args = copy.deepcopy(args)
    agent, hasSavedMemory, max_frame_saved = playGameReal_a3c_incremental_init(
        args, agent_a3c.Agent, self.state_dim, self.action_dim)

    length = len(levels_list)
    idxs = []
    idx_diff = []
    for i in range(length):
        idxs.append(self.levels.index(levels_list[i]))
    # Right-arrow presses needed to reach the first level, then between consecutive levels
    idx_diff.append((idxs[0] - self.curlevel) % self.levelCount)
    for i in range(1, length):
        idx_diff.append((idxs[i] - idxs[i - 1]) % self.levelCount)
    print(idxs)
    print(idx_diff)

    print('Switching Levels Initial')
    for i in range(idx_diff[0]):
        self.env.env.env.press('right_arrow')
        time.sleep(0.1)
        self.env.env.env.release('right_arrow')
        time.sleep(0.1)
    self.curlevel = idxs[0]

    for i in range(length):
        time_start = time.time()
        while True:
            hasSavedMemory, max_frame_saved = playGameReal_a3c_incremental(
                agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
            if time.time() - time_start > self.break_time:  # per-level time budget
                break

        if i != length - 1:
            print('Switching Levels')
            for j in range(idx_diff[i + 1]):
                self.env.env.env.press('right_arrow')
                time.sleep(0.1)
                self.env.env.env.release('right_arrow')
                time.sleep(0.1)
            self.curlevel = idxs[i + 1]

        save_weights(agent, 'id_' + str(i))  # Save weights
        agent.metrics.save(agent.results_location, 'metrics_id_' + str(i))  # Save metrics
        agent.metrics.runs.graph(agent.results_location, 'runs_id_' + str(i))
        agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
        agent.brain.metrics = agent.metrics
        agent.brain.init_vars()  # Reset network
def playGameReal_a3c_incremental(agent, env, state_dim, action_dim, hasSavedMemory, max_frame_saved):
    # Remember where this episode starts and ends in the replay buffer
    pointer_start = agent.brain.brain_memory.curIndex + 0
    frame, useRate, frame_saved = env.run(agent)
    pointer_end = agent.brain.brain_memory.curIndex + 0

    agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

    # New longest episode: save its transitions and a weight snapshot
    if frame_saved > max_frame_saved:
        print('New max time!')
        max_frame_saved = frame_saved
        save_memory_subset(agent, pointer_start, pointer_end, frame_saved, skip=8)
        save_weights(agent, 'frame_' + str(frame_saved))

    if agent.h.save_rate < agent.save_iterator:
        agent.save_iterator -= agent.h.save_rate
        if agent.args.mode != 'gather':
            save_weights(agent, agent.run_count)
            agent.metrics.save(agent.results_location, 'metrics')
            agent.metrics.runs.graph(agent.results_location)
            #agent.metrics.save_metrics_v(agent.results_location)
            #agent.metrics.a3c.graph_all(agent.results_location)
            #agent.metrics.save_metrics_training(agent.results_location)

    if agent.brain.brain_memory.isFull and not hasSavedMemory:
        hasSavedMemory = True
        saveMemory_v2(agent)

    # Scale the number of training batches with episode length, clamped to [60, 400]
    frame_saved = int(frame_saved / 4)
    if frame_saved > 400:
        frame_saved = 400
    if frame_saved < 60:
        frame_saved = 60

    if agent.brain.brain_memory.isFull:
        if agent.args.mode != 'gather':
            agent.brain.optimize_batch(frame_saved)

    return hasSavedMemory, max_frame_saved
def playGameGym_a3c_multithread(agent, env):
    iteration = 0
    while True:
        iteration += 1
        R, useRate = env.run(agent)

        if agent.mode == 'train':
            if iteration % 10 == 0:
                print("Step:", agent.memory.total_saved, ", Total reward:", R, "idx:", agent.idx)
                #agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

        # Only the first worker saves weights
        if agent.idx == 0:
            if agent.h.save_rate < agent.save_iterator:
                agent.save_iterator -= agent.h.save_rate
                save_weights(agent, agent.run_count)
def run_job(self, job):
    args = job.args
    level = job.level
    self.controller.switch(level)

    agent, hasSavedMemory, max_frame_saved = playGameReal_a3c_incremental_init(
        args, agent_a3c.Agent, self.state_dim, self.action_dim)

    time_start = time.time()
    while True:
        hasSavedMemory, max_frame_saved = playGameReal_a3c_incremental(
            agent, self.env, self.state_dim, self.action_dim, hasSavedMemory, max_frame_saved)
        if agent.args.mode == 'gather':
            if hasSavedMemory:
                break
        elif time.time() - time_start > self.break_time:
            break

    save_weights(agent, 'end')  # Save weights
    agent.metrics.save(agent.results_location, 'metrics_end')  # Save metrics
    agent.metrics.runs.graph(agent.results_location, 'runs_end')
    agent.metrics = Metrics(agent.metrics.type)  # Reset metrics
    agent.brain.metrics = agent.metrics
    agent.brain.init_vars()  # Reset network

    if agent.args.mode != 'gather':
        # Copy the checkpoint from the longest episode to model_max.h5
        directory = agent.results_location
        allfiles = listdir(directory)
        onlyfiles = [f for f in allfiles if isfile(join(directory, f))]
        frame = [f for f in onlyfiles if 'model_frame' in f and '.h5' in f]
        frame_time = [int(f[12:-3]) for f in frame]
        if not frame_time:
            max_file_name = 'model_end.h5'
        else:
            max_file_name = frame[np.argmax(frame_time)]
        max_file = join(directory, max_file_name)

        src = max_file
        dst = join(directory, 'model_max.h5')
        shutil.copyfile(src, dst)
def run(self):
    iteration = 0
    while not self.stop_signal:
        iteration += 1
        R, useRate = self.env.run(self.agent)

        if self.agent.mode == 'train':
            if iteration % 10 == 0:
                print("Step:", self.agent.memory.total_saved, ", Total reward:", R, "idx:", self.agent.idx)
                #agent.metrics.display_metrics(frame, useRate, agent.memory.total_saved, agent.epsilon)

        if self.agent.idx == 0:
            if self.agent.h.save_rate < self.agent.save_iterator:
                self.agent.save_iterator -= self.agent.h.save_rate
                save_weights(self.agent, self.agent.brain.brain_memory.total_saved)