class Experiment_Offline:
    """Warm-up, training and periodic evaluation loops for the DDPG agent.

    The class wires a ``csim.Parser`` environment to a ``DDPG`` learner and
    drives them without any rendering callbacks.  ``WarmUp`` fills the replay
    memory with greedy transitions, ``Train`` runs the actor and updates the
    networks, and ``episode_evaluation`` records an average reward into
    ``self.EvaluateList``.
    """

    # Number of training iterations between two evaluation requests.
    EVAL_INTERVAL = 400
    # Upper bound on the number of steps of a single evaluation episode.
    EVAL_MAX_STEPS = 150

    def __init__(self):
        self.SCENARIO = 'Basic'
        # self.SCENARIO = 'Corridor'
        # self.SCENARIO = 'Bottleneck'
        # self.SCENARIO = 'Crossway'
        self.setEnvironment(self.SCENARIO)
        self.setNetwork()

        self.isTerm = False
        self.flag_eval = False
        self.EvaluateList = []
        self.st_time = time.time()

    def setEnvironment(self, SCENARIO):
        """Create the parser and scenario objects for ``SCENARIO``.

        Only ``'Basic'`` is wired up here; any other name leaves
        ``self.Parser`` / ``self.Scenario`` untouched.
        """
        if SCENARIO == 'Basic':
            self.Parser = csim.Parser("Basic")

        obs = self.Observe()

        if SCENARIO == 'Basic':
            self.Scenario = Basic(obs)

    def setNetwork(self):
        """Instantiate the DDPG learner with the fixed layer sizes 4/40/2."""
        # NOTE(review): presumably [input, hidden, output] sizes -- confirm
        # against the DDPG constructor.
        self.Algorithm = DDPG([4, 40, 2])

    def Observe(self):
        """Return the parser's current observation, converted to numpy."""
        return self.convert_to_numpy(self.Parser.Observe())

    def Execute(self, action_type, run_type):
        """Observe, pick an action and advance the environment one step.

        Args:
            action_type: policy selector forwarded to ``Algorithm.Action``
                (``'GREEDY'`` and ``'ACTOR'`` are used in this class).
            run_type: ``'TRAIN'`` or ``'TEST'``; ``Parser.Step`` receives
                ``run_type == 'TEST'`` as its second argument.

        Returns:
            ``(obs, action, memory)`` where ``memory`` is whatever
            ``Parser.Step`` returns (indexed with ``'obs'``, ``'reward'`` and
            ``'isTerm'`` elsewhere in this class).
        """
        obs = self.Observe()
        action = self.Algorithm.Action(obs, action_type, run_type)
        action = self.convert_to_action_double(action)
        memory = self.Parser.Step(action, run_type == 'TEST')
        return obs, action, memory

    def Update(self, action_type, obs, action, memory):
        """Store one transition and, for actor steps, update the networks."""
        self.Algorithm.addMemory(
            action_type == 'GREEDY',
            obs,
            action,
            memory['obs'],
            memory['reward'],
            memory['isTerm'],
        )
        if action_type == 'ACTOR':
            self.Algorithm.Update()

    def _is_progress_step(self, index, total):
        """Return True when a progress line should be printed for ``index``.

        Every step is reported for runs shorter than 10 iterations, otherwise
        roughly every tenth of the run.
        """
        return total < 10 or index % (total // 10) == 0

    def _progress_percent(self, done, total):
        """Return ``done / total`` as a percentage (float)."""
        return 100.0 * done / total

    def WarmUp(self):
        """Fill the replay memory with ``cst.WARMUP_ITERATION`` greedy episodes."""
        print("\n\n")
        print("start time is ...  %s" % (strftime("%Y/%m/%d_%I:%M", localtime()),))
        print("Warming up.....")
        print("=========================================")

        self.warmup_iter = cst.WARMUP_ITERATION
        for episode in range(self.warmup_iter):
            if self._is_progress_step(episode, self.warmup_iter):
                percent = self._progress_percent(episode, self.warmup_iter)
                print("Warmup Generation...\t%s %%" % (percent,))

            self.isTerm = False
            self.Parser.Reset(-1)
            while not self.isTerm:
                obs, action, memory = self.Execute(action_type='GREEDY', run_type='TRAIN')
                memory['obs'] = self.convert_to_numpy(memory['obs'])
                # print "p: ", memory['obs']['agent'][0]['p']
                # print "d: ", memory['obs']['agent'][0]['d']
                if memory['isTerm']:
                    self.isTerm = True
                self.Update('GREEDY', obs, action, memory)

        print("=========================================")
        print("Warmup Done!")
        print("Replay Memory Size : \t%s" % (self.Algorithm.rm.getMemorySize(),))
        print("Total Warmup Time : \t%s seconds" % (time.time() - self.st_time,))
        return

    def Train(self):
        """Run the training loop, then save the model and plot the rewards.

        The loop runs over ``range(1, cst.TRAIN_ITERATION)``.  Every
        ``EVAL_INTERVAL`` iterations an evaluation is requested; it is carried
        out the next time an episode terminates.
        """
        self.st_time = time.time()
        self.train_iter = cst.TRAIN_ITERATION

        print("Training...")
        print("Iteration :  %s" % (self.train_iter,))
        print("=========================================")

        step_count = 0
        for iteration in range(1, self.train_iter):
            if self.isTerm:
                print("Reset New Episode :  %s  Steps" % (step_count,))
                step_count = 0
                self.Algorithm.expl_rate_decay()
                self.isTerm = False

                if self.flag_eval:
                    self.episode_evaluation()
                    self.flag_eval = False
                # Always begin the next training episode from a freshly reset
                # environment; previously the reset was skipped after an
                # evaluation, so training resumed from the state left behind
                # by the last evaluation episode.
                self.Parser.Reset(-1)

            obs, action, memory = self.Execute(action_type='ACTOR', run_type='TRAIN')
            memory['obs'] = self.convert_to_numpy(memory['obs'])
            self.Update('ACTOR', obs, action, memory)

            if memory['isTerm']:
                self.isTerm = memory['isTerm']
            step_count += 1

            if self._is_progress_step(iteration, self.train_iter):
                # Computed directly from the totals: the old expression
                # divided by float(train_iter / 10), which is zero for runs
                # shorter than 10 iterations.
                percent = self._progress_percent(iteration, self.train_iter)
                print("\n%s %% Done...\n" % (percent,))

            if iteration % self.EVAL_INTERVAL == 0:
                self.flag_eval = True

        training_time = time.time() - self.st_time
        self.Algorithm.save(FLAG_M_REPLAY_SAVE, training_time, self.EvaluateList)
        self.learning_graph()

        print("=========================================")
        print("Training Done!")
        print("Total Training Time : \t%s seconds" % (training_time,))
        return

    def learning_graph(self):
        """Plot the recorded evaluation rewards."""
        plt.plot(self.EvaluateList)
        plt.show()

    def episode_evaluation(self, max_steps=None):
        """Run the evaluation episodes and record the average step reward.

        Args:
            max_steps: cap on steps per evaluation episode; defaults to
                ``EVAL_MAX_STEPS``.

        The average is appended to ``self.EvaluateList``.  When no step was
        executed at all the average is recorded as 0 instead of dividing by
        zero.
        """
        if max_steps is None:
            max_steps = self.EVAL_MAX_STEPS

        evaluation_num = cst.EVALUTAION_SET
        if self.SCENARIO == 'Circle':
            evaluation_num = cst.AGENT_COUNT

        step_count = 0
        total_reward = 0
        for episode in range(evaluation_num):
            self.Parser.Reset(episode)
            for _ in range(max_steps):
                obs, action, memory = self.Execute(action_type='ACTOR', run_type='TRAIN')
                total_reward += memory['reward']
                step_count += 1
                if memory['isTerm']:
                    break

        avg_reward = total_reward / float(step_count) if step_count else 0
        self.EvaluateList.append(avg_reward)

        print("=======================================")
        print("Avg Reward :  %s" % (avg_reward,))
        print("=======================================")

    def convert_to_numpy(self, obs):
        """Convert the list fields of ``obs`` to numpy arrays, in place.

        For every agent ``'p'``, ``'q'``, ``'d'``, ``'delta'`` and ``'d_map'``
        become arrays while ``'v'`` and ``'front'`` are replaced by their
        first element; every obstacle's ``'p'`` becomes an array.  The same
        (mutated) ``obs`` object is returned.
        """
        for agent in obs['agent']:
            agent['v'] = agent['v'][0]
            agent['front'] = agent['front'][0]
            for key in ('p', 'q', 'd', 'delta', 'd_map'):
                agent[key] = np.array(agent[key])

        for obstacle in obs['obstacle']:
            obstacle['p'] = np.array(obstacle['p'])

        return obs

    def convert_to_action_double(self, action):
        """Cast each action's ``'theta'`` and ``'velocity'`` to float, in place."""
        for entry in action:
            entry['theta'] = float(entry['theta'])
            entry['velocity'] = float(entry['velocity'])
        return action
class Experiment:
    """Interactive GLUT front-end for the DDPG crowd-simulation agent.

    Constructing an instance opens the "Crowd Simulation" window, builds the
    environment and learner, optionally loads a checkpoint and warms up the
    replay memory, then enters ``glutMainLoop()``.  Stepping happens in
    ``timer_func`` and is controlled through the flags toggled in ``keyCB``.
    """

    def __init__(self, WIDTH=1260, HEIGHT=680):
        self.WIDTH = WIDTH
        self.HEIGHT = HEIGHT

        self.initGL()
        self.initFlag()

        SCENARIO = 'Basic'
        # SCENARIO = 'Corridor'
        # SCENARIO = 'Bottleneck'
        # SCENARIO = 'Crossway'
        self.setEnvironment(SCENARIO)
        self.setNetwork()

        if FLAG_USE_RECENT_CKPT:
            print("Load Network")
            self.load_network()
        else:
            print("New Network")

        if FLAG_WARMUP_FOR_TRAINING:
            self.WarmUp()

        self.isTerm = False
        self.timer_func()
        glutMainLoop()

    def initGL(self):
        """Create the GLUT window and register display/reshape/keyboard callbacks."""
        argv = sys.argv
        glutInit(argv)
        glutInitWindowPosition(0, 0)
        glutInitWindowSize(self.WIDTH, self.HEIGHT)
        glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGB | GLUT_DEPTH)
        self.windowID = glutCreateWindow("Crowd Simulation")
        glutDisplayFunc(self.display)
        glutReshapeFunc(self.reshape)
        glutKeyboardFunc(self.keyCB)

    def initFlag(self):
        """Reset every UI/control flag to ``False``."""
        self.flag = {
            'quit': False,
            'play': False,
            'train': False,
            'greedy': False,
            'step': False,
            'weight': False,
            'depth': False,
            'record': False,
            'replay': False,
            'trajectory': False,
        }

    def setEnvironment(self, SCENARIO):
        """Create the parser (and, for ``'Basic'``, the scenario) objects.

        A parser is created for 'Basic', 'Corridor', 'Bottleneck' and
        'Crossway'; only 'Basic' currently gets a ``self.Scenario``.
        """
        if SCENARIO in ('Basic', 'Corridor', 'Bottleneck', 'Crossway'):
            self.Parser = csim.Parser(SCENARIO)

        obs = self.Observe()

        if SCENARIO == 'Basic':
            self.Scenario = Basic(obs)
        # elif SCENARIO == 'Corridor':
        #     self.Scenario = Corridor(obs)
        # elif SCENARIO == 'Bottleneck':
        #     self.Scenario = Bottleneck(obs)
        # elif SCENARIO == 'Crossway':
        #     self.Scenario = Crossway(obs)

    def setNetwork(self):
        """Instantiate the DDPG learner with the fixed layer sizes 4/40/2."""
        # NOTE(review): presumably [input, hidden, output] sizes -- confirm
        # against the DDPG constructor.
        self.Algorithm = DDPG([4, 40, 2])

    def Reset(self):
        """Stop every running mode, reset the parser and refresh the scenario."""
        for name in ('play', 'train', 'record', 'replay', 'greedy'):
            self.flag[name] = False

        self.Parser.Reset(-1)
        self.Scenario.setObjectData(self.Observe())

    def Observe(self):
        """Return the parser's current observation, converted to numpy."""
        return self.convert_to_numpy(self.Parser.Observe())

    def Execute(self, action_type, run_type):
        """Observe, pick an action and advance the environment one step.

        Args:
            action_type: policy selector forwarded to ``Algorithm.Action``
                (``'GREEDY'`` and ``'ACTOR'`` are used in this class).
            run_type: ``'TRAIN'`` or ``'TEST'``; ``Parser.Step`` receives
                ``run_type == 'TEST'`` as its second argument.

        Returns:
            ``(obs, action, memory)`` where ``memory`` is whatever
            ``Parser.Step`` returns (indexed with ``'obs'``, ``'reward'`` and
            ``'isTerm'`` elsewhere in this class).
        """
        obs = self.Observe()
        action = self.Algorithm.Action(obs, action_type, run_type)
        action = self.convert_to_action_double(action)
        memory = self.Parser.Step(action, run_type == 'TEST')
        return obs, action, memory

    def Update(self, action_type, obs, action, memory):
        """Store one transition and, for actor steps, update the networks."""
        self.Algorithm.addMemory(
            action_type == 'GREEDY',
            obs,
            action,
            memory['obs'],
            memory['reward'],
            memory['isTerm'],
        )
        if action_type == 'ACTOR':
            self.Algorithm.Update()

    def WarmUp(self):
        """Fill the replay memory with ``cst.WARMUP_ITERATION`` greedy episodes."""
        print("Warming up...")

        self.warmup_iter = cst.WARMUP_ITERATION
        for i in range(self.warmup_iter):
            if self.warmup_iter < 10 or i % (self.warmup_iter // 10) == 0:
                log("Warmup Generation...\t" + str((i / float(self.warmup_iter)) * 100) + "%")

            self.isTerm = False
            self.Parser.Reset(-1)
            while not self.isTerm:
                obs, action, memory = self.Execute(action_type='GREEDY', run_type='TRAIN')
                memory['obs'] = self.convert_to_numpy(memory['obs'])
                if memory['isTerm']:
                    self.isTerm = True
                # Was spelled ``setObjecytData``; every other call site uses
                # ``setObjectData``.
                # NOTE(review): the original nesting of this call was not
                # recoverable; it is applied on every step here, mirroring
                # the training branch of timer_func -- confirm.
                self.Scenario.setObjectData(memory['obs'])
                self.Update('GREEDY', obs, action, memory)

        self.flag['warmup'] = False
        self.Parser.Reset(-1)
        print("Warmup Done!")

    def convert_to_numpy(self, obs):
        """Convert the list fields of ``obs`` to numpy arrays, in place.

        For every agent ``'d_map'``, ``'d'``, ``'delta'``, ``'q'``, ``'p'``
        and ``'color'`` become arrays while ``'v'`` and ``'front'`` are
        replaced by their first element; every obstacle's ``'p'`` becomes an
        array.  The same (mutated) ``obs`` object is returned.
        """
        for agent in obs['agent']:
            for key in ('d_map', 'd', 'delta', 'q', 'p'):
                agent[key] = np.array(agent[key])
            agent['v'] = agent['v'][0]
            agent['front'] = agent['front'][0]
            agent['color'] = np.array(agent['color'])

        for obstacle in obs['obstacle']:
            obstacle['p'] = np.array(obstacle['p'])

        return obs

    def convert_to_action_double(self, action):
        """Cast each action's ``'theta'`` and ``'velocity'`` to float, in place."""
        for entry in action:
            entry['theta'] = float(entry['theta'])
            entry['velocity'] = float(entry['velocity'])
        return action

    def _train_step(self):
        """Run one actor step in TRAIN mode and update the learner."""
        if self.isTerm:
            print("New Episode")
            self.Algorithm.expl_rate_decay()
            self.isTerm = False

        obs, action, memory = self.Execute(action_type='ACTOR', run_type='TRAIN')
        memory['obs'] = self.convert_to_numpy(memory['obs'])
        self.Scenario.setObjectData(memory['obs'])

        if memory['isTerm']:
            # Remember the termination so the next tick announces the new
            # episode and decays the exploration rate; this flag was never
            # set before, leaving that branch unreachable.
            self.isTerm = True
            self.Parser.Reset(-1)

        self.Update('ACTOR', obs, action, memory)

    def _play_step(self):
        """Run one step in TEST mode with the greedy or actor policy."""
        action_type = 'GREEDY' if self.flag['greedy'] else 'ACTOR'
        obs, action, memory = self.Execute(action_type=action_type, run_type='TEST')
        # NOTE(review): unlike the training path, memory['obs'] is handed to
        # the scenario without convert_to_numpy -- confirm this is intended.
        self.Scenario.setObjectData(memory['obs'])

        if memory['isTerm']:
            self.flag['play'] = False
            print("Scenario Ended")

        if self.flag['step']:
            # Single-step mode always pauses after the step.  Toggling (as
            # before) would resume playback when the scenario ended on this
            # very frame.
            self.flag['play'] = False
            self.flag['step'] = False

    def timer_func(self, fps=120):
        """GLUT timer callback: advance the simulation and re-arm the timer.

        The incoming ``fps`` is overridden: 40 while replaying (and the replay
        frame counter advances), 120 otherwise (and the counter is cleared).
        """
        if self.flag['replay']:
            fps = 40
            self.frame += 1
        else:
            fps = 120
            self.frame = 0

        if self.flag['train']:
            self._train_step()
        elif self.flag['play']:
            self._play_step()

        self.Scenario.record(self.flag['record'])
        glutPostRedisplay()
        glutTimerFunc(int(1000 / fps), self.timer_func, fps)

    def load_network(self, m_replay=False):
        """Load the actor and critic networks, plus saved state on request.

        Args:
            m_replay: when true, also load the replay memory and evaluation
                data.
        """
        self.Algorithm.load_network(type='actor')
        self.Algorithm.load_network(type='critic')
        if m_replay:
            self.Algorithm.load_memory()
            # NOTE(review): the original nesting of load_eval() was not
            # recoverable; it is kept under ``m_replay`` so a default load
            # touches only the two networks -- confirm.
            self.Algorithm.load_eval()

    def save_network(self, m_replay=False, training_time=0, eval_list=None):
        """Forward to ``Algorithm.save`` with the given arguments."""
        self.Algorithm.save(m_replay, training_time, eval_list)

    def mouseCB(self, button, state, x, y):
        """Mouse callback placeholder; does nothing."""
        pass

    def keyCB(self, key, x, y):
        """Keyboard callback.

        'q' quit, 'd' toggle training (saving when stopping), 's' toggle
        recording, 'e' toggle depth rendering.  While training, every other
        key only prints a reminder.  Otherwise: space play/pause, 'r' reset,
        'p' replay, 't' toggle trajectories, 'n' single step, 'g' greedy play.
        """
        if not key:
            return

        flag = self.flag
        if key == 'q':
            flag['quit'] = True
            glutDestroyWindow(self.windowID)
        elif key == 'd':
            if flag['train']:
                print("Stopped Training...!")
                self.save_network()
            else:
                print("Start Training...!")
            flag['train'] = not flag['train']
            flag['play'] = False
        elif key == 's':
            print("STOP record" if flag['record'] else "START record")
            flag['record'] = not flag['record']
        elif key == 'e':
            flag['depth'] = not flag['depth']
        elif flag['train']:
            print("Training... press 'd' to stop training")
        elif key == ' ':
            print("STOP" if flag['play'] else "PLAY")
            flag['play'] = not flag['play']
            flag['greedy'] = False
            flag['train'] = False
        elif key == 'r':
            self.Reset()
        elif key == 'p':
            flag['replay'] = True
            self.frame = 0
            print("START replay")
            flag['play'] = False
            flag['greedy'] = False
            flag['train'] = False
        elif key == 't':
            flag['trajectory'] = not flag['trajectory']
            print("traj :  %s" % (flag['trajectory'],))
        elif key == 'n':
            print("One Step")
            flag['play'] = True
            flag['greedy'] = False
            flag['step'] = True
        elif key == 'g':
            print("Policy : Greedy")
            flag['play'] = True
            flag['greedy'] = True

    def reshape(self, w, h):
        """Reshape callback: set the viewport and an orthographic projection."""
        glViewport(0, 0, w, h)
        glMatrixMode(GL_PROJECTION)
        glLoadIdentity()
        glOrtho(-self.WIDTH / 2, self.WIDTH / 2, -self.HEIGHT / 2, self.HEIGHT / 2, -30, 30)
        glMatrixMode(GL_MODELVIEW)
        glLoadIdentity()

    def display(self):
        """Display callback: draw the recorded frame or the live scenario."""
        glClearColor(0.9, 0.9, 0.9, 0.0)
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        glMatrixMode(GL_MODELVIEW)
        glLoadIdentity()
        gluLookAt(0, 0, 30, 0, 0, 0, 0, 1, 0)

        glPushMatrix()
        # Fall back to live rendering when replay is on but nothing was recorded.
        if self.flag['replay'] and self.Scenario.record_size != 0:
            self.Scenario.render_record(self.frame)
        else:
            self.Scenario.render(depth=self.flag['depth'], trajectory=self.flag['trajectory'])
        glPopMatrix()

        glutSwapBuffers()