import os
import time

import torch
import traci
from sumolib import checkBinary
from torch.utils.tensorboard import SummaryWriter

from utils import update_tensorboard


def dqn_train(configs, time_data, sumoCmd):
    # Environment setting
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    # EXP_CONFIG setting
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    epoch = 0
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyperparameters
    agent.save_params(time_data)
    # training loop
    while epoch < NUM_EPOCHS:
        # epoch start
        traci.start(sumoCmd)
        step = 0
        action_distribution = tuple()
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G',
                'r' * configs['num_lanes'], 'r'))
        before_action = torch.ones((1, len(tl_rl_list)))
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        # time check
        a = time.time()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            # if the action differs from the previous one, insert a 5 s yellow phase;
            # if it is the same, keep the current phase running without a yellow transition
            if before_action != action:
                traci.trafficlight.setRedYellowGreenState(
                    tl_rl_list[0], 'y' * 28)
                arrived_vehicles += simulation_step(env, 5)
                step += 5
            # apply the action to the environment
            env.step(action)
            # run the chosen phase for 20 s
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state
            reward = env.get_reward()  # reward delayed by 20 s
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # update hyperparameters
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % agent.configs['target_update_period'] == 0:
            agent.target_update()  # dqn
        # once per epoch, update tensorboard
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}_{}'.format(time_data, epoch))
    writer.close()
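

# The dqn_train variant above calls a simulation_step() helper that is not defined
# in this section.  Judging from the inline loops used by the other training
# routines (advance SUMO, let the env collect interim state, accumulate arrived
# vehicles), it presumably looks something like the sketch below; the name and
# return value are taken from the call sites, the body is an assumption.
def simulation_step(env, num_steps):
    """Hypothetical helper: advance the simulation `num_steps` times and
    return the number of vehicles that arrived (throughput)."""
    arrived = 0
    for _ in range(num_steps):
        traci.simulationStep()
        env.collect_state()
        arrived += traci.simulation.getArrivedNumber()
    return arrived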


def dqn_train(configs, time_data, sumoCmd):
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    # init agent and tensorboard writer
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyperparameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G',
                'r' * configs['num_lanes'], 'r'))
        step = 0
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            for _ in range(20):  # run the chosen phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state
            # yellow phase for 5 steps after the 20 s green
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()  # reward delayed by 25 s
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once per epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}_{}'.format(time_data, epoch))
    writer.close()
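

# Both dqn_train variants reset the junction with the same RedYellowGreen format
# string before each epoch; note that its second positional argument ('G') is never
# referenced.  The helper below is only an illustration of what the string expands
# to.  num_lanes = 4 is an assumption, chosen because it is the lane count whose
# expansion is 28 characters long, matching the 'y' * 28 yellow state used during
# phase transitions.
def _phase_string_example(num_lanes=4):
    """Illustrative only: build the initial phase string for a given lane count."""
    phase = 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
        'G' * num_lanes, 'G', 'r' * num_lanes, 'r')
    # For num_lanes = 4 this returns 'GGGGGrrrrrrrrrGGGGGrrrrrrrrr' (length 28).
    return phase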


def REINFORCE_train(configs, time_data, sumoCmd):
    from Agent.REINFORCE import Trainer
    from Agent.REINFORCE import DEFAULT_CONFIG
    from Env.Env import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyperparameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G',
                'r' * configs['num_lanes'], 'r'))
        env = TL3x3Env(configs)
        # env = GridEnv(configs)
        step = 0
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            env.step(action)  # apply the action
            # note: next_state is captured right after env.step(), before the phase is simulated
            next_state = env.get_state()
            for _ in range(20):  # run the chosen phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            # yellow phase for 5 steps after the 20 s green
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()
            prob = agent.get_prob()
            agent.put_data((reward, prob[action]))
            state = next_state
            total_reward += reward
            if step >= MAX_STEPS:
                done = True
        agent.update(done)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        traci.close()
        epoch += 1
        # once per epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
    writer.close()
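

# REINFORCE_train only stores (reward, probability-of-taken-action) pairs through
# agent.put_data() and defers learning to agent.update(done) at the end of the
# episode.  The Trainer's update is not shown in this section; the sketch below
# illustrates the standard Monte-Carlo policy-gradient update such a buffer
# typically feeds.  The function name, the discount factor gamma, and the
# optimizer argument are assumptions, not the repository's actual implementation.
def reinforce_update_sketch(data, optimizer, gamma=0.99):
    """Illustrative REINFORCE update over one episode.

    `data` is a time-ordered list of (reward, prob_of_taken_action) tuples,
    as accumulated by put_data(); the probabilities must carry gradients.
    """
    # discounted return G_t for every step, computed backwards in time
    returns, G = [], 0.0
    for reward, _ in reversed(data):
        G = reward + gamma * G
        returns.insert(0, G)
    # policy-gradient loss: sum_t -log pi(a_t|s_t) * G_t
    loss = 0.0
    for (_, prob), G in zip(data, returns):
        loss = loss - torch.log(prob) * G
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()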


def ppo_train(configs, time_data, sumoCmd):
    from Agent.ppo import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyperparameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    ppo_update_step = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G',
                'r' * configs['num_lanes'], 'r'))
        env = TL3x3Env(configs)
        # env = GridEnv(configs)
        step = 0
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            ppo_update_step += 1
            for _ in range(20):  # run the chosen phase for 20 simulation steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state
            # yellow phase for 5 steps after the 20 s green
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()  # reward delayed by 25 s
            agent.memory.rewards.append(reward)
            if step >= MAX_STEPS:
                done = True
            agent.memory.dones.append(done)
            state = next_state
            total_reward += reward
            if ppo_update_step % 400 == 0:
                agent.update()
                agent.update_hyperparams(epoch)  # lr update
                ppo_update_step = 0
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once per epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}_{}'.format(time_data, epoch))
    writer.close()
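

# ppo_train only fills agent.memory.rewards / agent.memory.dones and triggers
# agent.update() every 400 decision steps; the update itself is not shown here.
# The sketch below is the clipped-surrogate loss at the core of a standard PPO
# update, included for reference.  The function name, tensor arguments, and the
# eps_clip value are assumptions about what agent.update() presumably computes.
def ppo_clipped_loss_sketch(log_probs_new, log_probs_old, advantages, eps_clip=0.2):
    """Illustrative PPO clipped-surrogate loss (to be minimized)."""
    ratios = torch.exp(log_probs_new - log_probs_old)   # pi_new(a|s) / pi_old(a|s)
    surr1 = ratios * advantages
    surr2 = torch.clamp(ratios, 1.0 - eps_clip, 1.0 + eps_clip) * advantages
    return -torch.min(surr1, surr2).mean()               # maximize the clipped surrogate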


def test(flags, configs, sumoConfig):
    from Env.Env import TL3x3Env
    from Agent.dqn import Trainer
    from Env.MultiEnv import GridEnv
    from utils import save_params, load_params, update_tensorboard
    # init test setting
    sumoBinary = checkBinary('sumo-gui')
    sumoCmd = [sumoBinary, "-c", sumoConfig]
    # setting the rl list
    tl_rl_list = configs['tl_rl_list']
    MAX_STEPS = configs['max_steps']
    reward = 0
    traci.start(sumoCmd)
    agent = Trainer(configs)
    # load weights for the replay if specified
    if flags.replay_name is not None:
        agent.load_weights(flags.replay_name)
        configs = load_params(configs, flags.replay_name)
    env = TL3x3Env(configs)
    step = 0
    # state initialization
    state = env.get_state()
    total_reward = 0
    arrived_vehicles = 0
    action_distribution = tuple()
    with torch.no_grad():
        while step < MAX_STEPS:
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # apply the action
            for _ in range(20):  # run the chosen phase for 20 simulation steps
                env.collect_state()
                traci.simulationStep()
                step += 1
            # yellow phase for 5 steps
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):
                traci.simulationStep()
                env.collect_state()
                step += 1
            reward = env.get_reward()
            next_state = env.get_state()
            # agent.save_replay(state, action, reward, next_state)
            state = next_state
            total_reward += reward
            step += 1
            if step == MAX_STEPS:
                done = True
            # agent.update(done)  # no update during test
            # loss += agent.get_loss()  # total loss
            arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            traci.simulationStep()
            # if step % 200 == 0:
            agent.target_update()
    traci.close()
    print('======== return: {} arrived number:{}'.format(
        total_reward, arrived_vehicles))
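

# The training routines above share the (configs, time_data, sumoCmd) signature and
# are presumably selected from a command-line entry point that builds sumoCmd and
# picks an algorithm.  That entry point is not part of this section; the sketch
# below is a hypothetical illustration of how they could be wired together.  The
# 'algorithm' config key and the function name run_training are assumptions.
def run_training(configs, sumoConfig):
    """Hypothetical dispatcher for the training routines defined above."""
    sumoBinary = checkBinary('sumo')              # headless SUMO for training
    sumoCmd = [sumoBinary, "-c", sumoConfig]
    time_data = time.strftime('%m-%d_%H-%M-%S', time.localtime())
    if configs['algorithm'] == 'dqn':
        dqn_train(configs, time_data, sumoCmd)
    elif configs['algorithm'] == 'REINFORCE':
        REINFORCE_train(configs, time_data, sumoCmd)
    elif configs['algorithm'] == 'ppo':
        ppo_train(configs, time_data, sumoCmd)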