def super_dqn_train(configs, time_data, sumoCmd):
    from Agent.super_dqn import Trainer
    if configs['model'] == 'base':
        from Env.MultiEnv import GridEnv
    # elif configs['model'] == 'frap':
    #     from Env.FRAP import TL3x3Env  # will be added
    side_list = ['u', 'r', 'd', 'l']
    tl_rl_list = list()
    for _, node in enumerate(configs['node_info']):
        if node['id'][-1] not in side_list:
            tl_rl_list.append(node['id'])
    configs['tl_rl_list'] = tl_rl_list
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        for tl_rl in tl_rl_list:
            traci.trafficlight.setRedYellowGreenState(
                tl_rl, 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                    'G'*configs['num_lanes'], 'G', 'r'*configs['num_lanes'], 'r'))
        before_action = torch.ones((1, len(tl_rl_list), 1), dtype=torch.int)
        env = GridEnv(configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            # decide the action
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            # if an intersection keeps its previous action, it skips the yellow phase;
            # otherwise switch it to all-yellow and run 5 s before applying the new action
            need_yellow = False
            for tl_rl in tl_rl_list:
                idx = tl_rl_list.index(tl_rl)
                if before_action[0][idx] != action[0][idx]:
                    traci.trafficlight.setRedYellowGreenState(
                        tl_rl, 'y'*(3+configs['num_lanes'])*4)
                    need_yellow = True
            if need_yellow:
                arrived_vehicles += simulation_step(env, 5)
                step += 5
            # apply to the environment
            env.step(action)  # applies the action
            # run 20 s after applying
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state
            reward = env.get_reward()  # reward delayed by 20 s
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward.sum(), arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}_{}'.format(time_data, epoch))
    writer.close()
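# For reference, the RYG state template above expands with the lane count. With
# num_lanes = 4 (an illustrative value, not one taken from the configs):
#
#     'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format('G'*4, 'G', 'r'*4, 'r')
#     # -> 'GGGGGrrrrrrrrrGGGGGrrrrrrrrr'  (28 signal heads)
#
# i.e. (3 + num_lanes)*4 characters, the same length as the all-yellow transition
# string 'y'*(3 + num_lanes)*4 used when an action change inserts a yellow phase.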
def dqn_train(configs, time_data, sumoCmd):
    # Environment Setting
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    # EXP_CONFIG Setting
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    epoch = 0
    # init agent and tensorboard writer
    # agent setting
    agent = Trainer(configs)
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    while epoch < NUM_EPOCHS:
        # Epoch Start
        traci.start(sumoCmd)
        step = 0
        action_distribution = tuple()
        # Epoch Start setting
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G'*configs['num_lanes'], 'G', 'r'*configs['num_lanes'], 'r'))
        before_action = torch.ones((1, len(tl_rl_list)))
        done = False
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        # state initialization
        state = env.get_state()
        # Time Check
        a = time.time()
        while step < MAX_STEPS:
            # decide the action; if it equals the previous action, skip the yellow phase
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            if before_action != action:
                traci.trafficlight.setRedYellowGreenState(
                    tl_rl_list[0], 'y'*28)
                arrived_vehicles += simulation_step(env, 5)
                step += 5
            # apply to the environment
            env.step(action)  # applies the action
            # run 20 s after applying
            arrived_vehicles += simulation_step(env, 20)
            step += 20
            next_state = env.get_state()  # next state
            reward = env.get_reward()  # reward delayed by 20 s
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
            before_action = action
        b = time.time()
        traci.close()
        print("time:", b-a)
        epoch += 1
        # update hyper parameter
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % agent.configs['target_update_period'] == 0:
            agent.target_update()  # dqn
        # once in an epoch update tensorboard
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name']+'_{}_{}'.format(time_data, epoch))
    writer.close()
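# NOTE: super_dqn_train and dqn_train above rely on a simulation_step(env, n) helper
# that is defined elsewhere in this project. A minimal sketch of such a helper,
# mirroring the inline per-second loops used in the functions below (hypothetical;
# the project's actual helper may differ):
def simulation_step(env, n):
    """Advance SUMO by n one-second steps, let the env collect state,
    and return the number of vehicles that arrived meanwhile."""
    arrived = 0
    for _ in range(n):
        traci.simulationStep()
        env.collect_state()
        arrived += traci.simulation.getArrivedNumber()  # throughput
    return arrived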
val_psnr = 0
val_ssim = 0
for i in range(len(valid_hr_imgs)):
    hr = valid_hr_imgs[i]
    lr = valid_lr_imgs[i]
    [lr, hr] = utils.datatype([lr, hr])
    hr_expand = np.expand_dims(hr, axis=0)
    lr_expand = np.expand_dims(lr, axis=0)
    psnr, ssim, sr_expand = sess.run([PSNR, SSIM, tensor_sr], {
        tensor_lr: lr_expand,
        tensor_hr: hr_expand
    })
    sr = np.squeeze(sr_expand)
    utils.update_tensorboard(epoch, writer, i, lr, sr, hr)
    val_psnr += psnr
    val_ssim += ssim
val_psnr = val_psnr / len(valid_hr_imgs)
val_ssim = val_ssim / len(valid_hr_imgs)
if val_psnr > best_psnr:
    best_psnr = val_psnr
    best_epoch = epoch
    print('Saving new best model')
    # save model
    saver.save(sess, os.path.join(checkpoint_dir, 'model.ckpt'))
writer.add_scalar('Validate PSNR', val_psnr, epoch)
writer.add_scalar('Validate SSIM', val_ssim, epoch)
def dqn_train(configs, time_data, sumoCmd):
    from Agent.dqn import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    tl_rl_list = configs['tl_rl_list']
    # init agent and tensorboard writer
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        env = TL3x3Env(configs)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'], 'r'))
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            '''
            # state = env.get_state(action)  # partial observability is handled in the env
            action = agent.get_action(state)
            env.step(action)
            reward = env.get_reward()
            next_state = env.get_state()
            # if traci.inductionloop.getLastStepVehicleNumber("0") > 0:
            store transition in D (experience replay)
            sample random minibatch from D
            step += 1
            state = next_state
            set yi
            '''
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # applies the action
            for _ in range(20):  # hold the chosen phase for 20 one-second steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state
            # after the green interval, switch to yellow
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):  # 5 one-second steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()  # reward delayed by 25 s
            agent.save_replay(state, action, reward, next_state)  # dqn
            agent.update(done)
            state = next_state
            total_reward += reward
        agent.update_hyperparams(epoch)  # lr and epsilon update
        if epoch % 2 == 0:
            agent.target_update()  # dqn
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}_{}'.format(time_data, epoch))
    writer.close()
def ppo_train(configs, time_data, sumoCmd):
    from Agent.ppo import Trainer
    if configs['model'] == 'base':
        from Env.Env import TL3x3Env
    elif configs['model'] == 'frap':
        from Env.FRAP import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    ppo_update_step = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'], 'r'))
        env = TL3x3Env(configs)
        # env = GridEnv(configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        action_distribution = tuple()
        a = time.time()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            action_distribution += tuple(action.unsqueeze(1))
            env.step(action)  # applies the action
            ppo_update_step += 1
            for _ in range(20):  # hold the chosen phase for 20 one-second steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            next_state = env.get_state()  # next state
            # after the green interval, switch to yellow
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):  # 5 one-second steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()  # reward delayed by 25 s
            agent.memory.rewards.append(reward)
            if step >= MAX_STEPS:
                done = True
            agent.memory.dones.append(done)
            state = next_state
            total_reward += reward
            if ppo_update_step % 400 == 0:
                agent.update()
                agent.update_hyperparams(epoch)  # lr update
                ppo_update_step = 0
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}_{}'.format(time_data, epoch))
    writer.close()
def REINFORCE_train(configs, time_data, sumoCmd):
    from Agent.REINFORCE import Trainer
    from Agent.REINFORCE import DEFAULT_CONFIG
    from Env.Env import TL3x3Env
    tl_rl_list = configs['tl_rl_list']
    NUM_EPOCHS = configs['num_epochs']
    MAX_STEPS = configs['max_steps']
    # init agent and tensorboard writer
    agent = Trainer(configs)
    writer = SummaryWriter(
        os.path.join(configs['current_path'], 'training_data', time_data))
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    epoch = 0
    while epoch < NUM_EPOCHS:
        traci.start(sumoCmd)
        traci.trafficlight.setRedYellowGreenState(
            tl_rl_list[0], 'G{0}{3}rr{2}{3}rG{0}{3}rr{2}{3}r'.format(
                'G' * configs['num_lanes'], 'G', 'r' * configs['num_lanes'], 'r'))
        env = TL3x3Env(configs)
        # env = GridEnv(configs)
        step = 0
        done = False
        # state initialization
        # agent setting
        total_reward = 0
        reward = 0
        arrived_vehicles = 0
        state = env.get_state()
        while step < MAX_STEPS:
            action = agent.get_action(state)
            env.step(action)  # applies the action
            next_state = env.get_state()
            for _ in range(20):  # hold the chosen phase for 20 one-second steps
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            # after the green interval, switch to yellow
            traci.trafficlight.setRedYellowGreenState(tl_rl_list[0], 'y' * 28)
            for _ in range(5):  # 5 one-second steps of yellow
                traci.simulationStep()
                env.collect_state()
                step += 1
                arrived_vehicles += traci.simulation.getArrivedNumber()  # throughput
            reward = env.get_reward()
            prob = agent.get_prob()
            agent.put_data((reward, prob[action]))
            state = next_state
            total_reward += reward
            if step > MAX_STEPS:
                done = True
        agent.update(done)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        traci.close()
        epoch += 1
        # once in an epoch
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        print('======== {} epoch/ return: {} arrived number:{}'.format(
            epoch, total_reward, arrived_vehicles))
    writer.close()
def city_dqn_train(configs, time_data, sumoCmd):
    from Agent.super_dqn import Trainer
    if configs['model'] == 'city':
        from Env.CityEnv import CityEnv
    phase_num_matrix = torch.tensor(  # maximum number of phases each traffic light has
        [len(configs['traffic_node_info'][index]['phase_duration'])
         for _, index in enumerate(configs['traffic_node_info'])])
    # init agent and tensorboard writer
    writer = SummaryWriter(os.path.join(
        configs['current_path'], 'training_data', time_data))
    agent = Trainer(configs)
    # save hyper parameters
    agent.save_params(time_data)
    # init training
    NUM_AGENT = configs['num_agent']
    DEVICE = configs['device']
    TL_RL_LIST = configs['tl_rl_list']
    MAX_PHASES = configs['max_phase_num']
    MAX_STEPS = configs['max_steps']
    OFFSET = torch.tensor(configs['offset'],  # i*10
                          device=DEVICE, dtype=torch.int)
    TL_PERIOD = torch.tensor(
        configs['tl_period'], device=DEVICE, dtype=torch.int)
    epoch = 0
    while epoch < configs['num_epochs']:
        step = 0
        if configs['randomness']:
            tmp_sumoCmd = sumoCmd + ['--scale', str(1.5 + random())]  # 1.5~2.5
        elif configs['network'] == 'dunsan' or 'grid' in configs['network']:
            tmp_sumoCmd = sumoCmd + ['--scale', str(configs['scale'])]
        else:
            tmp_sumoCmd = sumoCmd
        traci.start(tmp_sumoCmd)
        env = CityEnv(configs)
        # Total Initialization
        actions = torch.zeros(
            (NUM_AGENT, configs['action_size']), dtype=torch.int, device=DEVICE)
        # Mask matrix: True once a traffic light's period has ended
        mask_matrix = torch.zeros(
            (NUM_AGENT), dtype=torch.bool, device=DEVICE)
        # per-agent timer that only counts up to its maximum period
        t_agent = torch.zeros(
            (NUM_AGENT), dtype=torch.int, device=DEVICE)
        t_agent -= OFFSET
        # Action matrix: compared against the timer to decide when to collect_state;
        # missing states are zero padded
        action_matrix = torch.zeros(
            (NUM_AGENT, MAX_PHASES), dtype=torch.int, device=DEVICE)  # 3 s of yellow still has to be handled
        action_index_matrix = torch.zeros(
            (NUM_AGENT), dtype=torch.long, device=DEVICE)  # index of the current phase
        action_update_mask = torch.eq(  # whether the action has to be updated right now
            t_agent, action_matrix[0, action_index_matrix]).view(NUM_AGENT)  # the 0 is only for indexing
        # reset to 0 once the maximum is reached (compared against the offset)
        clear_matrix = torch.eq(t_agent % TL_PERIOD, 0)
        t_agent[clear_matrix] = 0
        # if it is time to move to the next phase, increase the action index (by tensor slicing)
        action_index_matrix[action_update_mask] += 1
        action_index_matrix[clear_matrix] = 0
        # mask update: set the cleared entries to True
        mask_matrix[clear_matrix] = True
        mask_matrix[~clear_matrix] = False
        # state initialization
        state = env.collect_state(
            action_update_mask, action_index_matrix, mask_matrix)
        total_reward = 0
        # agent setting
        arrived_vehicles = 0
        a = time.time()
        while step < MAX_STEPS:
            # decide the actions
            actions = agent.get_action(state, mask_matrix)
            if mask_matrix.sum() > 0:
                print(actions.transpose(1, 2))
            # convert to phase-duration form:
            # matrix of the times at which each agent must move to its next phase
            action_matrix = env.calc_action(
                action_matrix, actions, mask_matrix)  # expressed as cumulative values
            # apply to the environment
            # env.step applies the actions and calls traci.simulationStep internally
            env.step(
                actions, mask_matrix, action_index_matrix, action_update_mask)
            # advance everything by 1 s (the traci step happens inside env.step)
            step += 1
            t_agent += 1
            # reset to 0 once the maximum is reached (compared against the offset)
            clear_matrix = torch.eq(t_agent % TL_PERIOD, 0)
            # if it is time to move to the next phase, increase the action index (by tensor slicing)
            for idx, _ in enumerate(TL_RL_LIST):
                action_update_mask[idx] = torch.eq(  # the update must be decided from the actual current phase only
                    t_agent[idx],
                    action_matrix[idx, action_index_matrix[idx]].view(-1))  # indexing
            action_index_matrix[action_update_mask] += 1
            # once an agent passes its last phase, reset that agent's action index to 0
            action_index_matrix[clear_matrix] = 0
            # mask update: set the cleared entries to True
            t_agent[clear_matrix] = 0
            # print(t_agent, action_index_matrix, step, action_update_mask)
            mask_matrix[clear_matrix] = True
            mask_matrix[~clear_matrix] = False
            next_state = env.collect_state(
                action_update_mask, action_index_matrix, mask_matrix)
            # pull each agent's state out of the env; start once step >= max offset + period
            if step >= int(torch.max(OFFSET) + torch.max(TL_PERIOD)) and mask_matrix.sum() > 0:
                rep_state, rep_action, rep_reward, rep_next_state = env.get_state(
                    mask_matrix)
                agent.save_replay(rep_state, rep_action, rep_reward,
                                  rep_next_state, mask_matrix)  # dqn
            # update
            agent.update(mask_matrix)
            state = next_state
            # info
            arrived_vehicles += traci.simulation.getArrivedNumber()
        agent.target_update(epoch)
        agent.update_hyperparams(epoch)  # lr and epsilon update
        b = time.time()
        traci.close()
        print("time:", b - a)
        epoch += 1
        # once in an epoch
        print('======== {} epoch/ return: {:.5f} arrived number:{}'.format(
            epoch, env.cum_reward.sum(), arrived_vehicles))
        update_tensorboard(writer, epoch, env, agent, arrived_vehicles)
        env.test_val = 0
        if epoch % 50 == 0:
            agent.save_weights(
                configs['file_name'] + '_{}'.format(epoch))
    writer.close()
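# Usage sketch (hypothetical values): each trainer above is expected to be called with
# a prepared configs dict, a timestamp string used for the log directory, and a SUMO
# command list as accepted by traci.start, for example:
#
#     sumoCmd = ['sumo', '-c', 'network.sumocfg']   # 'network.sumocfg' is a placeholder
#     dqn_train(configs, time_data, sumoCmd)        # or ppo_train / REINFORCE_train / city_dqn_train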