def test_SUMO():
    #a SUMO environment
    #env = SUMO('nets/simple/simple-traci.sumocfg', 8813, False)
    env = SUMO('nets/OW/OW-traci.sumocfg', 8813, False)

    #an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.1, decay_rate=0.99)

    #for each vehicle in the route file
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)

        #a reinforcement learner
        _ = QLearner(vehID, env, vehDic['origin'], vehDic['destination'], 0.3, 0.9, exp)
        #_ = WPL(vehID, env, vehDic['origin'], vehDic['destination'], 0.002, 0.1)

    #number of episodes
    n_episodes = 100

    #sys.stdout = open('out.txt', 'w')
    #sys.stderr = open('err.txt', 'w')

    print 'ep\tavg tt\truntime'

    #for each episode
    for _ in xrange(n_episodes):
        #print "===== Episode %i ==========================================" % (i)
        env.run_episode(50000)
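# For reference, a minimal sketch of what an epsilon-greedy strategy with multiplicative
# decay typically does. This is an illustration only, not the EpsilonGreedy class used in
# these examples; the class name, choose(), and q_values are hypothetical.
import random

class SimpleEpsilonGreedy(object):
    def __init__(self, epsilon=1.0, min_epsilon=0.1, decay_rate=0.99):
        self.epsilon = epsilon
        self.min_epsilon = min_epsilon
        self.decay_rate = decay_rate

    def choose(self, q_values):
        # explore with probability epsilon, otherwise exploit the greedy action
        if random.random() < self.epsilon:
            action = random.randrange(len(q_values))
        else:
            action = max(range(len(q_values)), key=lambda a: q_values[a])
        # decay epsilon towards its lower bound after each choice
        self.epsilon = max(self.min_epsilon, self.epsilon * self.decay_rate)
        return action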
def test_SUMOTrafficLights():
    print datetime.datetime.now().time()
    print 'SUMO traffic lights'

    # a SUMO environment
    env = SUMOTrafficLights('nets/3x3grid/3x3grid.sumocfg', 8813, False)

    # an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.0, decay_rate=0.95, manual_decay=True)

    # for each traffic light in the net file
    for tlID in env.get_trafficlights_ID_list():
        # create a learner
        _ = QLearner(tlID, env, 0, 0, 0.1, 0.8, exp)

    # number of episodes
    n_episodes = 100

    # for each episode
    for i in xrange(n_episodes):
        # log queue lengths of this episode to a file
        arq_avg_nome = 'tl_%d.txt' % (i)
        arq_tl = open(arq_avg_nome, 'w')  # to save the output to a file
        arq_tl.writelines('##%s## \n' % (datetime.datetime.now().time()))
        arq_tl.write('step,tl0,tl1,tl2,tl3,tl4,tl5,tl6,tl7,tl8,average,all\n')
        env.run_episode(28800, arq_tl, exp)
        arq_tl.close()

    print datetime.datetime.now().time()
def __init__(self, config):
    '''
    :param dict config: Dictionary containing hyperparameters
    '''
    self.networks = [("cNet_", "tNet_")]
    super(DQN, self).__init__(config)
    self.getQTP1 = self.double if self.c.dqn.double else self.vanilla
    self.explore = EpsilonGreedy(self.cNet, config, self.sess)
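# A minimal NumPy sketch of the two next-state value estimates that the double/vanilla
# switch above presumably selects between. The arrays q_online_tp1 and q_target_tp1
# (next-state Q-values from the online and target networks, shape [batch, n_actions])
# are assumed names for illustration, not part of the class above.
import numpy as np

def vanilla_q_tp1(q_target_tp1):
    # standard DQN target: max over the target network's own estimates
    return np.max(q_target_tp1, axis=1)

def double_q_tp1(q_online_tp1, q_target_tp1):
    # double DQN target: the online network picks the action, the target network evaluates it
    greedy_actions = np.argmax(q_online_tp1, axis=1)
    return q_target_tp1[np.arange(q_target_tp1.shape[0]), greedy_actions]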
def test_OPPORTUNE():
    # a SUMO environment
    env = SUMO('nets/OW/OW-traci.sumocfg', 8813, False)

    # an exploration strategy
    exp = EpsilonGreedy(1, 0.925)

    #----------------------------------------------------------
    # create a dict (vehD) mapping each vehicle to its OD-pair (each entry is in the form <"O###D", O, D>),
    # and also a dict (OD_grouping) of vehicles grouped by OD-pair (each entry is the list of vehicles
    # with the same OD-pair); vehicles of the same OD-pair are considered neighbours
    vehD = {}
    OD_grouping = {}
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)

        ODpair = '%s###%s' % (vehDic['origin'], vehDic['destination'])

        vehD[vehID] = [ODpair, vehDic['origin'], vehDic['destination']]

        if ODpair not in OD_grouping:
            OD_grouping[ODpair] = []
        OD_grouping[ODpair].append(vehID)

    # sort the lists of neighbours
    for k in OD_grouping.keys():
        OD_grouping[k].sort()

    # create the communication layer among the learners
    OCL = OPPORTUNECommLayer()

    # create the learners
    for vehID in env.get_vehicles_ID_list():
        # create the list of neighbours of vehID (in this example, such a
        # list is comprised by all vehicles from the same OD-pair as vehID)
        Ni = list(OD_grouping[vehD[vehID][0]])
        Ni.remove(vehID)

        # create the learner corresponding to vehID
        _ = OPPORTUNE(vehID, env, vehD[vehID][1], vehD[vehID][2], 0.3, 0.9, 0.001, exp, Ni, OCL)

        # vehDic = env.get_vehicle_dict(vehID)
        # #a reinforcement learner
        # _ = QLearner(vehID, env, vehDic['origin'], vehDic['destination'], 0.3, 0.9, exp)
    #----------------------------------------------------------

    # number of episodes
    n_episodes = 1000

    print 'ep\tavg tt\truntime'

    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)
def __init__(self, config):
    '''
    :param dict config: Dictionary containing hyperparameters
    '''
    super(Leniency, self).__init__(config)
    self.index = None
    self._index_tp1 = None

    if config.dqn.exploration == 'epsGreedy':
        self.explore = EpsilonGreedy(self.cNet, config, self.sess)
    elif config.dqn.exploration == 'tBarGreedy':
        self.explore = TGreedy(self.cNet, self.replay_memory, config, self.sess)
def test_SUMORouteChoice():
    # a SUMO environment
    env = SUMORouteChoice('nets/OW/OW-traci.sumocfg', 8813, False)

    # convert the SUMO net file to the one accepted by KSP
    #misc.convert_SUMO_to_KSP('nets/OW/OW-traci.sumocfg')

    # create a set of routes for each OD-pair (through the KSP algorithm),
    # and define one such set for each OD-pair (these sets will correspond
    # to the actions available on each state)
    pairs = env.get_OD_pairs()
    for origin, destination in pairs:
        RKSP = KSP.getKRoutesNetFile('nets/OW/OW_for_KSP.net', origin, destination, 4)
        routes = [" ".join(r[0]) for r in RKSP]
        env.set_routes_OD_pair(origin, destination, routes)

    # an exploration strategy
    exp = EpsilonGreedy(epsilon=1, min_epsilon=0.1, decay_rate=0.99)

    # for each vehicle in the route file
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)

        # in the SUMORouteChoice environment the origin is an encoding of the OD-pair
        origin = env.encode_OD(vehDic['origin'], vehDic['destination'])

        # create a learner
        _ = QLearner(vehID, env, origin, vehDic['destination'], 0.8, 0.9, exp)
        #_ = WPL(vehID, env, origin, vehDic['destination'], 0.002, 0.1)

        #print '%s (%s,%s) is in %s'%(Q.get_name(), vehDic['origin'], vehDic['destination'], Q.get_state())

    # number of episodes
    n_episodes = 100

    print 'ep\tavg tt\truntime'

    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)
# import the environment or raise an error
try:
    env = gym.make('ma_gym:Switch2-v0')
except Exception:
    raise ImportError

# get the initial state
state = env.reset()
# discretize the state
state = discretize_switch(state)

# instantiate the agents
ql_agents = [
    Agent(state[i], env.observation_space[i], env.action_space[i], alpha, gamma,
          EpsilonGreedy(epsilon, min_epsilon, decay))
    for i in range(2)
]

while True:
    # get the initial state
    state = env.reset()
    # discretize the state
    state = discretize_switch(state)
    # initialize the dones for every agent
    done = [False for _ in ql_agents]

    while not all(done):
        actions = [agent.choose_action() for agent in ql_agents]
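        # Assumed continuation of the truncated loop above, for illustration only:
        # ma_gym's step() returns per-agent observations, rewards, and dones, but the
        # agent update call (learn) and its signature are hypothetical names, not the
        # actual Agent API used in this script.
        next_state, rewards, done, _ = env.step(actions)
        next_state = discretize_switch(next_state)
        for i, agent in enumerate(ql_agents):
            agent.learn(next_state[i], rewards[i])  # hypothetical update call
        state = next_state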
type=int, dest="btw_gap", default=1000, help="Gap of time to recalculate betweenness (default = 500)") options = parse.parse_args() if not options.cfgfile: print('Wrong usage of script!') print() parse.print_help() sys.exit() env = SUMO(options.cfgfile, use_gui=options.gui, time_before_learning=options.wait_learn, max_veh=options.demand, calc_btw_gap=options.btw_gap) agents = list() for veh in env.get_vehicles_ID_list(): veh_dict = env.get_vehicle_dict(veh) exp = EpsilonGreedy(0.05, 0, -1) agent = QLearner(veh, env, veh_dict['origin'], veh_dict['destination'], 0.5, 0.9, exp) agents.append(agent) env.register_agents(agents) env.update_c2i_params(options.c2i, options.sr) env.run_episode(options.steps, options.mav)
if args.reward == 'queue':
    env._compute_rewards = env._queue_average_reward
else:
    env._compute_rewards = env._waiting_time_reward

for run in range(1, args.runs + 1):
    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(starting_state=initial_states[ts],
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=args.alpha,
                    gamma=args.gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=args.epsilon,
                                                       min_epsilon=args.min_epsilon,
                                                       decay=args.decay))
        for ts in env.ts_ids
    }

    done = False
    infos = []

    if args.fixed:
        while not done:
            _, _, done, info = env.step({})
            infos.append(info)
    else:
        while not done:
            actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}
            s, r, done, info = env.step(actions=actions)
    traci.trafficlight.Phase(35000, 35000, 35000, "rrrGGG"),   # west-east
    traci.trafficlight.Phase(2000, 2000, 2000, "rrryyy")
])

for run in range(1, runs + 1):
    initial_states = env.reset()
    ql_agents = {
        ts: QLAgent(starting_state=env.encode(initial_states[ts]),
                    state_space=env.observation_space,
                    action_space=env.action_space,
                    alpha=alpha,
                    gamma=gamma,
                    exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=decay))
        for ts in env.ts_ids
    }

    infos = []
    done = {'__all__': False}
    while not done['__all__']:
        actions = {ts: ql_agents[ts].act() for ts in ql_agents.keys()}

        s, r, done, info = env.step(actions=actions)
        infos.append(info)

        for agent_id in ql_agents.keys():
            ql_agents[agent_id].learn(new_state=env.encode(s[agent_id]), reward=r[agent_id])
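# For reference, the standard tabular Q-learning update that a learn() call like the one
# above typically performs (a sketch under assumptions, not the library's implementation;
# the q_table dict-of-dicts layout is hypothetical):
def q_learning_update(q_table, state, action, reward, next_state, alpha, gamma):
    # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    best_next = max(q_table[next_state].values()) if q_table[next_state] else 0.0
    td_target = reward + gamma * best_next
    q_table[state][action] += alpha * (td_target - q_table[state][action])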
def main():
    date = datetime.now().strftime('%Y%m%d_%H%M%S')

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default='config/global_config.json')
    parser.add_argument('--num_step', type=int, default=3000,
                        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--algo', type=str, default='DQN', choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference', action="store_true", help='inference or training')
    parser.add_argument('--ckpt', type=str, help='path of the checkpoint to load for inference')
    parser.add_argument('--epoch', type=int, default=30, help='number of training epochs')
    parser.add_argument('--save_freq', type=int, default=100, help='model saving frequency')
    args = parser.parse_args()

    # preparing config
    # # for environment
    config = json.load(open(args.config))
    config["num_step"] = args.num_step
    # config["replay_data_path"] = "replay"
    cityflow_config = json.load(open(config['cityflow_config_file']))
    roadnetFile = cityflow_config['dir'] + cityflow_config['roadnetFile']
    config["lane_phase_info"] = parse_roadnet(roadnetFile)

    # # for agent
    intersection_id = list(config['lane_phase_info'].keys())[0]
    phase_list = config['lane_phase_info'][intersection_id]['phase']
    logging.info(phase_list)
    state_size = config["state_size"] = len(config['lane_phase_info'][intersection_id]['start_lane']) + 1
    #state_size = config["state_size"] = 25
    # the single dimension appended to the tail is for the current phase:
    # [vehicle_count for each start lane] + [current_phase]
    logging.info('state size:%s' % state_size)
    config["action_size"] = len(phase_list)
    phase_list = [1, 2, 3, 4, 5, 6, 7, 8]

    # build cityflow environment
    env = CityFlowEnv(config)

    EPISODES = 1
    num_step = config['num_step']
    state_size = config['state_size']
    total_step = 0
    #num_step = 10
    with tqdm(total=EPISODES * args.num_step) as pbar:
        for i in range(1, EPISODES + 1):
            logging.info('EPISODE >>:%s' % i)
            episode_length = 1
            env.reset()
            t = 0
            state = env.get_state()
            state = np.array(list(state['start_lane_vehicle_count'].values()) + [state['current_phase']])  # a sample state definition
            # print('state1:', state)
            state = np.reshape(state, [1, state_size])
            print('state2:', state)

            agent = QLAgent(starting_state=env.get_rl_state(),
                            state_space=1,
                            action_space=env.action_space,
                            alpha=0.1,
                            gamma=0.99,
                            exploration_strategy=EpsilonGreedy(initial_epsilon=0.05, min_epsilon=0.005, decay=1.0))

            last_action = phase_list[agent.act(state)]
            print('last action:', last_action)
            print('episode_length:{}, num_step:{}'.format(episode_length, num_step))
            while episode_length < num_step:
                #logging.info('current state:%s' % state)
                logging.info('EPISODE LENGTH >>%s' % episode_length)
                action = agent.act(state)  # index of action
                logging.info('new action:%s' % action)
                action_phase = phase_list[action]  # actual action
                logging.info('action phase:>>%s' % action_phase)

                next_state, reward = env.step(action_phase)  # one step
                logging.info('STATE>>:%s' % next_state)
                logging.info('ACTION PHASE:{}'.format(action_phase))
                logging.info('ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.current_phase_time))
                logging.info('NORM ELAPSED TIME ON PHASE {} is {}'.format(env.current_phase, env.get_elapsed_time()))
                #for n_s in next_state.iteritems():
                #    logging.info(n_s)
                logging.info('REWARD:%s' % reward)

                # last_action_phase = action_phase
                episode_length += 1
                total_step += 1
                pbar.update(1)

                # store to replay buffer and prepare the next state
                agent.learn(new_state=env.get_rl_state(), reward=reward)
                env._compute_step_info()
                state = next_state
logging.info("episode:{}/{}, total_step:{}, action:{}, reward:{}" .format(i, EPISODES, total_step, action, reward)) pbar.set_description("total_step:{total_step}, episode:{i}, episode_step:{episode_length}, " "reward:{reward}") env.save_csv()
def test_OPPORTUNE_route_choice():
    # a SUMO environment
    env = SUMORouteChoice('nets/OW/OW-traci.sumocfg', 8813, False)

    # convert the SUMO net file to the one accepted by KSP
    #misc.convert_SUMO_to_KSP('nets/OW/OW-traci.sumocfg')

    # create a set of routes for each OD-pair (through the KSP algorithm),
    # and define one such set for each OD-pair (these sets will correspond
    # to the actions available on each state)
    pairs = env.get_OD_pairs()
    for origin, destination in pairs:
        RKSP = KSP.getKRoutesNetFile('nets/OW/OW_for_KSP.net', origin, destination, 4)
        routes = [" ".join(r[0]) for r in RKSP]
        env.set_routes_OD_pair(origin, destination, routes)

    # an exploration strategy
    exp = EpsilonGreedy(0.05, 0)

    #----------------------------------------------------------
    # create a dict (vehD) mapping each vehicle to its OD-pair (each entry is in the form <"O###D", O, D>),
    # and also a dict (OD_grouping) of vehicles grouped by OD-pair (each entry is the list of vehicles
    # with the same OD-pair); vehicles of the same OD-pair are considered neighbours
    vehD = {}
    OD_grouping = {}
    for vehID in env.get_vehicles_ID_list():
        vehDic = env.get_vehicle_dict(vehID)

        ODpair = env.encode_OD(vehDic['origin'], vehDic['destination'])

        vehD[vehID] = [ODpair, vehDic['origin'], vehDic['destination']]

        if ODpair not in OD_grouping:
            OD_grouping[ODpair] = []
        OD_grouping[ODpair].append(vehID)

    # sort the lists of neighbours
    for k in OD_grouping.keys():
        OD_grouping[k].sort()

    # create the communication layer among the learners
    OCL = OPPORTUNECommLayer()

    # create the learners
    for vehID in env.get_vehicles_ID_list():
        # create the list of neighbours of vehID (in this example, such a
        # list is comprised by all vehicles from the same OD-pair as vehID)
        Ni = list(OD_grouping[vehD[vehID][0]])
        Ni.remove(vehID)

        # in the SUMORouteChoice environment the origin is an encoding of the OD-pair
        origin = vehD[vehID][0]

        # create the learner corresponding to vehID
        _ = OPPORTUNE(vehID, env, origin, vehD[vehID][2], 0.5, 0.9, 0.05, exp, Ni, OCL)
    #----------------------------------------------------------

    # number of episodes
    n_episodes = 10000

    print 'ep\tavg tt\truntime'

    # for each episode
    for _ in xrange(n_episodes):
        env.run_episode(50000)