def main():
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    # from lib.Constants import PERCE_KNOW
    parser.add_argument(
        '-f', '--fleet',
        help='Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")')
    parser.add_argument(
        '-m', '--multiplier',
        help='Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")')
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand')
    parser.add_argument('-AV', '--AV_fleet_size', help='Number of AV drivers')
    parser.add_argument('-NAIVE', '--NAIVE_fleet_size', help='Number of naive drivers')
    parser.add_argument('-PRO', '--PRO_fleet_size', help='Number of professional drivers')
    parser.add_argument(
        '-BH', '--behavioral_opt',
        help="Perform behavioral optimization, pass 'yes' or 'no'")
    parser.add_argument(
        '-SURGE', '--surge_pricing',
        help="Perform surge pricing, pass 'yes' or 'no'")
    parser.add_argument(
        '-k', '--know',
        help='Percent knowing fare, formatted as comma-separated list (i.e. "-k 1,1.5,2")')
    parser.add_argument(
        '-p', '--pro',
        help='Percent pro drivers, formatted as comma-separated list (i.e. "-p 1,1.5,2")')
    parser.add_argument('-r', '--replications',
                        help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta', help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy', help='bonus per zone')
    parser.add_argument('-budget', '--budget', help='budget')
    args = parser.parse_args()
    # TODO: argparse should get the bonus policy as input
    data_instance = Data()

    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.behavioral_opt:
        if args.behavioral_opt.lower() in ('yes', 'true'):
            do_behavioral_opt = True
        else:
            do_behavioral_opt = False
    else:
        do_behavioral_opt = data_instance.do_behavioral_opt

    if args.surge_pricing:
        if args.surge_pricing.lower() in ('yes', 'true'):
            do_surge_pricing = True
        else:
            do_surge_pricing = False
    else:
        do_surge_pricing = data_instance.do_surge_pricing

    if args.PRO_fleet_size:
        set_of_NUM_OF_PRO_DRIVERS = [int(args.PRO_fleet_size)]
    else:
        set_of_NUM_OF_PRO_DRIVERS = [data_instance.PRO_FLEET_SIZE]

    if args.NAIVE_fleet_size:
        set_of_NUM_OF_NAIVE_DRIVERS = [int(args.NAIVE_fleet_size)]
    else:
        set_of_NUM_OF_NAIVE_DRIVERS = [data_instance.NAIVE_FLEET_SIZE]

    if args.AV_fleet_size:
        set_of_NUM_OF_AV_DRIVERS = [int(args.AV_fleet_size)]
    else:
        set_of_NUM_OF_AV_DRIVERS = [data_instance.AV_FLEET_SIZE]

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS

    if args.beta:
        beta = float(args.beta)
    else:
        beta = configs_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND

    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1

    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY

    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET

    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"
    from lib.rebalancing_optimizer import RebalancingOpt

    for num_pros in set_of_NUM_OF_PRO_DRIVERS:
        for num_naives in set_of_NUM_OF_NAIVE_DRIVERS:
            for num_avs in set_of_NUM_OF_AV_DRIVERS:
                for surge in surges:
                    for repl in range(n_rep):
                        TOTAL_FLEET_SIZE = 2500
                        num_naives = TOTAL_FLEET_SIZE - num_pros
                        data_instance.AV_FLEET_SIZE = num_avs
                        data_instance.NAIVE_FLEET_SIZE = num_naives
                        data_instance.PRO_FLEET_SIZE = num_pros
                        data_instance.do_behavioral_opt = do_behavioral_opt
                        data_instance.do_surge_pricing = do_surge_pricing
                        if do_behavioral_opt:
                            st = '/with_behavioral_opt/'
                        else:
                            st = '/no_behavioral_opt/'
                        output_path = ("./Outputs/"
                                       + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
                                       + st
                                       + 'Pro_' + str(num_pros)
                                       + 'NAIVE_' + str(num_naives)
                                       + 'AV_' + str(num_avs)
                                       + 'budget_' + str(budget) + "_"
                                       + "bonus_policy" + "_" + str(bonus_policy) + "_"
                                       + 'do_surge' + "_" + str(data_instance.do_surge_pricing) + "_"
                                       + 'do_opt' + "_" + str(data_instance.do_behavioral_opt) + "_"
                                       + str(datetime.datetime.now()).split('.')[0] + "/")
                        if not os.path.exists(output_path):
                            os.makedirs(output_path)
                        print("iteration number ", repl)
                        print('Surge is {}'.format(surge))
                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.output_path = output_path
                        # data_instance.do_behavioral_opt = False
                        m = Model(data_instance, configs_dict, beta, output_path)
                        # start time
                        stime = time.time()
                        # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        # TODO: every run should include the policy from the start
                        # TODO: process February as well.
                        months = [1, 2]
                        days = [30, 15]
                        stop_month = months[-1]
                        for ix, month in enumerate(months):
                            for d_idx in range(1, days[ix]):
                                stop_day = days[ix]
                                if month == 1 and d_idx >= 15:
                                    # NOTE: this alone will not have the desired effect,
                                    # because the operator's attribute is set at the beginning
                                    data_instance.do_behavioral_opt = True
                                    m.operator.do_behavioral_opt = True
                                    # data_instance.do_surge_pricing = True
                                    # m.operator.do_surge_pricing = True
                                for T in range(data_instance.WARMUP_TIME_SECONDS,
                                               data_instance.T_TOTAL_SECONDS,
                                               data_instance.INT_ASSIGN):
                                    m.dispatch_at_time(T, day_idx=d_idx)
                                m.get_service_rate_per_zone(d_idx, month)
                                m.get_drivers_earnings_for_one_day(d_idx, month)
                                m.get_operators_earnings_for_one_day(d_idx, month)
                                print(f"Starting a new day, finished day number {d_idx + 1} of month {month}")
                                print(f"it took {(time.time() - stime) / 60} minutes")
                                m.reset_after_one_day_of_operation(stop_month, stop_day)

                        if num_pros > 0:
                            all_dfs = pd.concat(
                                [v.report_learning_rates() for v in m.vehicles
                                 if v.driver_type == DriverType.PROFESSIONAL],
                                ignore_index=True)
                            all_dfs.to_csv(output_path + "fmean for all drivers.csv")

                            all_fare_reliability_dfs = pd.concat(
                                [v.report_fare_reliability_evolution() for v in m.vehicles
                                 if v.driver_type == DriverType.PROFESSIONAL],
                                ignore_index=True)
                            all_fare_reliability_dfs.to_csv(
                                output_path + "fare reliability for all drivers.csv")

                            all_m_reliability_dfs = pd.concat(
                                [v.report_matching_reliability_evolution() for v in m.vehicles
                                 if v.driver_type == DriverType.PROFESSIONAL],
                                ignore_index=True)
                            all_m_reliability_dfs.to_csv(
                                output_path + "matching reliability for all drivers.csv")

                            all_fare_reliability_dfs = pd.concat(
                                [v.report_surge_bonus_behavior() for v in m.vehicles
                                 if v.driver_type == DriverType.PROFESSIONAL],
                                ignore_index=True)
                            all_fare_reliability_dfs.to_csv(
                                output_path + "surge behavior for all drivers.csv")

                        all_earning_dfs = pd.concat(
                            [v.report_final_earnings() for v in m.vehicles],
                            ignore_index=True)
                        all_earning_dfs.to_csv(output_path + "earnings for all drivers.csv")

                        operators_revenue = m.operator.report_final_revenue()
                        operators_revenue.to_csv(output_path + "operators_revenue.csv")

                        print('Total drivers: ', len(m.vehicles))
                        print('# of Pro drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.PROFESSIONAL]))
                        print('# of naive drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.NAIVE]))
                        print('# of inexperienced drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.INEXPERIENCED]))

                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes".format(runtime=runtime / 60))

                        report = m.report_final_performance()

                        # So that it doesn't save a file with 1.5.py, rather 15.py
                        ss = str(surge).split('.')
                        ss = ''.join(ss)
                        fleet_size = num_avs + num_pros + num_naives
                        report.to_csv(output_path + "report for fleet size " + str(fleet_size)
                                      + " surge " + str(ss)
                                      + "pro_ " + str(num_pros)
                                      + "naive_ " + str(num_naives)
                                      + "AV_" + str(num_avs)
                                      + " repl" + str(repl) + ".csv")
class BonusEnv(MultiAgentEnv):
    def __init__(self, env_config):
        # data_instance = Data.init_from_config_dic(config_dict)
        # self.model = Model(data_instance)
        # self.T = WARMUP_TIME_SECONDS
        self.min_action = 0
        self.max_action = config_dict["MAX_BONUS"]
        self.SHARED_REWARD = env_config.get("SHARED_REWARD", False)
        # define action/state space per zone
        # https://github.com/openai/gym/wiki/Table-of-environments
        # https://github.com/openai/gym/blob/master/gym/envs/classic_control/continuous_mountain_car.py
        # https://github.com/openai/multiagent-particle-envs/blob/master/bin/interactive.py
        self.action_space = spaces.Box(low=self.min_action,
                                       high=self.max_action,
                                       shape=(1, ),
                                       dtype=np.float32)
        self.reset()

    def step(self, action_n):
        """
        Sets the bonus value for each zone and advances the simulation.
        action_n is a dict {z_id: bonus}; here the environment, rather than the operator,
        sets the bonus values.
        """
        for zone in self.model.zones:
            requested_bonus = action_n[zone.id]
            # make sure it is within bounds
            requested_bonus = min(max(requested_bonus, self.min_action), self.max_action)
            if requested_bonus <= self.model.budget:
                self.model.budget -= requested_bonus
                zone.set_bonus(requested_bonus)
        # simulate for a while (e.g., 10 mins)
        for t in range(self.T, self.T + POLICY_UPDATE_INTERVAL, INT_ASSIGN):
            self.model.dispatch_at_time(t)
        # observe the next state
        states = self._get_states()
        # observe the (global) reward
        reward = self._get_reward_unserved()
        # other info
        info = self._get_info()
        # update the clock
        self.T += POLICY_UPDATE_INTERVAL
        # done flag
        done = self._is_done()
        return states, reward, done, info

    def reset(self):
        data_instance = Data.init_from_config_dic(config_dict)
        self.model = Model(data_instance)
        self.T = WARMUP_TIME_SECONDS
        # run the warm-up period
        for t in range(self.T, self.T + 3600, INT_ASSIGN):
            self.model.dispatch_at_time(t)
        self.T = ANALYSIS_TIME_SECONDS
        print("##########################")
        print("End of the warm up time")
        print("##########################")

    def _get_states(self):
        """
        Placeholder; should return info per zone.
        :return:
        """
        demand, supply = self.model.get_both_supply_and_demand_per_zone(self.T)
        return demand

    def _get_stats(self):
        self.stats = [z.generate_performance_stats() for z in self.model.zones]

    def _get_reward(self):
        if self.SHARED_REWARD:
            rew = np.sum([
                z.reward_dict[np.ceil(self.T / POLICY_UPDATE_INTERVAL)]
                for z in self.model.zones
            ])
            return {z.id: rew for z in self.model.zones}
        else:
            return {z.id: 1 for z in self.model.zones}

    def _get_reward_unserved(self):
        return np.sum([
            z.generate_performance_stats(self.T)[3] for z in self.model.zones
        ])

    def _get_info(self):
        return None

    def render(self, mode='human'):
        pass

    def _is_done(self):
        return self.T == T_TOTAL_SECONDS
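# Illustrative only: a minimal rollout of BonusEnv under a random bonus policy. It relies on
# names already used above (config_dict["MAX_BONUS"], env.model.zones); the random per-zone
# bonus is a stand-in for a learned multi-agent policy, not part of the original code.
def _demo_bonus_rollout(n_steps=5):
    env = BonusEnv(env_config={"SHARED_REWARD": False})
    for _ in range(n_steps):
        # one (clipped) bonus value per zone, keyed by zone id
        action_n = {z.id: np.random.uniform(0, config_dict["MAX_BONUS"])
                    for z in env.model.zones}
        states, reward, done, info = env.step(action_n)
        print("T =", env.T, "unserved demand (reward):", reward)
        if done:
            break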
class RebalancingEnv(gym.Env):
    """
    RebalancingEnv is the environment class for DQN.

    Attributes:
        model: AMoD system to train
        dT: time interval for training
        penalty: penalty of rebalancing a vehicle
        action_space: action space
        state: the system state. It's (ui, vi, cik) for every zone, where cik is the cost
            of going to i, e.g., 67 zones -> 67 * 3.
        center: the centroid of cells
        input_dim: input dimension
    """

    def __init__(self, config, penalty=-10):
        """
        @param config:
        @param penalty:
        """
        print("INSIDE INIT FUNCTION")
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
        )
        veh = self.model.vehilcs[-1]
        veh.is_AV = True
        # self.model = model
        # self._model_ = copy.deepcopy(model)
        self.dT = INT_REBL
        self.penalty = penalty
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

    def step(self, action):
        """
        Performs one step of the environment.

        @param action: a vector of length N_AV, which contains the target zone for an idle
            vehicle and inaction for busy ones. Implements the action and returns the new
            state and reward.
        @return: observed state, reward, flag
        @note: currently the DQN is inside the model.dispatch_at_time function
        """
        flag = False
        self.step_count += 1
        reward = 0
        # the AV
        veh = self.model.vehilcs[-1]
        # as long as a decision for the AV is not needed, keep simulating
        while not veh.should_move():
            T = self.T
            T_ = self.T + INT_ASSIGN
            # dispatch the system for INT_ASSIGN seconds
            while T < T_:
                self.model.dispatch_at_time(T, self.penalty)
                T += INT_ASSIGN
            self.T = self.T + INT_ASSIGN
            # check whether the AV is ready to move; if not, keep simulating
        print("AV should move")
        T = self.T
        T_ = self.T + INT_ASSIGN
        # move it
        while T < T_:
            self.model.dispatch_at_time(T, self.penalty, action)
            T += INT_ASSIGN
        self.T = self.T + INT_ASSIGN
        # calculate the reward of that action
        total_new_income = np.sum(veh.profits) - self.old_income
        self.old_income = np.sum(veh.profits)
        reward += total_new_income
        self.update_state()
        # print("T_TOTAL_SECONDS", T_TOTAL_SECONDS)
        # print("self.T", self.T)
        if self.T >= T_TOTAL_SECONDS:
            flag = True
            print("Episode is done!")
        return self.state, reward, flag, {}

    def update_state(self, vid=-1):
        """
        Updates the state to be the state of a vehicle.

        @param vid: vehicle list index that chooses the vehicle for which to get the state.
        @return: state of the vehicle
        """
        veh = self.model.vehilcs[vid]
        self.state = self.model.get_state(veh)

    def reset(self):
        """
        Restarts the gym environment by resetting all parameters to default.

        @return: the modified state.
        """
        print("Calling the reset method!")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
        )
        veh = self.model.vehilcs[-1]
        veh.is_AV = True
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0
        self.update_state()
        # self.amods.append(copy.deepcopy(self.amod))
        return self.state
def main(): """ Parses command line arguments, sets training environment parameters, creates deep Q-network and trains it on gym environment. """ parser = argparse.ArgumentParser( description="Simulation of drivers' behavior") parser.add_argument( '-f', '--fleet', help= 'Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")' ) parser.add_argument( '-m', '--multiplier', help= 'Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")' ) parser.add_argument('-b', '--bonus', type=int, help='Bonus') parser.add_argument('-d', '--demand', help='Percent false demand ') parser.add_argument( '-k', '--know', help= 'Percent knowing fare, formatted as comma-separated list (i.e. "-m 1,1.5,2") ' ) parser.add_argument( '-p', '--pro', help= 'Percent pro drivers, formatted as comma-separated list (i.e. "-p 1,1.5,2") ' ) parser.add_argument( '-av', '--av', help= 'Percent AV drivers, formatted as comma-separated list (i.e. "-av 1,1.5,2") ' ) parser.add_argument('-nb', '--nb', help='number of steps to train Rl ') args = parser.parse_args() if args.fleet: fleet_sizes = [int(x) for x in args.fleet.split(',')] # fleet_sizes = args.fleet else: fleet_sizes = FLEET_SIZE if args.multiplier: # surge = args.multiplier surges = [float(x) for x in args.multiplier.split(',')] else: surges = [SURGE_MULTIPLIER] if args.know: # surge = args.multiplier perc_know = [float(x) for x in args.know.split(',')] else: perc_know = [PERCE_KNOW] if args.bonus: bonus = args.bonus else: bonus = BONUS if args.pro: pro_share = [float(x) for x in args.pro.split(',')] else: pro_share = [PRO_SHARE] if args.demand: percent_false_demand = float(args.demand) else: percent_false_demand = PERCENT_FALSE_DEMAND if args.av: av_share = [float(x) for x in args.av.split(',')] else: av_share = [1] if args.nb: nb_steps = args.nb else: nb_steps = 300 for fleet_size in fleet_sizes: for surge in surges: for perc_k in perc_know: for pro_s in pro_share: m = Model(ZONE_IDS, DEMAND_SOURCE, WARMUP_TIME_HOUR, ANALYSIS_TIME_HOUR, fleet_size=fleet_size, pro_share=pro_s, surge_multiplier=surge, bonus=bonus, percent_false_demand=percent_false_demand, percentage_know_fare=perc_k) # make one veh to be AV veh = m.vehilcs[-1] veh.is_AV = True # env = RebalancingEnv(m, penalty=-0) nb_actions = env.action_space.n input_shape = (1, ) + env.state.shape input_dim = env.input_dim model = Sequential() model.add(Flatten(input_shape=input_shape)) model.add(Dense(256, activation='relu')) model.add(Dense(nb_actions, activation='linear')) memory = SequentialMemory(limit=2000, window_length=1) policy = EpsGreedyQPolicy() dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=100, target_model_update=1e-2, policy=policy, gamma=.99) dqn.compile(Adam(lr=0.001, epsilon=0.05, decay=0.0), metrics=['mae']) dqn.fit(env, nb_steps=nb_steps, action_repetition=1, visualize=False, verbose=2) dqn.save_weights('new_dqn_weights_%s.h5f' % (nb_steps), overwrite=True)
class RebalancingEnv(gym.Env):
    """
    RebalancingEnv is the environment class for DQN.

    Attributes:
        model: AMoD system to train
        dT: time interval for training
        penalty: penalty of rebalancing a vehicle
        action_space: action space
        state: the system state. It's (ui, vi, cik) for every zone, where cik is the cost
            of going to i, e.g., 67 zones -> 67 * 3.
        center: the centroid of cells
        input_dim: input dimension
    """

    def __init__(self, config):
        super().__init__()
        print("INSIDE INIT FUNCTION")
        print(config["av_share"])
        self.config = config
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=config["fleet_size"],
            PRO_SHARE=config["pro_s"],
            SURGE_MULTIPLIER=config["surge"],
            bonus=config["bonus"],
            percent_false_demand=config["percent_false_demand"],
            percentage_know_fare=config["perc_k"],
            AV_share=config["av_share"],
        )
        self.dT = INT_REBL
        self.action_space = spaces.Discrete(len(ZONE_IDS))
        # why not define an observation space?
        self.state = np.zeros((len(ZONE_IDS), 3))
        self.observation_space = np.zeros((len(ZONE_IDS), 3))
        # self.center = np.zeros((Mlng, Mlat, 2))
        self.input_dim = 3 * len(ZONE_IDS)
        self.step_count = 0
        self.epi_count = 0
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0

    def step(self, actions):
        """
        @param actions: a vector of length N_AV, which contains the target zone for idle
            vehicles and inaction for busy ones. Implements the actions and returns the
            new state and reward.
        @note: currently the DQN is inside the model.dispatch_at_time function
        """
        # print("Inside Step")
        # print("Step count: ", self.step_count)
        # print("T: ", self.T)
        flag = False
        self.step_count += 1
        for i, veh in enumerate([v for v in self.model.av_vehs]):
            # if the veh has to move, then move it
            if not np.isnan(actions[i]):
                veh.set_action(actions[i])
        # move the world forward
        self.model.dispatch_at_time(self.T)
        self.T = self.T + INT_ASSIGN
        # print("end T: ", self.T)

        state_n = []
        for i, veh in enumerate([v for v in self.model.av_vehs]):
            state_n.append(self.model.get_state(veh, self.T))

        # total_new_income = np.sum(veh.profits) - self.old_income
        # self.old_income = np.sum(veh.profits)
        # # normalize the reward.
        # # from previous runs, avg revenue is 35 with std of 5
        # # (based on Nuts and Bolts of DRL)
        # normalized_income = (total_new_income)  # /10
        # reward = normalized_income
        # print("reward")
        # print(reward)
        # total_new_income = np.sum(model.operator.revenues) - self.old_income
        # self.old_income = np.sum(model.operator.revenues)
        # reward += total_new_income
        # report = self.model.get_service_rate_per_zone()
        # system_LOS = report.served.sum() / report.total.sum()
        # reward += system_LOS
        # self.T = self.T + INT_ASSIGN

        print("T_TOTAL_SECONDS", T_TOTAL_SECONDS)
        print("self.T", self.T)
        if self.T >= T_TOTAL_SECONDS:
            flag = True
            print("Episode is done!")
        return state_n, None, flag, {}

    def reset(self):
        print("Calling the reset method!")
        self.model = Model(
            ZONE_IDS,
            DEMAND_SOURCE,
            WARMUP_TIME_HOUR,
            ANALYSIS_TIME_HOUR,
            FLEET_SIZE=self.config["fleet_size"],
            PRO_SHARE=self.config["pro_s"],
            SURGE_MULTIPLIER=self.config["surge"],
            bonus=self.config["bonus"],
            percent_false_demand=self.config["percent_false_demand"],
            percentage_know_fare=self.config["perc_k"],
            AV_share=self.config["av_share"],
        )
        self.total_reward = 0.0
        self.T = WARMUP_TIME_SECONDS
        self.old_income = 0
        state_n = []
        for _, veh in enumerate([v for v in self.model.av_vehs]):
            state_n.append(self.model.get_state(veh, self.T))
        return state_n
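# Illustrative only: a minimal rollout sketch for the multi-AV RebalancingEnv above. The
# config values are hypothetical placeholders; step() expects one target-zone index per AV
# (np.nan for an AV that should not act) and, as written above, returns None for the reward.
def _demo_multi_av_rollout(n_steps=10):
    config = {"fleet_size": 1000, "pro_s": 0.0, "surge": 1.0, "bonus": 0,
              "percent_false_demand": 0.0, "perc_k": 0.0, "av_share": 0.1}
    env = RebalancingEnv(config)
    state_n = env.reset()
    for _ in range(n_steps):
        # one random target zone per AV
        actions = [env.action_space.sample() for _ in range(len(state_n))]
        state_n, _, done, _ = env.step(actions)
        if done:
            break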
def main():
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    parser.add_argument(
        '-f', '--fleet',
        help='Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")')
    parser.add_argument(
        '-m', '--multiplier',
        help='Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")')
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand')
    parser.add_argument(
        '-k', '--know',
        help='Percent knowing fare, formatted as comma-separated list (i.e. "-k 1,1.5,2")')
    parser.add_argument(
        '-p', '--pro',
        help='Percent pro drivers, formatted as comma-separated list (i.e. "-p 1,1.5,2")')
    parser.add_argument('-r', '--replications',
                        help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta', help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy', help='bonus per zone')
    parser.add_argument('-budget', '--budget', help='budget')
    args = parser.parse_args()
    # TODO: argparse should get the bonus policy as input
    data_instance = Data()

    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS

    if args.beta:
        beta = float(args.beta)
    else:
        beta = configs_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND

    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1

    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY

    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET

    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"
    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        output_path = ("./Outputs/avg_fare_info/" + str(budget) + "_"
                                       + str(bonus_policy) + "_"
                                       + str(datetime.datetime.now()).split('.')[0] + "/")
                        if not os.path.exists(output_path):
                            os.makedirs(output_path)
                        print("iteration number ", repl)
                        print('Fleet size is {f}'.format(f=fleet_size))
                        print('Surge is {}'.format(surge))
                        print('Percentage knowing fares is {}'.format(perc_k))
                        print('Percentage of professional drivers {}'.format(pro_s))
                        data_instance.FLEET_SIZE = fleet_size
                        data_instance.PRO_SHARE = pro_s
                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.PERCENT_FALSE_DEMAND = percent_false_demand
                        data_instance.PERCE_KNOW = perc_k

                        m = Model(data_instance, configs_dict, beta)
                        # start time
                        stime = time.time()
                        # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(data_instance.WARMUP_TIME_SECONDS,
                                       data_instance.T_TOTAL_SECONDS,
                                       data_instance.INT_ASSIGN):
                            m.dispatch_at_time(T)

                        print('Total drivers: ', len(m.vehicles))
                        print('# of Pro drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.PROFESSIONAL]))
                        print('# of naive drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.NAIVE]))
                        print('# of inexperienced drivers: ',
                              len([v for v in m.vehicles
                                   if v.driver_type == DriverType.INEXPERIENCED]))

                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes".format(runtime=runtime / 60))

                        report = m.get_service_rate_per_zone()
                        # So that it doesn't save a file with 1.5.py, rather 15.py
                        ss = str(surge).split('.')
                        ss = ''.join(ss)
                        report.to_csv(output_path + "report for fleet size " + str(fleet_size)
                                      + " surge " + str(ss)
                                      + "fdemand= " + str(percent_false_demand)
                                      + "perc_k " + str(perc_k)
                                      + "pro_s " + str(pro_s)
                                      + " repl" + str(repl) + ".csv")
def main():
    parser = argparse.ArgumentParser(
        description="Simulation of drivers' behavior")
    parser.add_argument(
        "-f", "--fleet",
        help='Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")',
    )
    parser.add_argument(
        "-m", "--multiplier",
        help='Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")',
    )
    parser.add_argument("-b", "--bonus", type=int, help="Bonus")
    parser.add_argument("-d", "--demand", help="Percent false demand")
    parser.add_argument(
        "-k", "--know",
        help='Percent knowing fare, formatted as comma-separated list (i.e. "-k 1,1.5,2")',
    )
    parser.add_argument(
        "-p", "--pro",
        help='Percent pro drivers, formatted as comma-separated list (i.e. "-p 1,1.5,2")',
    )
    parser.add_argument("-r", "--replications",
                        help="number of times to run the simulation")
    args = parser.parse_args()

    if args.fleet:
        fleet_sizes = [int(x) for x in args.fleet.split(",")]
    else:
        fleet_sizes = FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(",")]
    else:
        surges = [SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(x) for x in args.know.split(",")]
    else:
        perc_know = [PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = BONUS

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(",")]
    else:
        pro_share = [PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = PERCENT_FALSE_DEMAND

    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 1

    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        print("iteration number ", repl)
                        print("Fleet size is {f}".format(f=fleet_size))
                        print("Surge is {}".format(surge))
                        print("Percentage knowing fares is {}".format(perc_k))
                        print("Percentage of professional drivers {}".format(pro_s))
                        m = Model(
                            ZONE_IDS,
                            DEMAND_SOURCE,
                            WARMUP_TIME_HOUR,
                            ANALYSIS_TIME_HOUR,
                            fleet_size=fleet_size,
                            pro_share=pro_s,
                            surge_multiplier=surge,
                            bonus=bonus,
                            percent_false_demand=percent_false_demand,
                            percentage_know_fare=perc_k,
                        )
                        # start time
                        stime = time.time()
                        # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(WARMUP_TIME_SECONDS, T_TOTAL_SECONDS, INT_ASSIGN):
                            m.dispatch_at_time(T)
                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes".format(runtime=runtime / 60))
                        m.runtime = runtime

                        report = m.get_service_rate_per_zone()
                        # So that it doesn't save a file with 1.5.py, rather 15.py
                        ss = str(surge).split(".")
                        ss = "".join(ss)
                        report.to_csv(output_path + "report for fleet size " + str(fleet_size)
                                      + " surge " + str(ss)
                                      + "fdemand= " + str(percent_false_demand)
                                      + "perc_k " + str(perc_k)
                                      + "pro_s " + str(pro_s)
                                      + " repl" + str(repl) + ".csv")

                        pickle.dump(
                            m,
                            open(
                                output_path + "model for fleet size " + str(fleet_size)
                                + " surge " + str(ss)
                                + "fdemand " + str(percent_false_demand)
                                + "perc_k " + str(perc_k)
                                + "pro_s " + str(pro_s)
                                + " repl" + str(repl) + ".p",
                                "wb",
                            ),
                        )
def main():
    print("Start of main()")
    # TODO: all these should be cleaned up like this:
    # https://github.com/sisl/MADRL/blob/master/madrl_environments/walker/train_multi_walker.py
    parser = argparse.ArgumentParser(description="Simulation of drivers' behavior")
    parser.add_argument('-f', '--fleet',
                        help='Fleet sizes to simulate, formatted as comma-separated list (i.e. "-f 250,275,300")')
    parser.add_argument('-m', '--multiplier',
                        help='Surge multiplier, formatted as comma-separated list (i.e. "-m 1,1.5,2")')
    parser.add_argument('-b', '--bonus', type=int, help='Bonus')
    parser.add_argument('-d', '--demand', help='Percent false demand')
    parser.add_argument('-k', '--know',
                        help='Percent knowing fare, formatted as comma-separated list (i.e. "-k 1,1.5,2")')
    parser.add_argument('-p', '--pro',
                        help='Percent pro drivers, formatted as comma-separated list (i.e. "-p 1,1.5,2")')
    parser.add_argument('-r', '--replications', help='number of times to run the simulation')
    parser.add_argument('-bb', '--beta', help='BETA')
    parser.add_argument('-b_policy', '--bonus_policy', help='bonus per zone')
    parser.add_argument('-budget', '--budget', help='budget')
    args = parser.parse_args()
    # TODO: argparse should get the bonus policy as input

    print("instantiate Data object")
    data_instance = Data()

    if args.fleet:
        fleet_sizes = [int(args.fleet)]
    else:
        fleet_sizes = data_instance.FLEET_SIZE

    if args.multiplier:
        # surge = args.multiplier
        surges = [float(x) for x in args.multiplier.split(',')]
    else:
        surges = [data_instance.SURGE_MULTIPLIER]

    if args.know:
        perc_know = [float(args.know)]
    else:
        perc_know = [data_instance.PERCE_KNOW]

    if args.bonus:
        bonus = args.bonus
    else:
        bonus = data_instance.BONUS

    if args.beta:
        beta = float(args.beta)
    else:
        beta = config_dict["BETA"]

    if args.pro:
        pro_share = [float(x) for x in args.pro.split(',')]
    else:
        pro_share = [data_instance.PRO_SHARE]

    if args.demand:
        percent_false_demand = float(args.demand)
    else:
        percent_false_demand = data_instance.PERCENT_FALSE_DEMAND

    if args.replications:
        n_rep = int(args.replications)
    else:
        n_rep = 5

    if args.bonus_policy:
        bonus_policy = args.bonus_policy
    else:
        bonus_policy = data_instance.BONUS_POLICY

    if args.budget:
        budget = args.budget
    else:
        budget = data_instance.BUDGET

    # output_path = "./Outputs/avg_fare_info/" + str(beta) + "/"
    for fleet_size in fleet_sizes:
        for surge in surges:
            for perc_k in perc_know:
                for pro_s in pro_share:
                    for repl in range(n_rep):
                        # output_path = "./Outputs/avg_fare_info/" + str(budget) + "_" + str(bonus_policy) + "_" + \
                        #               str(datetime.datetime.now()).split('.')[0] + "/"
                        # if not os.path.exists(output_path):
                        #     os.makedirs(output_path)
                        print("iteration number ", repl)
                        print('Fleet size is {f}'.format(f=fleet_size))
                        print('Surge is {}'.format(surge))
                        print('Percentage knowing fares is {}'.format(perc_k))
                        print('Percentage of professional drivers {}'.format(pro_s))
                        data_instance.FLEET_SIZE = fleet_size
                        data_instance.PRO_SHARE = pro_s
                        data_instance.SURGE_MULTIPLIER = surge
                        data_instance.BONUS = bonus
                        data_instance.PERCENT_FALSE_DEMAND = percent_false_demand
                        data_instance.PERCE_KNOW = perc_k

                        print("Instantiated the model")
                        m = Model(data_instance, beta)
                        # start time
                        stime = time.time()
                        # dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
                        for T in range(data_instance.WARMUP_TIME_SECONDS,
                                       data_instance.T_TOTAL_SECONDS,
                                       data_instance.INT_ASSIGN):
                            m.dispatch_at_time(T)
                        # end time
                        etime = time.time()
                        # run time of this simulation
                        runtime = etime - stime
                        print("The run time was {runtime} minutes".format(runtime=runtime / 60))

                        m.save_zonal_stats("../performance_stats/")
import time

import numpy as np

from lib.Data import Data
from lib.configs import config_dict
from lib.utils import Model
from lib.Constants import (POLICY_UPDATE_INTERVAL, WARMUP_TIME_SECONDS, T_TOTAL_SECONDS,
                           INT_ASSIGN, ANALYSIS_TIME_SECONDS, DEMAND_UPDATE_INTERVAL)

data_instance = Data.init_from_config_dic(config_dict)
m = Model(data_instance)
print('Fleet size is {f}'.format(f=data_instance.FLEET_SIZE))

stime = time.time()
# dispatch the system for T_TOTAL seconds, at the interval of INT_ASSIGN
for T in range(data_instance.WARMUP_TIME_SECONDS,
               data_instance.T_TOTAL_SECONDS,
               data_instance.INT_ASSIGN):
    m.dispatch_at_time(T)
# end time
etime = time.time()
# run time of this simulation
runtime = etime - stime
print("The run time was {runtime} minutes".format(runtime=runtime / 60))
m.save_zonal_stats("../performance_stats/")