def run_cart_event():
    # Handle one cart lifecycle event (closure: reads proc_id, event_type,
    # sim_time, self, schedule_next_cart_event from the enclosing scope).
    # State-transition events update the cart's state machine; a pass-through
    # arrival additionally schedules the cart's next event immediately, since
    # no agent decision is taken in that case.
    idx = int(proc_id.split('_')[1])
    cart = self.carts[idx]
    if event_type in ('car_arrival_stop', 'car_arrival_past', 'alighting',
                      'boarding', 'decision_time_charging'):
        cart.updateState(event_type, sim_time)
    if event_type == 'car_arrival_past':
        action, reason = 'NAction', 'Car continues past'
        next_time, next_event_type = cart.generateNext_TimeEvent(action)
        schedule_next_cart_event(next_time, next_event_type)
    # Trace output for debugging the event stream.
    print('-' * 36 + '-' * 36)
    # print('\n')
    print(dt.time_handle(sim_time))
    # if self.passenger_vector().sum() > 0:
    #     print('Passenger locations before update')
    #     self.print_passengers(self.passenger_vector())
    print('Updated Event for {}'.format(cart.cartID))
    print('Current stop of the cart {} is -{}-'.format(
        cart.cartID, cart.current_stop))
    print('Current event is -{}- at time {} in STATE -{}-'.format(
        event_type, dt.time_handle(sim_time), cart.state))
    print('-' * 36 + '-' * 36, '\n')
def generate_passenger(self, time):
    """Spawn a randomly-sized passenger group at this stop.

    Draws a group size in {1..4} with probabilities
    ``self.PASSNGR_TYPE_PROBS`` (1: single, 2: couple, 3: triple,
    4: family), creates that many ``psg.Passengers`` sharing one
    destination, appends them to ``self._passengers`` and logs each one.
    """
    daytime = dt.time_handle(time_in_seconds=time)
    draw = np.random.choice([1, 2, 3, 4], 1, p=self.PASSNGR_TYPE_PROBS)
    group_size = draw[0]
    group_kind = self._passengerTypes[group_size]
    shared_destination = self.generate_destination()
    for _ in range(group_size):
        newcomer = psg.Passengers(self,
                                  time,
                                  passengerType=group_kind,
                                  destination=shared_destination)
        self._passengers.append(newcomer)
        self.logger_stop.info(
            'Passenger {} {} is generated at time {}'
            ' at bus stop {} to destination stop {}'.format(
                newcomer.passengerID, newcomer.passengerType, daytime,
                newcomer.cart_stop.stopID, newcomer.destination))
def step(self, chosen_action=None):
    """Advance the simulation until the next agent decision point.

    If ``chosen_action`` is given, it is first applied for the cart that
    requested an agent decision (bookkeeping + scheduling of that cart's
    next event).  The event loop then runs until either an
    ``agent_action`` decision point is reached (returns early) or
    ``self._end_time`` is exceeded (dumps all statistics to CSV and
    returns with ``self._done = True``).

    Returns:
        tuple: (action_state record for the current cart, reward, done).
    """
    if chosen_action:
        '''agent_action will choose here'''
        action = chosen_action
        reason = 'Sampled Action'
        cart_index = int(self.current_proc_id.split('_')[1])
        self.current_cart = self.carts[cart_index]
        # self._action_counter[cart_index] += self._constrained_action_counter[cart_index]
        passengerVec = self.passenger_vector()
        cart_position = self.carts[cart_index].current_stop
        occupancy = self.carts[cart_index].occupancy
        carried = self.carts[cart_index].tot_passengers_carried
        # Update state here
        self.state = self.get_state
        # House keeping decision times
        # NOTE(review): this appends to `cart_decision_times`, but the
        # reward computation below reads `_carts_decision_times` (with
        # leading underscore / plural "carts") — confirm these are meant
        # to be two different dicts and not a typo.
        self.cart_decision_times[self.carts[cart_index].cartID].append(
            self._sim_time)
        charge = self.carts[cart_index]._charge_indicator
        self._carts_state_of_charge[self.carts[cart_index].cartID].append(
            charge)  # add state of charge
        self._action_counter_dict[self.carts[cart_index].cartID].append(
            self._action_counter[cart_index])
        # Record a SARS row for the agent decision; next_state / reward /
        # time_since are placeholders that get back-filled at the next
        # agent decision point (see the `agent_action` branch below).
        temp_sars_hist_agent = list(
            self.sars_tuple(
                sim_time=self._sim_time,
                cartID=self.current_cart.cartID,
                current_state=self.state,  # passengerVec,
                next_state='next state',
                cart_position=cart_position,
                occupancy=occupancy,
                carried=carried,
                call_received=self.carts[cart_index]._call_received_from,
                action='{:8.8s}'.format(action),
                reward='{:4.3f}'.format(0),
                delta_reward=0,
                time_since=0,
                event_type='{:20s}'.format(self._current_event_type),
                reason=self._current_reason))
        self.sars_history_agent[self.current_cart.cartID].append(
            temp_sars_hist_agent)
        # Combined (agent + constrained) trace; 'F_' marks a free/agent
        # action as opposed to the '_'-prefixed constrained actions below.
        temp_cars = list(
            self.csars_tuple(
                sim_time='{:5}'.format(self._sim_time),
                cartID='{:3s}'.format(self.current_cart.cartID),
                current_state=passengerVec,
                action='{:8.8s}'.format('F_' + action),
                cart_positions=self.print_other_carts(),
                reward=0,
                delta_reward=0,
                time_since=0,
                occupancy='{:4.2f}'.format(self.current_cart.occupancy),
                event_type='{:20s}'.format(self._current_event_type),
                charge='{:1.2f}'.format(
                    self.current_cart._charge_indicator),
                reward_bat=None,
                reason=reason,
                current_stop=self.current_cart.current_stop))
        self.csars_agent_all[self.current_cart.cartID].append(temp_cars)
        # (state, action, reward) record; rewards back-filled later.
        temp_reward = list(
            self.cstate_reward_tuple(
                state=self.state,
                action=action,
                reward_pass=None,
                state_next=None,
                charge='{:1.2f}'.format(
                    self.current_cart._charge_indicator),
                reward_bat=None))
        self._cstate_reward_dict[self.current_cart.cartID].append(
            temp_reward)
        # Set training data: binary-encode the action (1 = wait/stop).
        action_ind = 1 if action in ['wait', 'stop'] else 0
        temp_state = self.action_state_tuple(state=self.state,
                                             reward=None,
                                             action=action_ind,
                                             timediff=self._sim_time,
                                             state_next=None)
        self._action_states[self.carts[cart_index].cartID] = list(
            temp_state)
        # Schedule next time, next event for this cart's generator proc.
        self._action_counter[cart_index] += 1
        next_time, next_event = self.carts[
            cart_index].generateNext_TimeEvent(action)
        next_event_full = self.procs[self.current_proc_id].send(
            (next_time + self._sim_time, next_event))
        self.events.put(next_event_full)
        print('=' * 36 + '=' * 36, '\n')
        print('={}= is chosen because {}'.format(action, reason))
        print('Next event will be -{}- at time {}'.format(
            next_event, dt.time_handle(self._sim_time + next_time)))
        print('Passenger locations after update')
        self.print_passengers(self.passenger_vector())
        for i in range(len(self.carts)):
            print('\n', 'Location of Cart {}'.format(self.carts[i].cartID))
            self.print_bus_loc(self.carts[i].current_stop,
                               self.carts[i].occupancy)
            if self.carts[i].current_state == 'In_Motion':
                print(' ' * 5 * self.carts[i].current_stop, '---->')
        print('=' * 36 + '=' * 36, '\n')
    # Main event loop: pull events in time order until the horizon.
    while self._sim_time < self._end_time:
        if self.events.empty():
            print('*** end of the events ***')
            self.close()
            break
        # get current event and run for required actions on time
        self.current_event = self.events.get()
        self._sim_time, self.current_proc_id, self._current_event_type, info = self.current_event
        self._sim_daytime = dt.time_handle(time_in_seconds=self._sim_time)
        # Run the current event, update the states and schedule the next event
        self.event_run()
        # Only cart processes (proc ids 'C_<index>') trigger decisions.
        if self.current_proc_id.split('_')[0] == 'C':
            cart_index = int(self.current_proc_id.split('_')[1])
            self.current_cart = self.carts[cart_index]
            passengerVec = self.passenger_vector()
            cart_position = self.carts[cart_index].current_stop
            occupancy = self.carts[cart_index].occupancy
            carried = self.carts[cart_index].tot_passengers_carried
            if self._current_event_type != 'car_arrival_past':
                ''' Carts only take actions before the stops while moving
                and when there is no get-in get-off when stopping '''
                # NOTE(review): if chooseAction() raises TypeError on the
                # very first call, `action`/`reason` are left unbound and
                # the lines below raise NameError — confirm intended.
                try:
                    action, reason = self.carts[cart_index].chooseAction()
                except TypeError:
                    a = 1
                # vectfunc = np.vectorize(self.carts[cart_index].chooseAction(), otypes=[np.float], cache=False)
                self._current_action = action
                self._current_reason = reason
                if action != 'agent_action':
                    # Constrained (rule-based) action: log it with zero
                    # reward and schedule the cart's next event directly.
                    reward = 0.0
                    delta_reward = 0.0
                    self._const_action_counter_dict[
                        self.carts[cart_index].cartID].append(
                            self._action_counter[cart_index])
                    temp_sars_hist = list(
                        self.sars_tuple(
                            sim_time=self._sim_time,
                            cartID=self.current_cart.cartID,
                            current_state=passengerVec,
                            next_state='next state',
                            cart_position=cart_position,
                            occupancy=occupancy,
                            carried=carried,
                            call_received=self.carts[cart_index].
                            _call_received_from,
                            action='{:8.8s}'.format(action),
                            reward=0,
                            delta_reward=0,
                            time_since=0,
                            event_type='{:20s}'.format(
                                self._current_event_type),
                            reason=reason))
                    self.sars_history_constrained[
                        self.current_cart.cartID].append(temp_sars_hist)
                    temp_cars = list(
                        self.csars_tuple(
                            sim_time='{:5}'.format(self._sim_time),
                            cartID=self.current_cart.cartID,
                            current_state=passengerVec,
                            action='{:8.8s}'.format('_' + action),
                            cart_positions=self.print_other_carts(),
                            reward='{:4.3f}'.format(0.0),
                            delta_reward='{:4.3f}'.format(0.0),
                            time_since='{:4.2}'.format(0.0),
                            occupancy='{:4.2f}'.format(
                                self.current_cart.occupancy),
                            event_type='{:20s}'.format(
                                self._current_event_type),
                            charge='{:1.2f}'.format(
                                self.current_cart._charge_indicator),
                            reward_bat='{:4.3f}'.format(0.0),
                            reason=reason,
                            current_stop=self.current_cart.current_stop))
                    self.csars_agent_all[self.current_cart.cartID].append(
                        temp_cars)
                    # self._constrained_action_counter[cart_index] += 1
                    # Back-fill the previous constrained record's
                    # next_state (index 3), reward (9) and delta (10).
                    if len(self.sars_history_constrained[
                            self.current_cart.cartID]) >= 2:
                        update_ind = self._const_action_counter_dict[
                            self.current_cart.cartID][-2]
                        self.sars_history_constrained[
                            self.current_cart.cartID][-2][3] = passengerVec
                        self.sars_history_constrained[
                            self.current_cart.
                            cartID][-2][9] = '{:4.2f}'.format(reward)
                        self.sars_history_constrained[
                            self.current_cart.cartID][-2][
                                10] = '{:4.2f}'.format(delta_reward)
                        temp = self.csars_agent_all[
                            self.current_cart.cartID][update_ind][2]
                        # '-A->' marks a transition across a constrained step.
                        self.csars_agent_all[self.current_cart.cartID][
                            update_ind][2] = '{}-A->{}'.format(
                                temp, passengerVec)
                    # Schedule next time, next event
                    next_time, next_event = self.carts[
                        cart_index].generateNext_TimeEvent(action)
                    next_event_full = self.procs[
                        self.current_proc_id].send(
                            (next_time + self._sim_time, next_event))
                    self.events.put(next_event_full)
                    self._action_counter[cart_index] += 1
                    print('=' * 36 + '=' * 36, '\n')
                    print('={}= is chosen because {}'.format(
                        action, reason))
                    print('Next event will be -{}- at time {}'.format(
                        next_event,
                        dt.time_handle(self._sim_time + next_time)))
                    print('Passenger locations after update')
                    self.print_passengers(self.passenger_vector())
                    for i in range(len(self.carts)):
                        print(
                            '\n', 'Location of Cart {}'.format(
                                self.carts[i].cartID))
                        self.print_bus_loc(self.carts[i].current_stop,
                                           self.carts[i].occupancy)
                        if self.carts[i].current_state == 'In_Motion':
                            print(' ' * 5 * self.carts[i].current_stop,
                                  '---->')
                    print('=' * 36 + '=' * 36, '\n')
                # if self.current_cart._state == self.current_cart._cart_state_space[2]:
                #     a = 1
                if action == 'agent_action':
                    # Agent decision point: compute the reward accrued
                    # since the previous decision (ta .. ta_prime), then
                    # return control to the caller (the RL agent).
                    # observation = self.get_state
                    ta_prime = self._sim_time
                    self.current_cart.carriage_hist(
                        self.current_cart.tot_passengers_carried)
                    # check if previous action taken
                    if len(self._carts_decision_times[
                            self.current_cart.cartID]) >= 1:
                        ta = self._carts_decision_times[
                            self.current_cart.cartID][-1]
                        charge_ta = self._carts_state_of_charge[
                            self.current_cart.cartID][-1]
                        charge_ta_prime = self.current_cart._charge_indicator
                        # NOTE(review): a large block of commented-out
                        # reward-shaping experiments (penalties per action
                        # type, waiting-cost variants, running-mean reward
                        # normalisation and clipping) was condensed here;
                        # recover it from version control if needed.
                        delta_reward = 0  # no need to compute here
                        self._delta_reward = delta_reward
                        reward_passenger = self.current_cart.computeCost_R(
                            ta, ta_prime)  # all rewards are the costs
                        reward_battery = self.current_cart.battery_charge_cost(
                            charge_ta, charge_ta_prime)
                        # Convex combination of passenger and battery cost.
                        reward = (
                            self._convex_alpha) * reward_passenger + (
                                1.0 - self._convex_alpha) * reward_battery
                        self._reward = reward / 10
                        previous_action_index = self._action_counter_dict[
                            self.current_cart.cartID][-1]
                        # Back-fill the pending agent SARS row: indices are
                        # 3 = next_state, 9 = reward, 10 = delta_reward,
                        # 11 = time_since (field order of sars_tuple).
                        # NOTE(review): the trailing comma below stores a
                        # 1-tuple `(self.get_state,)`, not the state itself
                        # — confirm whether that is intentional.
                        self.sars_history_agent[self.current_cart.cartID][
                            -1][3] = self.get_state,  # self.passenger_vector()
                        self.sars_history_agent[self.current_cart.cartID][
                            -1][9] = '{:4.3f}'.format(self._reward)
                        self.sars_history_agent[self.current_cart.cartID][
                            -1][10] = '{:4.3f}'.format(delta_reward)
                        self.sars_history_agent[self.current_cart.cartID][
                            -1][11] = '{:4.2f}'.format(ta_prime - ta)
                        # Update training set
                        # action_state_tuple = namedtuple('action_state', 'state action reward timediff state_next')
                        self._action_states[self.carts[cart_index].
                                            cartID][2] = self._reward
                        temp_time = self._action_states[
                            self.carts[cart_index].cartID][3]
                        self._action_states[self.carts[cart_index].cartID][
                            3] = self._sim_time - temp_time
                        self._action_states[self.carts[cart_index].
                                            cartID][4] = self.get_state
                        # Back-fill the combined trace row written at the
                        # previous agent decision (may be missing, hence
                        # the IndexError guard).
                        try:
                            temp = self.csars_agent_all[
                                self.current_cart.
                                cartID][previous_action_index][2]
                            self.csars_agent_all[self.current_cart.cartID][previous_action_index][2] = \
                                '{}-->{}'.format(temp, self.passenger_vector())
                            self.csars_agent_all[self.current_cart.cartID][
                                previous_action_index][
                                    5] = '{:4.3f}'.format(self._reward)
                            self.csars_agent_all[self.current_cart.cartID][
                                previous_action_index][
                                    6] = '{:4.3f}'.format(delta_reward)
                            self.csars_agent_all[self.current_cart.cartID][previous_action_index][7] = \
                                '{:4.2f}'.format(ta_prime - ta)
                            # all occupancies
                            occupancy_all = [
                                cart.occupancy
                                for (key, cart) in self.carts.items()
                            ]
                            self.csars_agent_all[self.current_cart.cartID][previous_action_index][8] = \
                                '{:4.2f}'.format(sum(occupancy_all))
                            self.csars_agent_all[self.current_cart.cartID][previous_action_index][10] = \
                                '{:2.2f}'.format(reward_battery)
                            self._cstate_reward_dict[
                                self.current_cart.
                                cartID][-1][2] = self._reward
                            self._cstate_reward_dict[
                                self.current_cart.
                                cartID][-1][3] = self.get_state
                            self._cstate_reward_dict[
                                self.current_cart.
                                cartID][-1][5] = reward_battery
                        except IndexError:
                            print(IndexError('There is something wrong'))
                    # Hand control back to the agent at this decision point.
                    return self._action_states[self.carts[
                        cart_index].cartID], self._reward, self._done
    # Horizon reached: finalise and dump all statistics to CSV.
    if self._sim_time >= self._end_time:
        self._done = True
        self.close()
        msg = '*** end of simulation time: {} events pending ***'
        print('\n', msg.format(self.events.qsize()))
        print('Env {} ended'.format(self._envID), '\n\n')
        frames_cart_pass = []
        frames_sars_constrained = []
        frames_sars_agent = []
        frames_carts_agent = []
        frames_csate_reward = []
        meanTimeEnv = []
        for (i, cart) in self.carts.items():
            cart.stats = pd.DataFrame(
                cart.stats_list,
                columns=self.carts[i]._stats_tuple._fields)
            # Mean passenger waiting time for this cart (0.0 if none).
            meanTime = []
            for jtemp in range(len(cart.stats_list)):
                # cart.stats_list[0].waitingDuration
                meanTime.append(
                    float(cart.stats_list[jtemp].waitingDuration))
            if len(meanTime) != 0:
                meanTime = sum(meanTime) / len(meanTime)
            else:
                meanTime = 0.0
            meanTimeEnv.append(meanTime)
            cart_agent = pd.DataFrame(self.csars_agent_all[cart.cartID],
                                      columns=self.csars_tuple._fields)
            cart_agent.to_csv(
                '../tests/logs/cart_{}_agent_stats_env_{}.csv'.format(
                    self.carts[i].cartID, self._envID),
                sep="\t")
            cart.stats.to_csv(
                '../tests/logs/cart_{}_passenger_stats_env_{}.csv'.format(
                    self.carts[i].cartID, self._envID),
                sep="\t")
            frames_cart_pass.append(cart.stats)
            frames_csate_reward.append(
                pd.DataFrame(
                    self._cstate_reward_dict[self.carts[i].cartID],
                    columns=self.cstate_reward_tuple._fields))
            # NOTE(review): the history dicts are replaced in-place by
            # DataFrames here, so step() cannot be called again after the
            # episode ends without re-initialising them.
            self.sars_history_agent[cart.cartID] = pd.DataFrame(
                list(self.sars_history_agent[cart.cartID]),
                columns=self.sars_tuple._fields)
            self.sars_history_agent[cart.cartID].to_csv(
                '../tests/logs/sars_history_agent_cart{}_env_{}_.csv'.
                format(i, self._envID),
                sep='\t')
            self.sars_history_constrained[cart.cartID] = pd.DataFrame(
                self.sars_history_constrained[cart.cartID],
                columns=self.sars_tuple._fields)
            self.sars_history_constrained[cart.cartID].to_csv(
                '../tests/logs/sars_history_constrained_cart{}_env_{}_.csv'
                .format(i, self._envID),
                sep='\t')
            frames_carts_agent.append(cart_agent)
            frames_sars_agent.append(self.sars_history_agent[cart.cartID])
            frames_sars_constrained.append(
                self.sars_history_constrained[cart.cartID])
        # Aggregate per-cart frames and write the combined reports.
        carts_passenger_stats_pd = pd.concat(frames_cart_pass)
        sars_cart_const_stat_pd = pd.concat(frames_sars_constrained)
        sars_cart_agent_stat_pd = pd.concat(frames_sars_agent)
        cars_agent_stats_pd = pd.concat(frames_carts_agent)
        cstate_reward_pd = pd.concat(frames_csate_reward)
        carts_passenger_stats_pd.to_csv(
            '../tests/logs/carts_passenger_stats_env_{}.csv'.format(
                self._envID),
            sep="\t")
        sars_cart_const_stat_pd.to_csv(
            '../tests/logs/sars_history_constrained_cart_env_{}.csv'.
            format(self._envID),
            sep="\t")
        sars_cart_agent_stat_pd.to_csv(
            '../tests/logs/sars_history_agent_cart_env_{}.csv'.format(
                self._envID),
            sep="\t")
        cars_agent_stats_pd.to_csv(
            '../tests/logs/cars_agent_stats_pd_{}.csv'.format(self._envID),
            sep="\t")
        cstate_reward_pd.to_csv(
            '../tests/logs/cars_cstate_reward_pd_{}.csv'.format(
                self._envID),
            sep="\t")
        self._pass_mean_time = sum(meanTimeEnv) / self.num_of_carts
    # NOTE(review): if chosen_action is falsy and the loop exits without
    # ever handling a cart process, `cart_index` is unbound here and this
    # raises NameError — confirm callers always enter with a cart pending.
    return self._action_states[
        self.carts[cart_index].cartID], self._reward, self._done