def use_the_model_with_a_planner(name, omega, n_iteration):

    writer = cv2.VideoWriter("output.avi", cv2.VideoWriter_fourcc(*"MJPG"), 30, (1200, 800))

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64, 64))

    # Load the model
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)
    dyn_autoencoder.load_the_model(name, omega, n_iteration)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(2000)):

        ### Collect Data from the Env. ###
        #map_visit_mask, img_resized = vehicle.generate_a_random_trajectory(state_est_grid)
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample=100, omega=0.0)
        mask_obs, obs, state, reward = env.step(map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)

        ### Render the Env. and the Est. ###
        img_env = env.output_image()
        img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
        render('env', img_env, 1)
        render('img_state_est_grid', img_state_est_grid, 1)

        ### Save the video ###
        img_env_uint8 = (img_env * 255).astype('uint8')
        img_state_est_grid_uint8 = (img_state_est_grid * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img = np.concatenate((img_env_uint8, backtorgb), axis=0)
        writer.write(img)

    writer.release()

    render('env', img_env, 1)
    render('img_state_est_grid', img_state_est_grid, 1)
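# Illustrative usage sketch (not part of the original code): replays a saved
# estimator checkpoint together with the trajectory planner. The name/omega/
# iteration values below are placeholders and must match the arguments used
# when the model was saved; SETTING, FireEnvironment, Vehicle,
# DynamicAutoEncoder and render are assumed to be defined elsewhere in this module.
def example_replay_with_planner():
    use_the_model_with_a_planner(name='default', omega=1.0, n_iteration=999)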
def demo4_LearningPathPlanning(setting):

    n_sample = 100

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64, 64), planner_type='Default')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Train Iteration Logger
    writer = SummaryWriter()

    # Video Writer
    video_writer1 = ImageStreamWriter('LearningPlanner.avi', FPS, image_size=(1200, 820))

    # Log the concatenated setting text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_new_fire_count = []
    list_action = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine an epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)
        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est.

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  #<-- to be saved

        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = max(np.mean(np.array(list_new_fire_count)), 1)  # guard against division by zero (as in demo5_ComparePolicies)
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)
            writer.add_scalar('perform/pc_coverd_new_fire', avg_reward / avg_new_fire_count, i)

            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)

            writer.add_scalar('action_count/0', action_0_count / len(list_action), i)
            writer.add_scalar('action_count/1', action_1_count / len(list_action), i)
            writer.add_scalar('action_count/2', action_2_count / len(list_action), i)
            writer.add_scalar('action_count/3', action_3_count / len(list_action), i)
            list_action = []

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)
            dqn_agent.save_the_model(i, f_name)

    video_writer1.close()
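# Illustrative usage sketch (not part of the original code): the only key
# demo4_LearningPathPlanning() reads from its setting dict is 'name' (used as
# the checkpoint file name); every key/value pair is also echoed into the
# TensorBoard text log. The value below is a placeholder.
def example_run_demo4():
    demo4_LearningPathPlanning({'name': 'learning_planner_run0'})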
def demo3_SysID(setting):

    n_sample = 1
    action_param = 3

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64, 64), planner_type='Random')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Train Iteration Logger
    writer = SummaryWriter()

    # Video Writer
    video_writer1 = ImageStreamWriter('RandomPathSysId.avi', FPS, image_size=(1200, 820))

    # Log the concatenated setting text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k
    map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action_param)

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Hidden state of the estimator at the current step
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action_param)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est.

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  #<-- to be saved

        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)

    video_writer1.close()
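# Illustrative usage sketch (not part of the original code): demo3_SysID()
# drives a randomly planned trajectory purely for system identification of the
# dynamic autoencoder; as above, only setting['name'] is used for checkpoint
# file names. The value below is a placeholder.
def example_run_demo3():
    demo3_SysID({'name': 'sysid_random_run0'})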
def demo5_ComparePolicies(setting, env):

    n_sample = 2048

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=64, grid_size=(64, 64), planner_type='Default')

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=4, gru_hidden_dim=4)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=4, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    # Video Writer
    '''
    video_f_name = 'UsePlanner' + '_' + setting['name'] + '_' + setting['policy_type'] + '.avi'
    video_writer1 = ImageStreamWriter(video_f_name, FPS, image_size=(1200, 820))
    '''

    # Train Iteration Logger
    writer = SummaryWriter()

    # Log the concatenated setting text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################

    ### Loss Monitors ###
    list_rewards = []
    list_new_fire_count = []
    list_action = []
    list_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine an epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)

        ### Collect Data from the Env. ###
        # Plan a trajectory
        policy_type = setting['policy_type']
        if policy_type == 'Default':
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Random':
            action = 777
            map_visit_mask, img_resized = vehicle.generate_a_random_trajectory()
        elif policy_type == 'Act0':
            action = 0
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Act1':
            action = 1
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        elif policy_type == 'Act2':
            action = 2
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
        else:
            action = 3
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        list_action.append(action)

        # Collect the masked observation
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        update = True

        #### Update the reinforcement learning agent and Dyn Auto Enc ###
        if policy_type != 'Random':
            dqn_agent.step(h_k, action, reward, h_kp1, False, update)
            loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW, update)
            list_loss.append(loss_val)

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)  # State Est.

        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0)  #<-- to be saved

        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        #video_writer1.write_image_frame(img_bayes_uint8)

        if i % N_LOGGING_PERIOD == 0:

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = max(np.mean(np.array(list_new_fire_count)), 1)  # to avoid division by zero
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)
            writer.add_scalar('perform/pc_coverd_new_fire', avg_reward / avg_new_fire_count, i)

            if policy_type != 'Random':
                avg_loss = np.mean(np.array(list_loss))
                list_loss = []
                writer.add_scalar('dynautoenc/loss', avg_loss, i)

                action_0_count = list_action.count(0)
                action_1_count = list_action.count(1)
                action_2_count = list_action.count(2)
                action_3_count = list_action.count(3)

                writer.add_scalar('action_count/0', action_0_count / len(list_action), i)
                writer.add_scalar('action_count/1', action_1_count / len(list_action), i)
                writer.add_scalar('action_count/2', action_2_count / len(list_action), i)
                writer.add_scalar('action_count/3', action_3_count / len(list_action), i)
                list_action = []

                writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
                writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
                writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
                writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
                writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
                writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
                writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
                writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
                writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)
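# Illustrative usage sketch (not part of the original code): because
# demo5_ComparePolicies() receives the environment as an argument, one
# FireEnvironment instance can be reused to compare the policy branches above.
# The run name is a placeholder; the policy strings mirror the if/elif chain
# ('Act3' falls through to the final else branch).
def example_compare_all_policies():
    env = FireEnvironment(64, 64)
    for policy_type in ['Default', 'Random', 'Act0', 'Act1', 'Act2', 'Act3']:
        demo5_ComparePolicies({'name': 'compare_run0', 'policy_type': policy_type}, env)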
def initialize_environment(min_request_time: int, max_request_time: int, max_wait_times: List[int],
                           detour_ratios: List[float], vehicle_number: int, vehicle_speed: float) \
        -> Tuple[MultiDiGraph, np.ndarray, np.ndarray, np.ndarray, Dict[int, Set[Order]], List[Vehicle]]:
    """
    Initialize the environment.
    :param max_wait_times: set of maximum waiting times for orders
    :param detour_ratios: set of maximum detour ratios for orders
    :param min_request_time: minimum request time of the orders to extract
    :param max_request_time: maximum request time of the orders to extract
    :param vehicle_number: number of vehicles to generate
    :param vehicle_speed: speed of the vehicles
    :return graph: road network graph
    :return shortest_distance: shortest-path length matrix
    :return shortest_path: shortest-path matrix
    :return shortest_path_with_minute: shortest-path matrix for paths within one minute
    :return orders: order information for each time step
    :return vehicles: list of initialized vehicles
    """
    print("read data from disc")
    trip_order_data = pd.read_csv("./order_data/trip_order_data.csv")
    car_fuel_consumption_info = pd.read_csv("car_fuel_consumption_data/car_fuel_consumption_info.csv")
    graph = ox.load_graphml("Manhattan.graphml", folder="./network_data/")
    shortest_distance = np.load("./network_data/shortest_distance.npy")
    shortest_path = np.load("./network_data/shortest_path.npy")
    shortest_path_with_minute = np.load("./network_data/shortest_path_with_minute.npy")

    print("build osm_id to index map")
    osm_id2index = {osm_id: index for index, osm_id in enumerate(graph.nodes)}
    location_map = {osm_id2index[node[0]]: (node[1]['x'], node[1]['y']) for node in graph.nodes(data=True)}
    index2osm_id = {index: osm_id for osm_id, index in osm_id2index.items()}
    GeoLocation.set_location_map(location_map)
    GeoLocation.set_index2osm_id(index2osm_id)

    print("generate order data")
    trip_order_data = trip_order_data[min_request_time <= trip_order_data["time"]]
    trip_order_data = trip_order_data[max_request_time > trip_order_data["time"]]
    order_number = trip_order_data.shape[0]
    pick_up_index_series = trip_order_data["pick_up_index"].values
    drop_off_index_series = trip_order_data["drop_off_index"].values
    request_time_series = trip_order_data["time"].values
    receive_fare_series = (trip_order_data["total_amount"] - trip_order_data["tip_amount"]).values  # we do not include the tip component of the fare
    n_riders_series = trip_order_data["passenger_count"].values

    orders = {}
    for request_time in range(min_request_time, max_request_time):
        orders[request_time] = set()
    for i in range(order_number):
        order_id = i
        start_location = OrderLocation(int(pick_up_index_series[i]), OrderLocation.PICK_UP_TYPE)
        end_location = OrderLocation(int(drop_off_index_series[i]), OrderLocation.DROP_OFF_TYPE)
        request_time = int(request_time_series[i])
        max_wait_time = np.random.choice(max_wait_times)
        order_distance = shortest_distance[start_location.osm_index, end_location.osm_index]
        if order_distance == 0.0:
            continue
        receive_fare = receive_fare_series[i]
        detour_ratio = np.random.choice(detour_ratios)
        n_riders = int(n_riders_series[i])
        order = Order(order_id, start_location, end_location, request_time, max_wait_time,
                      order_distance, receive_fare, detour_ratio, n_riders)
        orders[request_time].add(order)

    print("generate vehicle data")
    Vehicle.set_average_speed(vehicle_speed)
    car_osm_ids = np.random.choice(graph.nodes, size=vehicle_number)
    cars_info = car_fuel_consumption_info.sample(n=vehicle_number)
    vehicles = []
    for i in range(vehicle_number):
        vehicle_id = i
        location = GeoLocation(osm_id2index[int(car_osm_ids[i])])
        car_info = cars_info.iloc[i, :]
        available_seats = int(car_info["seats"])
        cost_per_distance = float(car_info["fuel_consumption"]) / 6.8 * 2.5 / 1.609344
        vehicle = Vehicle(vehicle_id, location, available_seats, cost_per_distance, Vehicle.WITHOUT_MISSION_STATUS)
        vehicles.append(vehicle)
    print("finish generate data")

    return graph, shortest_distance, shortest_path, shortest_path_with_minute, orders, vehicles
def train(fullcover, name, setting):

    n_sample = 20

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=1000, grid_size=(64, 64), planner_type=setting['planner_type'])

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size=(env.map_width, env.map_height),
                                         n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16, action_size=4, replay_memory_size=1000, batch_size=64,
                          gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Iteration Logger
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()

    # Log the concatenated setting text
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_count_fire_visit = []
    list_count_all_fire = []
    list_action = []

    ### Filling the Data Buffer ###
    # Warm up the buffer with full-coverage observations: the planner needs a
    # DQN action, which is not available before the main loop starts.
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine an epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)

        ### Collect Data from the Env. ###
        if fullcover:
            map_visit_mask, img_resized = vehicle.full_mask()
        else:
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)
        list_rewards.append(reward)

        fire_count = (torch.sum(state[2])).item()
        fire_visit = (torch.sum(mask_obs.permute(2, 0, 1) * state[2].unsqueeze(0))).item()

        if fire_count < 1:
            print('no fire')
        else:
            list_count_fire_visit.append(fire_visit)
            list_count_all_fire.append(fire_count)

        ### Render the Env. and the Est. ###
        if i % N_RENDER_PERIOD == 0:
            img_env = env.output_image()
            img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
            render('env', img_env, 1)
            render('img_state_est_grid', img_state_est_grid, 1)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:

            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_count_fire_visit = np.mean(np.array(list_count_fire_visit))
            list_count_fire_visit = []
            writer.add_scalar('perform/avg_count_fire_visit', avg_count_fire_visit, i)

            avg_count_all_fire = np.mean(np.array(list_count_all_fire))
            list_count_all_fire = []
            writer.add_scalar('perform/avg_count_all_fire', avg_count_all_fire, i)

            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)
            list_action = []

            if setting['planner_type'] == 'Default':
                writer.add_scalar('action_count/0', action_0_count, i)
                writer.add_scalar('action_count/1', action_1_count, i)
                writer.add_scalar('action_count/2', action_2_count, i)
                writer.add_scalar('action_count/3', action_3_count, i)

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                  % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = name
            dyn_autoencoder.save_the_model(i, f_name)
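# Illustrative usage sketch (not part of the original code): train() reads
# only 'planner_type' from its setting dict (besides echoing it to
# TensorBoard), and fullcover toggles full-coverage observation versus planned
# trajectories. The name and planner type below are placeholders.
def example_train():
    train(fullcover=False, name='train_run0', setting={'planner_type': 'Default'})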