Example #1
def use_the_model_with_a_planner(name, omega, n_iteration):

    writer = cv2.VideoWriter("output.avi",cv2.VideoWriter_fourcc(*"MJPG"), 30,(1200,800))

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512, grid_size=(64,64))

    # Load the model
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size = (env.map_width, env.map_height), n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)
    dyn_autoencoder.load_the_model(name, omega, n_iteration)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(2000)):
        ### Collect Data from the Env. ###
        #map_visit_mask, img_resized = vehicle.generate_a_random_trajectory(state_est_grid)
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample=100, omega=0.0)
        mask_obs, obs, state, reward = env.step(map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)

        ### Render the Env. and the Est. ###
        img_env   = env.output_image()
        img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
        
        render('env', img_env, 1)
        render('img_state_est_grid', img_state_est_grid, 1)

        ### Save the video
        img_env_uint8 = (img_env*255).astype('uint8')
        img_state_est_grid_uint8 = (img_state_est_grid*255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8,cv2.COLOR_GRAY2RGB)
        img = np.concatenate((img_env_uint8, backtorgb), axis=0)        
        writer.write(img)

    writer.release()

    render('env', img_env, 1)
    render('img_state_est_grid', img_state_est_grid, 1)
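These snippets come from a larger repository and rely on module-level imports and globals that are not shown (cv2, numpy, torch, tqdm, SETTING, FPS, the N_* constants, and a render helper, plus the project classes FireEnvironment, Vehicle, DynamicAutoEncoder, SingleTrajectoryBuffer, DQN_Agent and ImageStreamWriter). A minimal sketch of what that preamble might look like; the constant values and the render implementation below are assumptions inferred from how the names are used, not taken from the original code:

import cv2
import numpy as np
import torch
import tqdm
from torch.utils.tensorboard import SummaryWriter

# Project-specific classes (module paths are unknown, so the imports are left as comments):
# from ... import FireEnvironment
# from ... import Vehicle
# from ... import DynamicAutoEncoder, SingleTrajectoryBuffer
# from ... import DQN_Agent
# from ... import ImageStreamWriter

# Hyperparameters referenced by the demos below (illustrative values only)
SETTING = {}                 # passed straight through to DynamicAutoEncoder
FPS = 30
N_MEMORY_SIZE = 10000
N_TRAIN_WAIT = 1000
N_TOTAL_TIME_STEPS = 20000
N_TRAIN_BATCH = 1
N_TRAIN_WINDOW = 64
N_LOGGING_PERIOD = 200
N_SAVING_PERIOD = 5000
N_RENDER_PERIOD = 1


def render(window_name, img, wait_ms):
    """Display an image in an OpenCV window (stand-in for the repository's render helper)."""
    cv2.imshow(window_name, img)
    cv2.waitKey(wait_ms)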
Example #2
def demo4_LearningPathPlanning(setting):

    n_sample = 100

    # Environment
    env = FireEnvironment(64, 64)
    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512,
                      grid_size=(64, 64),
                      planner_type='Default')
    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING,
                                         grid_size=(env.map_width,
                                                    env.map_height),
                                         n_state=3,
                                         n_obs=3,
                                         encoding_dim=16,
                                         gru_hidden_dim=16)
    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16,
                          action_size=4,
                          replay_memory_size=1000,
                          batch_size=64,
                          gamma=0.99,
                          learning_rate=0.01,
                          target_tau=0.01,
                          update_rate=1,
                          seed=0)
    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)
    # Train Iteration Logger

    writer = SummaryWriter()
    # Video Writer
    video_writer1 = ImageStreamWriter('LearningPlanner.avi',
                                      FPS,
                                      image_size=(1200, 820))

    # Concatenate the settings into text and log them
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_new_fire_count = []
    list_action = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(),
                   state.detach().long(),
                   map_visit_mask.detach().long())

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine the epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(
            state_est_grid, n_sample, action)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(),
                   state.detach().long(),
                   map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)

        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)

        # State Est
        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb),
                                         axis=0)  #<-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(
            memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:
            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = np.mean(np.array(list_new_fire_count))
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)

            writer.add_scalar('perform/pc_coverd_new_fire',
                              avg_reward / avg_new_fire_count, i)

            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)

            writer.add_scalar('action_count/0',
                              action_0_count / len(list_action), i)
            writer.add_scalar('action_count/1',
                              action_1_count / len(list_action), i)
            writer.add_scalar('action_count/2',
                              action_2_count / len(list_action), i)
            writer.add_scalar('action_count/3',
                              action_3_count / len(list_action), i)
            list_action = []

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print(
                'losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' %
                  (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)
            dqn_agent.save_the_model(i, f_name)

    video_writer1.close()
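The nine add_scalar calls above log the entries of the 3x3 observation matrix O_np_val one tag at a time, using the scheme 'obs_state<col>/o<row><col>'. A small helper could produce the same tags in a loop; the name log_observation_matrix is made up here and not part of the repository:

def log_observation_matrix(writer, O_np_val, step):
    # Mirrors the hand-written calls above: tag 'obs_state<col>/o<row><col>'
    # receives entry O_np_val[row][col] of the 3x3 observation model.
    for row in range(3):
        for col in range(3):
            writer.add_scalar('obs_state%d/o%d%d' % (col, row, col),
                              O_np_val[row][col], step)

Calling log_observation_matrix(writer, O_np_val, i) inside the logging block would replace the nine explicit calls.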
Example #3
def demo3_SysID(setting):

    n_sample = 1
    action_param = 3

    # Environment
    env = FireEnvironment(64, 64)
    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=512,
                      grid_size=(64, 64),
                      planner_type='Random')
    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING,
                                         grid_size=(env.map_width,
                                                    env.map_height),
                                         n_state=3,
                                         n_obs=3,
                                         encoding_dim=16,
                                         gru_hidden_dim=16)
    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)
    # Train Iteration Logger

    writer = SummaryWriter()
    # Video Writer
    video_writer1 = ImageStreamWriter('RandomPathSysId.avi',
                                      FPS,
                                      image_size=(1200, 820))

    # Concatenate the settings into text and log them
    setting_text = ''
    for k, v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k
    map_visit_mask, img_resized = vehicle.plan_a_trajectory(
        state_est_grid, n_sample, action_param)

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        map_visit_mask, img_resized = vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Latent state from the estimator (no epsilon-greedy action in this system-identification demo)
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ### Collect Data from the Env. ###
        map_visit_mask, img_resized = vehicle.plan_a_trajectory(
            state_est_grid, n_sample, action_param)
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        ################################
        ### Rendering and Save Video ###
        ################################
        img_env = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)

        # State Est
        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top * 255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent * 255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb),
                                         axis=0)  #<-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        video_writer1.write_image_frame(img_bayes_uint8)

        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val = dyn_autoencoder.update(
            memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i % N_LOGGING_PERIOD == 0:
            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print(
                'losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f'
                % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' %
                  (i, len(memory.obs_memory)))

        if (i + 1) % N_SAVING_PERIOD == 0:
            f_name = setting['name']
            dyn_autoencoder.save_the_model(i, f_name)

    video_writer1.close()
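The loop that builds setting_text reappears in most of these demos; up to the trailing tab it is equivalent to a single join. A possible one-line form:

setting_text = '\t'.join('%s:%s' % (k, v) for k, v in setting.items())
writer.add_text('setting', setting_text)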
Example #4
def demo5_ComparePolicies(setting, env):

    n_sample = 2048

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=64, grid_size=(64,64), planner_type='Default')
    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size = (env.map_width, env.map_height), n_state=3, n_obs=3, encoding_dim=4, gru_hidden_dim=4)

    ### DQN agent  
    dqn_agent = DQN_Agent(state_size=4, action_size=4, replay_memory_size=1000, batch_size=64, gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)
    
    # Video Writer
    '''
    video_f_name = 'UsePlanner'+ '_' + setting['name'] + '_' + setting['policy_type'] + '.avi'
    video_writer1 = ImageStreamWriter(video_f_name, FPS, image_size=(1200,820))
    '''

    # Train Iteration Logger

    writer = SummaryWriter()

    # Concatenate the settings into text and log them
    setting_text = ''
    for k,v in setting.items():
        setting_text += k
        setting_text += ':'
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)

    ########################################
    ### Interacting with the Environment ###
    ########################################

    ### Loss Monitors ###
    list_rewards = []
    list_new_fire_count = []
    list_action = []
    list_loss = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):         
        map_visit_mask, img_resized =  vehicle.full_mask()
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

    mask_obs, obs, state = env.reset()
    state_est_grid = dyn_autoencoder.u_k

    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine the epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
          
        
        ### Collect Data from the Env. ###
        # Plan a trajectory
        policy_type = setting['policy_type']
        if policy_type == 'Default':
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)  

        elif policy_type == 'Random':
            action = 777
            map_visit_mask, img_resized = vehicle.generate_a_random_trajectory()

        elif policy_type == 'Act0':
            action = 0
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        elif policy_type == 'Act1':
            action = 1
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        elif policy_type == 'Act2':
            action = 2
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        else:
            action = 3
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)

        list_action.append(action)
        

        # Collect the masked observation
        mask_obs, obs, state, reward, info = env.step(map_visit_mask)
        memory.add(mask_obs.detach().long(), state.detach().long(), map_visit_mask.detach().long())

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        list_rewards.append(reward)
        list_new_fire_count.append(info['new_fire_count'])

        
        update = True
        #### Update the reinforcement learning agent and Dyn Auto Enc ###
        if policy_type != 'Random':
            dqn_agent.step(h_k, action, reward, h_kp1, False, update)
            loss_val, loss_val_cross, loss_val_ent, O_np_val =  dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW, update)
            list_loss.append(loss_val)


        ################################
        ### Rendering and Save Video ###
        ################################        
        img_env   = env.output_image()
        img_agent = dyn_autoencoder.output_image(state_est_grid)

        # State Est
        #blank = np.zeros((400, 200, 3))
        img_top = img_env  #np.concatenate((blank, img_env[:,:800], blank), axis=1)
        blank = np.zeros((20, 1200, 3))
        img_top = np.concatenate((img_top, blank), axis=0)
        img_top = (img_top*255).astype('uint8')

        img_state_est_grid_uint8 = (img_agent*255).astype('uint8')
        backtorgb = cv2.cvtColor(img_state_est_grid_uint8, cv2.COLOR_GRAY2RGB)
        img_bayes_uint8 = np.concatenate((img_top, backtorgb), axis=0) #<-- to be saved
        render('Dynamic Auto Encoder', img_bayes_uint8, 1)

        # Save video #
        #video_writer1.write_image_frame(img_bayes_uint8)

        if i%N_LOGGING_PERIOD == 0:

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_new_fire_count = max(np.mean(np.array(list_new_fire_count)), 1) # to avoid division by zero
            list_new_fire_count = []
            writer.add_scalar('perform/new_fire_counts', avg_new_fire_count, i)
            writer.add_scalar('perform/pc_coverd_new_fire', avg_reward/avg_new_fire_count, i)

            if policy_type != 'Random':

                avg_loss = np.mean(np.array(list_loss))
                list_loss = []
                writer.add_scalar('dynautoenc/loss', avg_loss, i)

                action_0_count = list_action.count(0)
                action_1_count = list_action.count(1)
                action_2_count = list_action.count(2)
                action_3_count = list_action.count(3)

                writer.add_scalar('action_count/0', action_0_count/len(list_action), i)
                writer.add_scalar('action_count/1', action_1_count/len(list_action), i)
                writer.add_scalar('action_count/2', action_2_count/len(list_action), i)
                writer.add_scalar('action_count/3', action_3_count/len(list_action), i)
                list_action = []

                writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
                writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
                writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
                writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
                writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
                writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
                writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
                writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
                writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)
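The policy_type chain in demo5_ComparePolicies maps each policy name either to the DQN action ('Default'), to a random trajectory ('Random', marked with the sentinel action 777), or to a fixed action index, with 3 as the fall-through. A table-driven sketch of the same dispatch; choose_mask and FIXED_ACTIONS are made-up names, not part of the repository:

FIXED_ACTIONS = {'Act0': 0, 'Act1': 1, 'Act2': 2, 'Act3': 3}


def choose_mask(vehicle, state_est_grid, n_sample, policy_type, dqn_action):
    # 'Random' bypasses the planner entirely; 777 marks the action as "not planned".
    if policy_type == 'Random':
        map_visit_mask, img_resized = vehicle.generate_a_random_trajectory()
        return 777, map_visit_mask, img_resized
    # 'Default' follows the DQN action; the named policies use a fixed index,
    # and anything else falls through to action 3, as in the if/elif chain above.
    action = dqn_action if policy_type == 'Default' else FIXED_ACTIONS.get(policy_type, 3)
    map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
    return action, map_visit_mask, img_resized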
Example #5
def initialize_environment(min_request_time: int, max_request_time: int, max_wait_times: List[int],
                           detour_ratios: List[float], vehicle_number: int, vehicle_speed: float) \
        -> Tuple[MultiDiGraph, np.ndarray, np.ndarray, np.ndarray, Dict[int, Set[Order]], List[Vehicle]]:
    """
    初始化环境
    :param max_wait_times: 订单最大等待时间集合
    :param detour_ratios: 订单最大绕路比集合
    :param min_request_time: 提取订单的最小请求时间
    :param max_request_time: 提取订单的最大请求时间
    :param vehicle_number: 生成的车辆数目
    :param vehicle_speed: 车辆的速度
    :return graph: 路网图
    :return shortest_distance:最短路径长度矩阵
    :return shortest_path: 最短路径矩阵
    :return shortest_path_with_minute: 1分钟以内的最短路径矩阵
    :return orders: 各个时刻的订单信息
    :return vehicles: 初始化的车辆列表
    """
    print("read data from disc")
    trip_order_data = pd.read_csv("./order_data/trip_order_data.csv")
    car_fuel_consumption_info = pd.read_csv("car_fuel_consumption_data/car_fuel_consumption_info.csv")
    graph = ox.load_graphml("Manhattan.graphml", folder="./network_data/")
    shortest_distance = np.load("./network_data/shortest_distance.npy")
    shortest_path = np.load("./network_data/shortest_path.npy")
    shortest_path_with_minute = np.load("./network_data/shortest_path_with_minute.npy")

    print("build osm_id to index map")
    osm_id2index = {osm_id: index for index, osm_id in enumerate(graph.nodes)}
    location_map = {osm_id2index[node[0]]: (node[1]['x'], node[1]['y']) for node in graph.nodes(data=True)}
    index2osm_id = {index: osm_id for osm_id, index in osm_id2index.items()}
    GeoLocation.set_location_map(location_map)
    GeoLocation.set_index2osm_id(index2osm_id)

    print("generate order data")
    trip_order_data = trip_order_data[min_request_time <= trip_order_data["time"]]
    trip_order_data = trip_order_data[max_request_time > trip_order_data["time"]]

    order_number = trip_order_data.shape[0]
    pick_up_index_series = trip_order_data["pick_up_index"].values
    drop_off_index_series = trip_order_data["drop_off_index"].values
    request_time_series = trip_order_data["time"].values
    receive_fare_series = (trip_order_data["total_amount"] - trip_order_data["tip_amount"]).values  # the tip component of the fare is not considered
    n_riders_series = trip_order_data["passenger_count"].values

    orders = {}
    for request_time in range(min_request_time, max_request_time):
        orders[request_time] = set()

    for i in range(order_number):
        order_id = i
        start_location = OrderLocation(int(pick_up_index_series[i]), OrderLocation.PICK_UP_TYPE)
        end_location = OrderLocation(int(drop_off_index_series[i]), OrderLocation.DROP_OFF_TYPE)
        request_time = int(request_time_series[i])
        max_wait_time = np.random.choice(max_wait_times)
        order_distance = shortest_distance[start_location.osm_index, end_location.osm_index]
        if order_distance == 0.0:
            continue
        receive_fare = receive_fare_series[i]
        detour_ratio = np.random.choice(detour_ratios)
        n_riders = int(n_riders_series[i])
        order = Order(order_id, start_location, end_location, request_time, max_wait_time,
                      order_distance, receive_fare, detour_ratio, n_riders)
        orders[request_time].add(order)

    print("generate vehicle data")
    Vehicle.set_average_speed(vehicle_speed)
    car_osm_ids = np.random.choice(graph.nodes, size=vehicle_number)
    cars_info = car_fuel_consumption_info.sample(n=vehicle_number)
    vehicles = []
    for i in range(vehicle_number):
        vehicle_id = i
        location = GeoLocation(osm_id2index[int(car_osm_ids[i])])
        car_info = cars_info.iloc[i, :]
        available_seats = int(car_info["seats"])
        cost_per_distance = float(car_info["fuel_consumption"]) / 6.8 * 2.5 / 1.609344
        vehicle = Vehicle(vehicle_id, location, available_seats, cost_per_distance, Vehicle.WITHOUT_MISSION_STATUS)
        vehicles.append(vehicle)

    print("finish generate data")
    return graph, shortest_distance, shortest_path, shortest_path_with_minute, orders, vehicles
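A hypothetical call of initialize_environment; every value below is illustrative and none of the numbers come from the original experiments:

graph, shortest_distance, shortest_path, shortest_path_with_minute, orders, vehicles = \
    initialize_environment(min_request_time=0,
                           max_request_time=3600,
                           max_wait_times=[300, 600],       # illustrative
                           detour_ratios=[1.25, 1.5],       # illustrative
                           vehicle_number=500,
                           vehicle_speed=7.0)               # illustrative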
Example #6
def train(fullcover, name, setting):

    n_sample = 20

    # Environment
    env = FireEnvironment(64, 64)

    # Vehicle to generate observation mask
    vehicle = Vehicle(n_time_windows=1000, grid_size=(64,64), planner_type=setting['planner_type'])

    # Trainer and Estimator
    dyn_autoencoder = DynamicAutoEncoder(SETTING, grid_size = (env.map_width, env.map_height), n_state=3, n_obs=3, encoding_dim=16, gru_hidden_dim=16)

    # Train Data Buffer
    memory = SingleTrajectoryBuffer(N_MEMORY_SIZE)

    ### DQN agent
    dqn_agent = DQN_Agent(state_size=16, action_size=4, replay_memory_size=1000, batch_size=64, gamma=0.99, learning_rate=0.01, target_tau=0.01, update_rate=1, seed=0)

    # Train Iteration Logger
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter()

    # Concatenate the settings into text and log them
    setting_text = ''
    for k,v in setting.items():
        setting_text += k
        setting_text += str(v)
        setting_text += '\t'
    writer.add_text('setting', setting_text)


    ########################################
    ### Interacting with the Environment ###
    ########################################
    mask_obs, obs, state = env.reset()
    map_visit_mask, img_resized = vehicle.full_mask()
    state_est_grid = dyn_autoencoder.u_k

    ### Loss Monitors ###
    list_loss = []
    list_cross_entropy_loss = []
    list_entropy_loss = []
    list_rewards = []
    list_count_fire_visit = []
    list_count_all_fire = []
    list_action = []

    ### Filling the Data Buffer ###
    for i in tqdm.tqdm(range(N_TRAIN_WAIT)):
        # Fill the buffer with full-coverage observations; the DQN action used by
        # the planner is only defined inside the main loop below.
        map_visit_mask, img_resized = vehicle.full_mask()

        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)
        


    for i in tqdm.tqdm(range(N_TOTAL_TIME_STEPS)):

        # Determine the epsilon-greedy action from the current state
        h_k = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()
        epsilon = 0.1
        action = dqn_agent.act(h_k, epsilon)
        list_action.append(action)    

        ### Collect Data from the Env. ###
        if fullcover:
            map_visit_mask, img_resized = vehicle.full_mask()
        else:
            map_visit_mask, img_resized = vehicle.plan_a_trajectory(state_est_grid, n_sample, action)
            
        
        mask_obs, obs, state, reward = env.step(map_visit_mask)
        memory.add(mask_obs, state, map_visit_mask)

        ### Run the Estimator ###
        state_est_grid = dyn_autoencoder.step(mask_obs, map_visit_mask)
        h_kp1 = dyn_autoencoder.h_k.squeeze().data.cpu().numpy()

        #### Update the reinforcement learning agent ###
        dqn_agent.step(h_k, action, reward, h_kp1, done=False)

        list_rewards.append(reward)
        fire_count = (torch.sum(state[2])).item()
        fire_visit = (torch.sum(mask_obs.permute(2,0,1) * state[2].unsqueeze(0))).item()

        if fire_count < 1:
            print('no fire')
        else:
            list_count_fire_visit.append(fire_visit)
            list_count_all_fire.append(fire_count)

        ### Render the Env. and the Est. ###
        if i % N_RENDER_PERIOD == 0:
            img_env   = env.output_image()
            img_state_est_grid = dyn_autoencoder.output_image(state_est_grid)
            
            render('env', img_env, 1)
            render('img_state_est_grid', img_state_est_grid, 1)            


        ### Training ###
        loss_val, loss_val_cross, loss_val_ent, O_np_val =  dyn_autoencoder.update(memory, N_TRAIN_BATCH, N_TRAIN_WINDOW)
        list_loss.append(loss_val)
        list_cross_entropy_loss.append(loss_val_cross)
        list_entropy_loss.append(loss_val_ent)

        if i%N_LOGGING_PERIOD == 0:
            avg_loss = np.mean(np.array(list_loss))
            list_loss = []
            writer.add_scalar('dynautoenc/loss', avg_loss, i)

            avg_loss_cross = np.mean(np.array(list_cross_entropy_loss))
            list_cross_entropy_loss = []
            writer.add_scalar('dynautoenc/crossentropy', avg_loss_cross, i)

            avg_loss_entropy = np.mean(np.array(list_entropy_loss))
            list_entropy_loss = []
            writer.add_scalar('dynautoenc/shannonentropy', avg_loss_entropy, i)

            avg_reward = np.mean(np.array(list_rewards))
            list_rewards = []
            writer.add_scalar('perform/rewards', avg_reward, i)

            avg_count_fire_visit = np.mean(np.array(list_count_fire_visit))
            list_count_fire_visit = []
            writer.add_scalar('perform/avg_count_fire_visit', avg_count_fire_visit, i)

            avg_count_all_fire = np.mean(np.array(list_count_all_fire))
            list_count_all_fire = []
            writer.add_scalar('perform/avg_count_all_fire', avg_count_all_fire, i)


            action_0_count = list_action.count(0)
            action_1_count = list_action.count(1)
            action_2_count = list_action.count(2)
            action_3_count = list_action.count(3)
            list_action = []

            if setting['planner_type'] == 'Default':
                writer.add_scalar('action_count/0', action_0_count, i)
                writer.add_scalar('action_count/1', action_1_count, i)
                writer.add_scalar('action_count/2', action_2_count, i)
                writer.add_scalar('action_count/3', action_3_count, i)


            writer.add_scalar('obs_state0/o00', O_np_val[0][0], i)
            writer.add_scalar('obs_state1/o01', O_np_val[0][1], i)
            writer.add_scalar('obs_state2/o02', O_np_val[0][2], i)
            writer.add_scalar('obs_state0/o10', O_np_val[1][0], i)
            writer.add_scalar('obs_state1/o11', O_np_val[1][1], i)
            writer.add_scalar('obs_state2/o12', O_np_val[1][2], i)
            writer.add_scalar('obs_state0/o20', O_np_val[2][0], i)
            writer.add_scalar('obs_state1/o21', O_np_val[2][1], i)
            writer.add_scalar('obs_state2/o22', O_np_val[2][2], i)

            print('losses at iteration: %d, losses: total %.3f, cross %.3f, shannon %.3f' % (i, avg_loss, avg_loss_cross, avg_loss_entropy))
            print('memory size at iteration: %d, size: %d' % (i, len(memory.obs_memory)))

        if (i+1)%N_SAVING_PERIOD==0:
            f_name = name
            dyn_autoencoder.save_the_model(i, f_name)
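train already logs the average number of burning cells and the average number of visited burning cells; a coverage ratio analogous to 'perform/pc_coverd_new_fire' in the earlier demos could be derived from the same two averages. A sketch of such an extra scalar inside the logging block (the tag name is made up):

# Hypothetical extra scalar for the logging block above: fraction of burning cells visited.
if avg_count_all_fire > 0:
    writer.add_scalar('perform/pc_visited_fire',
                      avg_count_fire_visit / avg_count_all_fire, i)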