    def __init__(self, data_dir, dataset_kind, config):

        self.data_dir = data_dir

        self.dataset_kind = dataset_kind
        self.image_size = get_image_size(dataset_kind)

        self.config = config
Example #2
    def __init__(self, data_dir, dataset_kind, config):

        self.data_dir = data_dir
        # self.seq_len = config.obs_len + config.pred_len
        # self.max_n_peds = config.max_n_peds
        # self.n_neighbor_pixels = config.n_neighbor_pixels
        # self.grid_side = config.grid_side
        self.dataset_kind = dataset_kind
        self.image_size = get_image_size(dataset_kind)

        self.config = config
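
The commented-out lines above show the fields this example reads from `config`. Below is a minimal sketch of such a config with made-up values (SimpleNamespace stands in for the project's real config class, and the `DatasetLoader` name is assumed):

from types import SimpleNamespace

# hypothetical values, for illustration only
config = SimpleNamespace(
    obs_len=8, pred_len=12,   # observation / prediction horizons in frames
    max_n_peds=52,            # maximum number of pedestrians per frame
    n_neighbor_pixels=32,     # neighborhood size used by the grid mask
    grid_side=4,              # the social grid has grid_side x grid_side cells
    emb_dim=64, lr=0.003,     # embedding size and learning rate
)
# dataset_kind as used elsewhere in the project; loader class name assumed
loader = DatasetLoader("data/eth", dataset_kind, config)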
    def __init__(self, data_dir, seq_len, max_n_peds, n_neighbor_pixels,
                 grid_side, dataset_kind):
        # self refers to this object; each constructor argument is stored as
        # an attribute, e.g. self.data_dir holds the path to the data directory
        self.data_dir = data_dir
        self.seq_len = seq_len
        self.max_n_peds = max_n_peds
        self.n_neighbor_pixels = n_neighbor_pixels
        self.grid_side = grid_side
        self.dataset_kind = dataset_kind
        self.image_size = get_image_size(dataset_kind)
    def normalize_pos_df(pos_df, dataset_kind):
        image_size = np.array(get_image_size(dataset_kind))

        xy = np.array(pos_df[["x", "y"]])
        # originally (0, 0) is the center of the frame,
        # therefore move (0, 0) to top-left
        xy += image_size / 2
        # clipping
        xy[:, 0] = np.clip(xy[:, 0], 0.0, image_size[0] - 1)
        xy[:, 1] = np.clip(xy[:, 1], 0.0, image_size[1] - 1)

        # normalize
        xy /= image_size

        # normalize position (x, y) respectively
        pos_df_norm = pd.DataFrame({
            "frame": pos_df["frame"],
            "id": pos_df["id"],
            "x": xy[:, 0],
            "y": xy[:, 1]
        })
        return pos_df_norm
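
A quick usage sketch for normalize_pos_df, assuming get_image_size returns a (width, height) pair and the raw coordinates are centered on the frame center as the comment above states (all values made up):

import pandas as pd

# made-up positions centered on (0, 0)
pos_df = pd.DataFrame({
    "frame": [0, 0, 1],
    "id":    [1, 2, 1],
    "x":     [-120.0, 35.5, -118.0],
    "y":     [60.0, -14.2, 58.5],
})
pos_df_norm = normalize_pos_df(pos_df, dataset_kind)
# x and y are now shifted to a top-left origin, clipped, and scaled into [0, 1)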
Example #6
    def _build_model(self, config: ModelConfig):
        o_obs_batch = []
        for t in range(config.obs_len):
            print("t: ", t)
            x_t = Lambda(lambda x: x[:, t, :, :])(self.x_input)
            grid_t = Lambda(lambda grid: grid[:, t, ...])(self.grid_input)

            h_t, c_t = [], []
            o_t = []

            if t == 0:
                prev_h_t = Lambda(lambda z: z[:, t, :, :])(self.zeros_input)
                prev_c_t = Lambda(lambda z: z[:, t, :, :])(self.zeros_input)

            # compute $H_t$
            # (n_samples, max_n_peds, (grid_side ** 2) * lstm_state_dim)
            H_t = self._compute_social_tensor(grid_t, prev_h_t, config)

            for ped_index in range(config.max_n_peds):
                print("(t, li):", t, ped_index)
                # ----------------------------------------
                # compute $e_i^t$ and $a_i^t$
                # ----------------------------------------

                x_pos_it = Lambda(lambda x_t: x_t[:, ped_index, 1:])(x_t)
                e_it = self.W_e_relu(x_pos_it)

                # compute a_it
                H_it = Lambda(lambda H_t: H_t[:, ped_index, ...])(H_t)
                a_it = self.W_a_relu(H_it)

                # build concatenated embedding states for LSTM input
                emb_it = Concatenate()([e_it, a_it])
                emb_it = Reshape((1, 2 * config.emb_dim))(emb_it)

                # initial_state is set to h_i_t; reuse h_i_t as the
                # initial_state when applying the LSTM to the next step
                prev_states_it = [
                    prev_h_t[:, ped_index], prev_c_t[:, ped_index]
                ]
                lstm_output, h_it, c_it = self.lstm_layer(
                    emb_it, prev_states_it)

                h_t.append(h_it)
                c_t.append(c_it)

                # compute output_it, which shape is (batch_size, 5)
                o_it = self.W_p(lstm_output)
                o_t.append(o_it)

            # convert lists of h_it/c_it/o_it to h_t/c_t/o_t respectively
            h_t = _stack_permute_axis_zero(h_t)
            c_t = _stack_permute_axis_zero(c_t)
            o_t = _stack_permute_axis_zero(o_t)

            o_obs_batch.append(o_t)

            # current => previous
            prev_h_t = h_t
            prev_c_t = c_t

        # convert list of output_t to output_batch
        o_obs_batch = _stack_permute_axis_zero(o_obs_batch)
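        # assumed result shape: (batch_size, obs_len, max_n_peds, 5)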

        # ----------------------------------------------------------------------
        # Prediction
        # ----------------------------------------------------------------------
        # at this point, prev_h_t and prev_c_t hold the final states from the
        # observation steps

        x_obs_t_final = Lambda(lambda x: x[:, -1, :, :])(self.x_input)
        pid_obs_t_final = Lambda(lambda x_t: x_t[:, :, 0])(x_obs_t_final)
        pid_obs_t_final = Lambda(lambda p_t: K.expand_dims(p_t, 2))(
            pid_obs_t_final)

        x_pred_batch = []
        o_pred_batch = []
        for t in range(config.pred_len):
            if t == 0:
                prev_o_t = Lambda(lambda o_b: o_b[:, -1, :, :])(o_obs_batch)

            pred_pos_t = normal2d_sample(prev_o_t)
            # assume all the pedestrians in the final observation frame are
            # present in the prediction frames
            x_pred_t = Concatenate(axis=2)([pid_obs_t_final, pred_pos_t])

            grid_t = tf_grid_mask(x_pred_t,
                                  get_image_size(config.test_dataset_kind),
                                  config.n_neighbor_pixels, config.grid_side)

            h_t, c_t, o_t = [], [], []

            # compute $H_t$
            # (n_samples, max_n_peds, (grid_side ** 2) * lstm_state_dim)
            H_t = self._compute_social_tensor(grid_t, prev_h_t, config)

            for i in range(config.max_n_peds):
                print("(t, li):", t, i)

                prev_o_it = Lambda(lambda o_t: o_t[:, i, :])(prev_o_t)
                H_it = Lambda(lambda H_t: H_t[:, i, ...])(H_t)

                # pred_pos_it: (batch_size, 2)
                pred_pos_it = normal2d_sample(prev_o_it)

                # compute e_it and a_it
                # e_it: (batch_size, emb_dim)
                # a_it: (batch_size, emb_dim)
                e_it = self.W_e_relu(pred_pos_it)
                a_it = self.W_a_relu(H_it)

                # build concatenated embedding states for LSTM input
                # emb_it: (batch_size, 1, 2 * emb_dim)
                emb_it = Concatenate()([e_it, a_it])
                emb_it = Reshape((1, 2 * config.emb_dim))(emb_it)

                # initial_state is set to h_i_t; reuse h_i_t as the
                # initial_state when applying the LSTM to the next step
                prev_states_it = [prev_h_t[:, i], prev_c_t[:, i]]
                lstm_output, h_it, c_it = self.lstm_layer(
                    emb_it, prev_states_it)

                h_t.append(h_it)
                c_t.append(c_it)

                # compute output_it, which shape is (batch_size, 5)
                o_it = self.W_p(lstm_output)
                o_t.append(o_it)

            # convert lists of h_it/c_it/o_it to h_t/c_t/o_t respectively
            h_t = _stack_permute_axis_zero(h_t)
            c_t = _stack_permute_axis_zero(c_t)
            o_t = _stack_permute_axis_zero(o_t)

            o_pred_batch.append(o_t)
            x_pred_batch.append(x_pred_t)

            # current => previous
            prev_h_t = h_t
            prev_c_t = c_t
            prev_o_t = o_t

        # convert list of output_t to output_batch
        o_pred_batch = _stack_permute_axis_zero(o_pred_batch)
        x_pred_batch = _stack_permute_axis_zero(x_pred_batch)
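        # assumed shapes: o_pred_batch (batch_size, pred_len, max_n_peds, 5),
        # x_pred_batch (batch_size, pred_len, max_n_peds, 3) = (pid, x, y)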

        # o_concat_batch = Lambda(lambda os: tf.concat(os, axis=1))(
        #     [o_obs_batch, o_pred_batch])

        # this should be the model that is actually needed for training
        self.train_model = Model(
            [self.x_input, self.grid_input, self.zeros_input], o_pred_batch)

        lr = 0.003
        optimizer = RMSprop(lr=lr)
        self.train_model.compile(optimizer, self._compute_loss)

        self.sample_model = Model(
            [self.x_input, self.grid_input, self.zeros_input], x_pred_batch)
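
The _build_model code above (and the refactored variant below) relies on two helpers that are not shown. The following are minimal sketches of what they plausibly do, inferred from how they are called; they are assumptions, not the project's actual code. _stack_permute_axis_zero turns a Python list of per-step (or per-pedestrian) tensors into one tensor with the batch axis first, and normal2d_sample draws an (x, y) position from the 5-parameter bivariate-normal output (mu_x, mu_y, sigma_x, sigma_y, rho).

def _stack_permute_axis_zero(xs):
    # sketch (assumption): stack along a new axis 0, then swap axes 0 and 1
    # so the batch dimension comes first: (T, batch, ...) -> (batch, T, ...)
    xs = Lambda(lambda xs: K.stack(xs, axis=0))(xs)
    perm = [1, 0] + list(range(2, K.ndim(xs)))
    return Lambda(lambda x: K.permute_dimensions(x, perm))(xs)

def normal2d_sample(o):
    # sketch (assumption): o packs (mu_x, mu_y, sigma_x, sigma_y, rho) on the
    # last axis; draw a reparameterized sample from the bivariate normal
    def _sample(o):
        mu_x, mu_y = o[..., 0], o[..., 1]
        sigma_x, sigma_y = K.exp(o[..., 2]), K.exp(o[..., 3])  # assumed log-sigma
        rho = K.tanh(o[..., 4])                                # assumed squashing
        z1 = K.random_normal(K.shape(mu_x))
        z2 = K.random_normal(K.shape(mu_x))
        # Cholesky-style draw from a correlated bivariate normal
        x = mu_x + sigma_x * z1
        y = mu_y + sigma_y * (rho * z1 + K.sqrt(1.0 - K.square(rho)) * z2)
        return K.stack([x, y], axis=-1)
    return Lambda(_sample)(o)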
    def __init__(self, data_dir, dataset_kind):
        self._data_dir = data_dir
        self.image_size = get_image_size(dataset_kind)

    def _build_model(self, config: ModelConfig):
        o_obs_batch = []
        for t in range(config.obs_len):
            print("t: ", t)
            x_t = Lambda(lambda x: x[:, t, :, :])(self.x_input)
            grid_t = Lambda(lambda grid: grid[:, t, ...])(self.grid_input)

            if t == 0:
                prev_h_t = Lambda(lambda z: z[:, t, :, :])(self.zeros_input)
                prev_c_t = Lambda(lambda z: z[:, t, :, :])(self.zeros_input)

            # compute the social tensor $H_t$
            H_t = self._compute_social_tensor(grid_t, prev_h_t, config)

            prev_ht_ct_ot = [prev_h_t, prev_c_t, x_t]
            h_t, c_t, o_t = self.compute_ht_ct_ot(
                config, prev_ht_ct_ot, H_t, t, is_pred=False)


            o_obs_batch.append(o_t)

            # current => previous
            prev_h_t = h_t
            prev_c_t = c_t


        # convert the list of output_t into output_batch
        o_obs_batch = _stack_permute_axis_zero(o_obs_batch)

        # position prediction

        x_obs_t_final = Lambda(lambda x: x[:, -1, :, :])(self.x_input)
        pid_obs_t_final = Lambda(lambda x_t: x_t[:, :, 0])(x_obs_t_final)
        pid_obs_t_final = Lambda(lambda p_t: K.expand_dims(p_t, 2))(
            pid_obs_t_final)

        x_pred_batch = []
        o_pred_batch = []
        for t in range(config.pred_len):
            if t == 0:
                prev_o_t = Lambda(lambda o_b: o_b[:, -1, :, :])(o_obs_batch)
            # predict coordinates by sampling from the bivariate normal distribution
            pred_pos_t = normal2d_sample(prev_o_t)
            # predict for all the pedestrians present in the final observation frame
            x_pred_t = Concatenate(axis=2)([pid_obs_t_final, pred_pos_t])

            grid_t = tf_grid_mask(x_pred_t, get_image_size(config.test_dataset_kind),
                                  config.n_neighbor_pixels, config.grid_side)

            # compute the social tensor $H_t$
            H_t = self._compute_social_tensor(grid_t, prev_h_t, config)

            prev_ht_ct_ot = [prev_h_t, prev_c_t, prev_o_t]
            h_t, c_t, o_t = self.compute_ht_ct_ot(config, prev_ht_ct_ot, H_t, t)

            o_pred_batch.append(o_t)
            x_pred_batch.append(x_pred_t)

            # current => previous
            prev_h_t = h_t
            prev_c_t = c_t
            prev_o_t = o_t

        # convert the list of output_t into output_batch
        o_pred_batch = _stack_permute_axis_zero(o_pred_batch)
        x_pred_batch = _stack_permute_axis_zero(x_pred_batch)

        self.train_model = Model(
            [self.x_input, self.grid_input, self.zeros_input],
            o_pred_batch
        )
        # set the learning rate
        optimizer = RMSprop(lr=config.lr)
        self.train_model.compile(optimizer, self._compute_loss)

        self.sample_model = Model(
            [self.x_input, self.grid_input, self.zeros_input],
            x_pred_batch
        )
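
The refactored _build_model above folds the per-pedestrian loop into self.compute_ht_ct_ot. Below is a hypothetical reconstruction of that helper, pieced together from the unrefactored loop in the earlier _build_model example; it is an inference, not the author's actual code:

    def compute_ht_ct_ot(self, config, prev_ht_ct_ot, H_t, t, is_pred=True):
        prev_h_t, prev_c_t, pos_src = prev_ht_ct_ot
        h_t, c_t, o_t = [], [], []
        for i in range(config.max_n_peds):
            # i=i binds the loop index eagerly, avoiding late-binding lambdas
            if is_pred:
                # prediction step: sample the position from the previous output
                prev_o_it = Lambda(lambda o, i=i: o[:, i, :])(pos_src)
                pos_it = normal2d_sample(prev_o_it)
            else:
                # observation step: use the observed position (skip the pid column)
                pos_it = Lambda(lambda x, i=i: x[:, i, 1:])(pos_src)
            H_it = Lambda(lambda H, i=i: H[:, i, ...])(H_t)

            # embed position and social tensor, then run one LSTM step
            e_it = self.W_e_relu(pos_it)
            a_it = self.W_a_relu(H_it)
            emb_it = Reshape((1, 2 * config.emb_dim))(Concatenate()([e_it, a_it]))
            lstm_output, h_it, c_it = self.lstm_layer(
                emb_it, [prev_h_t[:, i], prev_c_t[:, i]])

            h_t.append(h_it)
            c_t.append(c_it)
            o_t.append(self.W_p(lstm_output))  # (batch_size, 5)

        return (_stack_permute_axis_zero(h_t),
                _stack_permute_axis_zero(c_t),
                _stack_permute_axis_zero(o_t))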