def get_data(self):
        """
        Build the train / eval / test dataloaders, or return the ones built earlier.

        Eight arrays per split (x, y, flatten_att_nbhd_inputs, flatten_att_flow_inputs,
        att_lstm_inputs, nbhd_inputs, flow_inputs, lstm_inputs) are read from
        ``self._load_cache_train_val_test()`` when ``self.cache_dataset`` is truthy and
        ``self.cache_file_name`` exists, otherwise from ``self._generate_train_val_test()``.
        The loader must return them as train (8), val (8), test (8), in that order.

        Side effects: sets ``self.data`` to ``{}`` when it was ``None``, and sets
        ``self.feature_dim``, ``self.feature_vec_len``, ``self.nbhd_type``,
        ``self.scaler``, ``self.flow_scaler`` and the three ``*_dataloader`` attributes.

        Returns:
            tuple: (train_dataloader, eval_dataloader, test_dataloader) as produced by
            ``generate_dataloader``.

        Raises:
            ValueError: if the loader does not return exactly 24 arrays.
        """
        loader_attrs = ('train_dataloader', 'eval_dataloader', 'test_dataloader')
        if self.data is None:
            self.data = {}
        else:
            # A previous call already marked the data as loaded. The old code fell through
            # with empty-list placeholders and crashed on ``[].shape``; reuse the loaders
            # that were built then, and only rebuild if they are missing.
            cached = tuple(getattr(self, attr, None) for attr in loader_attrs)
            if all(loader is not None for loader in cached):
                return cached

        field_names = ('x', 'y', 'flatten_att_nbhd_inputs', 'flatten_att_flow_inputs',
                       'att_lstm_inputs', 'nbhd_inputs', 'flow_inputs', 'lstm_inputs')
        # Fields normalized with self.scaler / self.flow_scaler, in application order.
        scaler_fields = ('x', 'y', 'flatten_att_nbhd_inputs', 'att_lstm_inputs',
                         'nbhd_inputs', 'lstm_inputs')
        flow_scaler_fields = ('flatten_att_flow_inputs', 'flow_inputs')

        if self.cache_dataset and os.path.exists(self.cache_file_name):
            arrays = tuple(self._load_cache_train_val_test())
        else:
            arrays = tuple(self._generate_train_val_test())

        n_fields = len(field_names)
        if len(arrays) != 3 * n_fields:
            raise ValueError('expected {} arrays (train/val/test x {} fields), got {}'.format(
                3 * n_fields, n_fields, len(arrays)))
        train, val, test = (dict(zip(field_names, arrays[start:start + n_fields]))
                            for start in range(0, 3 * n_fields, n_fields))
        splits = (train, val, test)

        # Dimensions are taken from the un-normalized training arrays.
        self.feature_dim = train['x'].shape[-1]
        self.feature_vec_len = train['lstm_inputs'].shape[-1]
        self.nbhd_type = train['nbhd_inputs'].shape[-1]

        # Both scalers are derived from training data only, then applied to every split.
        self.scaler, self.flow_scaler = self._get_scalar_stdn(
            train['x'], train['y'], train['flow_inputs'])
        for split in splits:
            for name in scaler_fields:
                split[name] = self.scaler.transform(split[name])
        for split in splits:
            for name in flow_scaler_fields:
                split[name] = self.flow_scaler.transform(split[name])

        # Sample i of each split becomes an 8-tuple in ``field_names`` order.
        train_data = list(zip(*(train[name] for name in field_names)))
        eval_data = list(zip(*(val[name] for name in field_names)))
        test_data = list(zip(*(test[name] for name in field_names)))
        self.train_dataloader, self.eval_dataloader, self.test_dataloader = \
            generate_dataloader(train_data, eval_data, test_data, self.feature_name,
                                self.batch_size, self.num_workers, pad_with_last_sample=self.pad_with_last_sample)
        return self.train_dataloader, self.eval_dataloader, self.test_dataloader
# Example no. 2
# 0
    def get_data(self):
        """
        Return the DataLoaders for the training, validation and test data.

        On the first call the arrays are read from ``self._load_cache_train_val_test()``
        (when ``self.cache_dataset`` is truthy and ``self.cache_file_name`` exists) or
        from ``self._generate_train_val_test()``, normalized, and wrapped into
        dataloaders. On later calls the dataloaders built earlier are returned.

        Side effects: sets ``self.data`` to ``{}`` when it was ``None``, and sets
        ``self.feature_dim``, ``self.ext_dim``, ``self.scaler``, ``self.ext_scaler``
        and the three ``*_dataloader`` attributes.

        Returns:
            tuple: tuple contains:
                train_dataloader: Dataloader composed of Batch (class) \n
                eval_dataloader: Dataloader composed of Batch (class) \n
                test_dataloader: Dataloader composed of Batch (class)
        """
        if self.data is None:
            self.data = {}
        else:
            # A previous call already marked the data as loaded. The old code fell through
            # with empty-list placeholders and crashed on ``[].shape``; reuse the loaders
            # that were built then, and only rebuild if they are missing.
            cached = tuple(getattr(self, attr, None) for attr in
                           ('train_dataloader', 'eval_dataloader', 'test_dataloader'))
            if all(loader is not None for loader in cached):
                return cached

        # Load the dataset
        if self.cache_dataset and os.path.exists(self.cache_file_name):
            loaded = self._load_cache_train_val_test()
        else:
            loaded = self._generate_train_val_test()
        # NOTE(review): the external arrays are unpacked as train, test, val (test before
        # val), exactly as in the original code — confirm against the loader's return order.
        (x_train, y_train, x_val, y_val, x_test, y_test,
         ext_x_train, ext_y_train, ext_x_test, ext_y_test, ext_x_val, ext_y_val) = loaded

        # Normalize the data
        self.feature_dim = x_train.shape[-1]
        self.ext_dim = ext_x_train.shape[-1]
        # The main scaler is built from the first ``output_dim`` channels of the training
        # arrays, the external scaler from the remaining channels.
        self.scaler = self._get_scalar(self.scaler_type,
                                       x_train[..., :self.output_dim],
                                       y_train[..., :self.output_dim])
        self.ext_scaler = self._get_scalar(self.ext_scaler_type,
                                           x_train[..., self.output_dim:],
                                           y_train[..., self.output_dim:])
        x_train, y_train, x_val, y_val, x_test, y_test = [
            self.scaler.transform(arr)
            for arr in (x_train, y_train, x_val, y_val, x_test, y_test)
        ]
        if self.normal_external:
            ext_x_train, ext_y_train, ext_x_val, ext_y_val, ext_x_test, ext_y_test = [
                self.ext_scaler.transform(arr)
                for arr in (ext_x_train, ext_y_train, ext_x_val, ext_y_val, ext_x_test, ext_y_test)
            ]

        # Zip the per-sample X, y and external arrays of each split into a list:
        # train_data[i] is the tuple (x_train[i], y_train[i], ext_x_train[i], ext_y_train[i]);
        # the validation and test sets are built the same way.
        train_data = list(zip(x_train, y_train, ext_x_train, ext_y_train))
        eval_data = list(zip(x_val, y_val, ext_x_val, ext_y_val))
        test_data = list(zip(x_test, y_test, ext_x_test, ext_y_test))
        # Convert to DataLoaders
        self.train_dataloader, self.eval_dataloader, self.test_dataloader = \
            generate_dataloader(train_data, eval_data, test_data, self.feature_name,
                                self.batch_size, self.num_workers, pad_with_last_sample=self.pad_with_last_sample)
        return self.train_dataloader, self.eval_dataloader, self.test_dataloader
    def get_data(self):
        """
        Return the DataLoaders for the training, validation and test data.

        The nine arrays (I, J and Neg for each of train / eval / test) are read from
        ``self._load_cache()`` when ``self.cache_dataset`` is truthy and
        ``self.cache_file_name`` exists, and from ``self._generate_data()`` otherwise.

        Returns:
            tuple: (train_dataloader, eval_dataloader, test_dataloader) as produced by
            ``generate_dataloader``; the same objects are stored on ``self``.
        """
        # Load the dataset
        cache_available = self.cache_dataset and os.path.exists(self.cache_file_name)
        arrays = self._load_cache() if cache_available else self._generate_data()
        (i_tr, j_tr, neg_tr,
         i_ev, j_ev, neg_ev,
         i_te, j_te, neg_te) = arrays

        # Sample k of each split is the triple (I[k], J[k], Neg[k]).
        train_samples = list(zip(i_tr, j_tr, neg_tr))
        eval_samples = list(zip(i_ev, j_ev, neg_ev))
        test_samples = list(zip(i_te, j_te, neg_te))

        loaders = generate_dataloader(train_samples, eval_samples, test_samples,
                                      self.feature_name, self.batch_size, self.num_workers)
        self.train_dataloader, self.eval_dataloader, self.test_dataloader = loaders

        return self.train_dataloader, self.eval_dataloader, self.test_dataloader
 def get_data(self):
     """
     Return the DataLoaders for the training, validation and test data.

     On the first call the twelve arrays (x_time, x_space, x_ext, y for each of
     train / val / test) are read from ``self._load_cache_train_val_test()`` (when
     ``self.cache_dataset`` is truthy and ``self.cache_file_name`` exists) or from
     ``self._generate_train_val_test()``, normalized in place, and wrapped into
     dataloaders. On later calls the dataloaders built earlier are returned.

     Side effects: sets ``self.data`` to ``{}`` when it was ``None``, and sets
     ``self.feature_dim``, ``self.ext_dim``, ``self.scaler``, ``self.num_batches``
     and the three ``*_dataloader`` attributes. The loaded arrays are modified in place.

     Returns:
         tuple: (train_dataloader, eval_dataloader, test_dataloader) as produced by
         ``generate_dataloader``.
     """
     if self.data is None:
         self.data = {}
     else:
         # A previous call already marked the data as loaded. The old code fell through
         # with empty-list placeholders and crashed on ``[].shape``; reuse the loaders
         # that were built then, and only rebuild if they are missing.
         cached = tuple(getattr(self, attr, None) for attr in
                        ('train_dataloader', 'eval_dataloader', 'test_dataloader'))
         if all(loader is not None for loader in cached):
             return cached

     # Load the dataset
     if self.cache_dataset and os.path.exists(self.cache_file_name):
         loaded = self._load_cache_train_val_test()
     else:
         loaded = self._generate_train_val_test()
     (x_time_train, x_space_train, x_ext_train, y_train,
      x_time_val, x_space_val, x_ext_val, y_val,
      x_time_test, x_space_test, x_ext_test, y_test) = loaded

     # Normalize the data
     self.feature_dim = x_time_train.shape[-1]
     self.ext_dim = x_ext_train.shape[-1]
     self.scaler = self._get_scalar(self.scaler_type, x_space_train, y_train)
     # Only the first ``output_dim`` channels of every array are rescaled, in place.
     # NOTE(review): the x_ext arrays are scaled with the same scaler as the other
     # arrays, as in the original code — confirm this is intended.
     for arr in (x_time_train, x_space_train, x_ext_train, y_train,
                 x_time_val, x_space_val, x_ext_val, y_val,
                 x_time_test, x_space_test, x_ext_test, y_test):
         arr[..., :self.output_dim] = self.scaler.transform(arr[..., :self.output_dim])

     # Sample i of each split is the tuple (x_time[i], x_space[i], x_ext[i], y[i]).
     train_data = list(zip(x_time_train, x_space_train, x_ext_train, y_train))
     eval_data = list(zip(x_time_val, x_space_val, x_ext_val, y_val))
     test_data = list(zip(x_time_test, x_space_test, x_ext_test, y_test))
     self.train_dataloader, self.eval_dataloader, self.test_dataloader = \
         generate_dataloader(train_data, eval_data, test_data, self.feature_name,
                             self.batch_size, self.num_workers, pad_with_last_sample=self.pad_with_last_sample)
     self.num_batches = len(self.train_dataloader)
     return self.train_dataloader, self.eval_dataloader, self.test_dataloader