def preprocess_data(self, x, val_x=None, feature_length=None):
    """Pad the training data, and the validation data when it is supplied.

    Both inputs are passed through ``pad_seq`` with ``pad_len=feature_length``.

    Args:
        x: Data forwarded to ``pad_seq``.
        val_x: Optional second data set padded the same way.
        feature_length: Value forwarded to ``pad_seq`` as ``pad_len``.

    Returns:
        The padded ``x`` alone when ``val_x`` is None, otherwise the tuple
        ``(padded_x, padded_val_x)``.
    """
    padded_x = pad_seq(x, pad_len=feature_length)
    if val_x is None:
        return padded_x

    padded_val_x = pad_seq(val_x, pad_len=feature_length)
    return padded_x, padded_val_x
def get_features_data(
    x, feature_func, model_kind, split_length=None, feature_length=None
):
    """Turn raw samples into model inputs according to ``model_kind``.

    * ``model_kind == 0``: every sample is truncated to ``split_length``,
      passed through ``feature_func``, summarised by the mean and standard
      deviation taken along axis 0 (each flattened, then concatenated), and
      the stacked float32 matrix is standardised with a freshly fitted
      ``StandardScaler``.
    * ``model_kind == 1``: every sample is truncated to ``split_length``,
      passed through ``feature_func`` and padded by ``pad_seq`` using
      ``pad_len=feature_length``.
    * any other value: ``x`` is returned untouched.

    Args:
        x: Sequence of sliceable samples.
        feature_func: Callable applied to the whole list of truncated samples.
        model_kind: Selector for the processing path (see above).
        split_length: Upper bound used in ``sample[:split_length]``; ``None``
            keeps each sample whole.
        feature_length: Forwarded to ``pad_seq``; only used for kind 1.

    Returns:
        The processed features, or the original ``x`` for an unknown kind.
    """
    if model_kind not in (0, 1):
        return x

    truncated = [sample[:split_length] for sample in x]
    features = feature_func(truncated)

    if model_kind == 1:
        return pad_seq(features, pad_len=feature_length)

    # Kind 0: collapse axis 0 of every item into [mean..., std...].
    # presumably axis 0 is the frame/time axis -- TODO confirm with callers
    summaries = [
        np.concatenate(
            [
                np.mean(item, axis=0).reshape(-1),
                np.std(item, axis=0).reshape(-1),
            ],
            axis=-1,
        )
        for item in features
    ]
    summary_matrix = np.asarray(summaries, dtype=np.float32)
    # The scaler is fitted on exactly the data passed in to this call.
    return StandardScaler().fit_transform(summary_matrix[:, :])
def _get_preprocess_data(self, sample_index, model_kind, pre_func):
    """Return features and labels for ``sample_index``, filling the cache.

    Indices missing from ``self._pre_x`` are preprocessed with ``pre_func``
    and stored there; the result is then read back from ``self._pre_x`` and
    ``self._all_y`` in the order given by ``sample_index``.

    Args:
        sample_index: Iterable of sample indices to fetch.
        model_kind: ``0`` reduces each item to standardised mean/std
            statistics, ``1`` pads each item with ``pad_seq``; any other
            value stores the raw ``pre_func`` output.
        pre_func: Callable applied to the list of not-yet-cached raw samples.

    Returns:
        Tuple ``(x, y)`` of lists aligned with ``sample_index``.
    """
    handles_sequences = model_kind == 0 or model_kind == 1

    # Only indices that are not cached yet need any work.
    pending = {idx for idx in sample_index if idx not in self._pre_x}
    raw_data = [self._all_x[idx] for idx in pending]
    if handles_sequences:
        raw_data = [
            sample[:self._raw_data_split_length] for sample in raw_data
        ]
    pre_data = pre_func(raw_data)

    # The feature length is fixed the first time it is needed and capped at
    # MAX_FRAME_NUM. Padding itself is not applied here; kind 1 pads below.
    if handles_sequences and self._feature_length is None:
        self._feature_length = min(MAX_FRAME_NUM, get_max_length(pre_data))

    log("Total {}, update {}".format(len(sample_index), len(pending)))

    if pending:
        if model_kind == 0:
            # Summarise every item as [mean..., std...] taken along axis 0.
            summaries = [
                np.concatenate(
                    [
                        np.mean(item, axis=0).reshape(-1),
                        np.std(item, axis=0).reshape(-1),
                    ],
                    axis=-1,
                )
                for item in pre_data
            ]
            summary_matrix = np.asarray(summaries)
            # NOTE(review): the scaler is fitted only on the newly processed
            # items of this call, not on previously cached ones.
            pre_data = StandardScaler().fit_transform(summary_matrix[:, :])
        elif model_kind == 1:
            pre_data = pad_seq(pre_data, pad_len=self._feature_length)

    # `pending` is iterated in the same order that built `raw_data`, so
    # position k of `pre_data` belongs to the k-th pending index.
    for position, idx in enumerate(pending):
        self._pre_x[idx] = pre_data[position]

    features = [self._pre_x[idx] for idx in sample_index]
    labels = [self._all_y[idx] for idx in sample_index]
    return features, labels