def get_sample_meanvar(train_files):
    # Two-pass estimate of the per-feature mean and (population) variance over all
    # training files; the last column of each .npy file is a label and is dropped.
    # Pass 1: size-weighted running mean across files.
    size_acc = 0
    data = np.load(train_files[0])
    data = data[:, 0:-1]
    size_acc += np.shape(data)[0]
    mean_acc = data.mean(axis=0)
    print('%s: Getting mean of training samples...' % sys.argv[0])
    for i in range(1, len(train_files)):
        data = np.load(train_files[i])
        data = data[:, 0:-1]
        size_now = np.shape(data)[0]
        mean_now = data.mean(axis=0)
        mean_acc = (mean_now * size_now + mean_acc * size_acc) / (size_now + size_acc)
        size_acc += size_now
    # Pass 2: accumulate squared deviations from the global mean.
    size_acc = 0
    data = np.load(train_files[0])
    data = data[:, 0:-1]
    size_acc += np.shape(data)[0]
    var_acc = np.sum(np.square(data - mean_acc), axis=0)
    print('%s: Getting variance of training samples...' % sys.argv[0])
    for i in range(1, len(train_files)):
        data = np.load(train_files[i])
        data = data[:, 0:-1]
        size_now = np.shape(data)[0]
        size_acc += size_now
        var_acc += np.sum(np.square(data - mean_acc), axis=0)
    var_acc = var_acc / size_acc
    return mean_acc, var_acc
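# A hedged alternative sketch (not from the original code): the two-pass loop above
# reads every file twice. The same per-feature mean and population variance can be
# accumulated in a single pass with the parallel/combine update (Chan et al.),
# assuming the same file layout as get_sample_meanvar above: .npy arrays whose last
# column is a label. The function name is made up for illustration.
import numpy as np

def get_sample_meanvar_single_pass(train_files):
    n_acc, mean_acc, m2_acc = 0, None, None
    for path in train_files:
        data = np.load(path)[:, 0:-1]                       # drop label column
        n = data.shape[0]
        mean = data.mean(axis=0)
        m2 = np.sum(np.square(data - mean), axis=0)         # sum of squared deviations
        if mean_acc is None:
            n_acc, mean_acc, m2_acc = n, mean, m2
            continue
        delta = mean - mean_acc
        total = n_acc + n
        m2_acc = m2_acc + m2 + np.square(delta) * n_acc * n / total
        mean_acc = mean_acc + delta * n / total
        n_acc = total
    return mean_acc, m2_acc / n_acc                         # population variance, as above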
def normalize(self, data):
    ''' Normalize data based on normalize_mode '''
    assert len(data.shape) == 4
    if self.normalize_mode == '12':
        mean = data.mean(axis=(2, 3), dtype=np.float32, keepdims=True)
        std = data.std(axis=(2, 3), dtype=np.float32, keepdims=True)
        data = np.nan_to_num((data - mean) / std)
    elif self.normalize_mode == '3':
        shape = data.shape
        temp_data = data.reshape((-1, (192 * 224 * 192) // data.shape[2] // data.shape[3],
                                  2, data.shape[2], data.shape[3]))
        mean = temp_data.mean(axis=1, dtype=np.float32, keepdims=True)
        std = temp_data.std(axis=1, dtype=np.float32, keepdims=True)
        data = np.nan_to_num((temp_data - mean) / std).reshape(shape)
    elif self.normalize_mode == '123':
        shape = data.shape
        temp_data = data.reshape((-1, (192 * 224 * 192) // data.shape[2] // data.shape[3],
                                  2, data.shape[2], data.shape[3]))
        mean = temp_data.mean(axis=1, dtype=np.float32, keepdims=True)
        std = temp_data.std(axis=1, dtype=np.float32, keepdims=True)
        data = np.nan_to_num((temp_data - mean) / std).reshape(shape)
        mean = data.mean(axis=(2, 3), dtype=np.float32, keepdims=True)
        std = data.std(axis=(2, 3), dtype=np.float32, keepdims=True)
        data = np.nan_to_num((data - mean) / std)
    return data
def remove_season(data, standardize=True, mean=None, std=None):
    # Remove seasonality (the climatological mean over the 'year' dimension) from data.
    # Returns de-seasonalized data with the same shape as the input, plus the mean/std
    # that were used, so they can be reused on held-out data.
    if mean is None:
        mean = data.mean(dim='year')
        std = data.std(dim='year')
    if standardize:
        data = (data - mean) / std
    else:
        data = data - mean
    return data, mean, std
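# A hedged usage sketch for remove_season, assuming an xarray DataArray with a 'year'
# dimension; the 'station' dimension and the synthetic values are made up for illustration.
import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(30, 4), dims=('year', 'station'),
                  coords={'year': np.arange(1990, 2020)})
anoms, clim_mean, clim_std = remove_season(da)              # standardized anomalies
raw_anoms, _, _ = remove_season(da, standardize=False)      # anomalies in original units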
def plot_dist_with_stats(data, labels=None, title='Distribution of ECG Signal', ax=None, stats=True):
    mean = data.mean(skipna=True)
    std = data.std(skipna=True)
    if ax is None:
        fig, ax = plt.subplots()
    sns.distplot(data, bins=200, fit=norm, kde=True, ax=ax, norm_hist=True, hist=True)
    if stats:
        ax.axvline(mean.item(), color='w', linestyle='dashed', linewidth=2)
        ax.axvline(std.item(), color='r', linestyle='dashed', linewidth=2)
        ax.axvline(-std.item(), color='r', linestyle='dashed', linewidth=2)
    ax.set_xlabel("Samples")
    ax.set_ylabel("Probability density")
    ax.set_title(title)
    ax.text(-7, 0.1, "Extreme negatives")
    ax.text(7, 0.1, "Extreme positives")
    if labels is not None:
        plt.legend(labels=labels)
    plt.show()
    return ax
def get_mean_map(self):
    data = self.data
    N, C, T, V, M = data.shape
    self.mean_map = data.mean(axis=2, keepdims=True).mean(
        axis=4, keepdims=True).mean(axis=0)
    self.std_map = data.transpose((0, 2, 4, 1, 3)).reshape(
        (N * T * M, C * V)).std(axis=0).reshape((C, 1, V, 1))
def dataSetStatistics(data_dir, batch_size, num_data):
    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Current device: ' + str(device))
    transform = transforms.Compose([transforms.ToTensor()])
    img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    total = dataset.__len__()
    print('length of entire dataset:', total)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=16)
    # calculate mean and std for training data
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    print('mean:', mean)
    print('std:', std)
    return mean, std
def dataSetStatistics(data_dir, batch_size, num_data):
    # Detect if we have a GPU available
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # print('Current device: '+str(device))
    transform = transforms.Compose([transforms.ToTensor()])
    # img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
    # Walk up to two directory levels below data_dir and collect all image files,
    # e.g. /var/scratch/jfeins1/resnet-binary/fold0/train/1/5iune00/<file>.
    img_list = []
    for item in listdir(data_dir):
        if isfile(join(data_dir, item)):
            img_list.append(item)
        elif isdir(join(data_dir, item)):
            update_data_dir = join(data_dir, item)
            for f in listdir(update_data_dir):
                if isfile(join(update_data_dir, f)):
                    img_list.append(item + '/' + f)
                elif isdir(join(update_data_dir, f)):
                    deeper_data_dir = join(update_data_dir, f)
                    for y in listdir(deeper_data_dir):
                        if isfile(join(deeper_data_dir, y)):
                            img_list.append(item + '/' + f + '/' + y)
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    total = dataset.__len__()
    print('length of entire dataset:', total)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=16)
    # calculate mean and std for training data
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    # print('mean:', mean)
    # print('std:', std)
    return mean, std
def calculate_mean_std_dataset(loader):
    mean_d = 0.
    std_d = 0.
    mean_l = 0.
    std_l = 0.
    nb_samples = 0.
    for data, label in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean_d += data.mean(2).sum(0)
        std_d += data.std(2).sum(0)
        nb_samples += batch_samples
        label = label.view(batch_samples, label.size(1), -1)
        mean_l += label.mean(2).sum(0)
        std_l += label.std(2).sum(0)
    mean_d /= nb_samples
    std_d /= nb_samples
    mean_l /= nb_samples
    std_l /= nb_samples
    print("Data Mean: ", mean_d)
    print("Data Std: ", std_d)
    print("Label Mean: ", mean_l)
    print("Label Std: ", std_l)
    return mean_d, std_d, mean_l, std_l
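# A hedged sketch of an exact alternative (not part of the original loaders): the
# loader-statistics functions above average per-sample standard deviations, which only
# approximates the dataset-level std. Accumulating per-channel sums of x and x^2 gives
# the exact population statistics. Assumes the loader yields (images, ...) batches of
# shape (B, C, H, W); the function name is made up.
import torch

def exact_channel_mean_std(loader):
    n_pixels = 0
    sum_ = None
    sum_sq = None
    for data, *_ in loader:
        b, c = data.size(0), data.size(1)
        flat = data.view(b, c, -1).double()
        if sum_ is None:
            sum_ = torch.zeros(c, dtype=torch.float64)
            sum_sq = torch.zeros(c, dtype=torch.float64)
        sum_ += flat.sum(dim=(0, 2))
        sum_sq += flat.pow(2).sum(dim=(0, 2))
        n_pixels += b * flat.size(2)
    mean = sum_ / n_pixels
    std = (sum_sq / n_pixels - mean.pow(2)).clamp(min=0).sqrt()
    return mean.float(), std.float()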
def prepare(self, *select):
    """
    Make tensors from the selected data/label file pair.

    Args:
        *select: keys passed to self.files to pick which data and label files to load.

    Returns:
        (data, target) tensors, standardized if self.stardardize is set.
    """
    datafile, labelfile = self.files(*select)
    data_filepath = os.path.join(self.root, datafile)
    label_filepath = os.path.join(self.root, labelfile)
    data = []
    target = []
    with open(data_filepath) as data_f, open(label_filepath) as label_f:
        for x, y in zip(data_f, it.islice(label_f, self.sync_files, None)):
            data.append(list(map(int, x.split())))
            target.append(int(y))
    data = torch.Tensor(data)
    target = torch.Tensor(target)
    if self.stardardize:
        data_mean = data.mean(dim=0, keepdim=True)
        data_std = data.std(dim=0, keepdim=True)
        data = (data - data_mean) / data_std
    return data, target
def get_data_ch11(batch_size=10, n=1500):
    data = np.genfromtxt(d2l.download('airfoil'), dtype=np.float32, delimiter='\t')
    data = torch.from_numpy((data - data.mean(axis=0)) / data.std(axis=0))
    data_iter = d2l.load_array((data[:n, :-1], data[:n, -1]),
                               batch_size, is_train=True)
    return data_iter, data.shape[1] - 1
def load_sample(fname, normalize=True):
    from scipy.io.wavfile import read
    mat = read(fname)[1]
    mat = np.float32(mat)
    data = mat.squeeze()[None]
    if normalize:
        data = (data - data.mean()) / data.std()
    return data
def __getitem__(self, index):
    fpath = os.path.join(self.wav_dir, self.df.fname[index])
    y, sr = librosa.load(fpath, sr=self.sr)
    if sr is None:
        print('WARNING:', fpath)
        sr = 44100
    # random crop
    y = random_crop(y, int(self.max_length * sr))
    # feature extraction
    n_fft = int(self.window_size * sr)
    hop_length = int(self.hop_size * sr)
    if self.feature == 'mfcc':
        feature = librosa.feature.mfcc(y=y, sr=sr, n_fft=n_fft,
                                       hop_length=hop_length, n_mfcc=self.n_feature)
    elif self.feature == 'melgram':
        feature = librosa.feature.melspectrogram(y, sr=sr, n_fft=n_fft,
                                                 hop_length=hop_length, n_mels=self.n_feature)
    else:
        print('Invalid feature name: %s' % self.feature)
        exit(1)
    data = torch.from_numpy(feature).float()
    s = data.size()
    if self.model_type == 'alex2d' or self.model_type == 'resnet':
        # For Conv2d, use (channel, features, frames)
        data.resize_(1, s[0], s[1])
    elif self.model_type == 'alex1d' or self.model_type == 'lstm':
        # For Conv1d, use (features, frames)
        data.resize_(s[0], s[1])
    else:
        print('Invalid conv type: %s' % self.model_type)
        exit(1)
    mean = data.mean()
    std = data.std()
    if std != 0:
        data.add_(-mean)
        data.div_(std)
    if self.test:
        # In test mode there is no ground-truth label, so return only the data.
        return data
    else:
        # label
        label = self.df.label_idx[index]
        return data, label
def normalize_dataset(data, normalizer, column_wise=False):
    if normalizer == 'max01':
        if column_wise:
            minimum = data.min(axis=0, keepdims=True)
            maximum = data.max(axis=0, keepdims=True)
        else:
            minimum = data.min()
            maximum = data.max()
        scaler = MinMax01Scaler(minimum, maximum)
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax01 Normalization')
    elif normalizer == 'max11':
        if column_wise:
            minimum = data.min(axis=0, keepdims=True)
            maximum = data.max(axis=0, keepdims=True)
        else:
            minimum = data.min()
            maximum = data.max()
        scaler = MinMax11Scaler(minimum, maximum)
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax11 Normalization')
    elif normalizer == 'std':
        if column_wise:
            mean = data.mean(axis=0, keepdims=True)
            std = data.std(axis=0, keepdims=True)
        else:
            mean = data.mean()
            std = data.std()
        scaler = StandardScaler(mean, std)
        data = scaler.transform(data)
        print('Normalize the dataset by Standard Normalization')
    elif normalizer == 'None':
        scaler = NScaler()
        data = scaler.transform(data)
        print('Does not normalize the dataset')
    elif normalizer == 'cmax':
        # column min-max, to be deprecated
        # note: axis must be the spatial dimension, please check!
        scaler = ColumnMinMaxScaler(data.min(axis=0), data.max(axis=0))
        data = scaler.transform(data)
        print('Normalize the dataset by Column Min-Max Normalization')
    else:
        raise ValueError
    return data, scaler
def get_sample_meanvar(train_files):
    size_acc = 0
    data_dict = pickle.load(open(train_files[0], 'rb'))
    data_dim = get_data_dim(data_dict)
    data, labels = dict_2_data(data_dict, data_dim)
    size_acc += np.shape(data)[0]
    mean_acc = data.mean(axis=0)
    print('%s: Getting mean of training samples...' % sys.argv[0])
    for ind, file in enumerate(train_files):
        if ind == 0:
            continue
        data_dict = pickle.load(open(file, 'rb'))
        data, labels = dict_2_data(data_dict, data_dim)
        size_now = np.shape(data)[0]
        mean_now = data.mean(axis=0)
        mean_acc = (mean_now * size_now + mean_acc * size_acc) / (size_now + size_acc)
        size_acc += size_now
    size_acc = 0
    data_dict = pickle.load(open(train_files[0], 'rb'))
    data, labels = dict_2_data(data_dict, data_dim)
    size_acc += np.shape(data)[0]
    var_acc = np.sum(np.square(data - mean_acc), axis=0)
    print('%s: Getting variance of training samples...' % sys.argv[0])
    for ind, file in enumerate(train_files):
        if ind == 0:
            continue
        data_dict = pickle.load(open(file, 'rb'))
        data, labels = dict_2_data(data_dict, data_dim)
        size_now = np.shape(data)[0]
        size_acc += size_now
        var_acc += np.sum(np.square(data - mean_acc), axis=0)
    var_acc = var_acc / size_acc
    return mean_acc, var_acc
def calculate_mean_std_dataset(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples
    mean /= nb_samples
    std /= nb_samples
    return mean, std
def get_data_statistics(data_loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for (data, labels) in data_loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples
    mean /= nb_samples
    std /= nb_samples
    return mean, std
def get_mean_std(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data, _, _ in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples
    mean /= nb_samples
    std /= nb_samples
    return mean, std
def loss_values_stat(self, loss_vales):
    """ Statistical summary of a group of loss values
    :param loss_vales: loss values produced by the samples of one batch
    :return: a summary string of the statistics
    """
    if not loss_vales:
        raise ValueError
    data = np.array(loss_vales, dtype=float)
    n, sum_ = len(data), data.sum()
    mean, std = data.mean(), data.std()
    msg = f'total_loss={sum_:.3f}, mean±std={mean:.3f}±{std:.3f}({max(data):.3f}->{min(data):.3f})'
    if sum_ < self.min_total_loss:
        self.min_total_loss = sum_
        msg = '*' + msg
    return msg
def computeStatistics(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples
    mean /= nb_samples
    std /= nb_samples
    return mean, std
def mean_and_std(self) -> Tuple[float, float]:
    loader = DataLoader(self.subsets['train'], batch_size=10,
                        num_workers=1, shuffle=False)
    mean = torch.full((3, ), 0.0)
    std = torch.full((3, ), 0.0)
    nb_samples = 0.
    for data, gt in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples
    mean /= nb_samples
    std /= nb_samples
    return mean, std
def process_mnist(self, mnist: torch.utils.data.Dataset, labels_keep: tuple):
    data = []
    targets = []
    for image, label_old in tqdm(mnist, desc=f"Preparing {self.__class__.__name__} dataset"):
        if label_old in labels_keep:
            label_new = labels_keep.index(label_old)
            targets.append(label_new)
            data.append(image)
    data = torch.cat(data, dim=0)
    data_mean = data.mean(dim=0)
    data_std = data.std(dim=0)
    data = (data - data_mean) / data_std
    targets = torch.LongTensor(targets)
    data_path = self.get_data_path()
    data_path.parent.mkdir(exist_ok=True, parents=True)
    with open(data_path, 'wb') as f:
        torch.save((data, targets), f)
    print(f"Saved preprocessed data to {data_path}")
def preprocess(self, data):
    # random hue and saturation
    data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
    delta = (np.random.random() * 2 - 1) * 0.2
    data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)
    delta_sature = np.random.random() + 0.5
    data[:, :, 1] *= delta_sature
    data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
    data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)
    # adjust brightness
    delta = (np.random.random() * 2 - 1) * 0.3
    data += delta
    # adjust contrast
    mean = data.mean(axis=2, keepdims=True)
    data = (data - mean) * (np.random.random() + 0.5) + mean
    data = np.minimum(np.maximum(data, 0), 1)
    return data
def preprocess(self, data):
    # random hue and saturation
    data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
    delta = (np.random.random() * 2 - 1) * 0.2
    data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)
    delta_sature = np.random.random() + 0.5
    data[:, :, 1] *= delta_sature
    data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
    data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)
    # adjust brightness
    delta = (np.random.random() * 2 - 1) * 0.3
    data += delta
    # adjust contrast
    mean = data.mean(axis=2, keepdims=True)
    data = (data - mean) * (np.random.random() + 0.5) + mean
    data = np.minimum(np.maximum(data, 0), 1)
    # cv2.imwrite('x.jpg', (data*255).astype(np.uint8))
    return data
def dataSetStatistics(data_dir, batch_size):
    """ Calculate the statistics of the dataset """
    image_size = (256, 256)
    transform = transforms.Compose([transforms.Resize(image_size),
                                    transforms.ToTensor()])
    # transform = transforms.Compose([transforms.ToTensor()])
    dataset = torchvision.datasets.ImageFolder(data_dir, transform=transform,
                                               target_transform=None)
    m = dataset.__len__()
    print('length of entire dataset:', m)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=16)
    # calculate mean and std for training data
    mean = 0.
    std = 0.
    # m = 0  # number of samples
    for data, data_label in dataloader:
        # print(data)
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        # m = m + batch_samples
    mean = mean / m
    std = std / m
    print('mean:', mean)
    print('std:', std)
    return mean, std
def get_dataloader(dataset, batch_size=128, window=12, horizon=1,
                   val_days=10, test_days=10, normalizer='max'):
    if dataset == 'SYDNEY':
        data = Load_Sydney_Demand_Data(os.path.join(base_dir, '1h_data_new3.csv'))
        print(data.shape)
        print('Load Sydney Dataset Successfully!')
    if normalizer == 'max':
        scaler = MinMaxScaler(data.min(), data.max())
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax Normalization')
    elif normalizer == 'std':
        scaler = StandardScaler(data.mean(), data.std())
        data = scaler.transform(data)
        print('Normalize the dataset by Standard Normalization')
    else:
        scaler = None
    X, Y = Add_Window_Horizon(data, window, horizon)
    print(X.shape, Y.shape)
    x_tra, x_val, x_test = split_train_val_test(X, val_days, test_days)
    y_tra, y_val, y_test = split_train_val_test(Y, val_days, test_days)
    print(x_tra.shape, y_tra.shape)
    print(x_val.shape, y_val.shape)
    print(x_test.shape, y_test.shape)
    train_dataloader = data_loader(x_tra, y_tra, batch_size, 'train')
    val_dataloader = data_loader(x_val, y_val, batch_size, 'val')
    test_dataloader = data_loader(x_test, y_test, batch_size, 'test')
    dataloader = data_loader(X, Y, batch_size, 'all')
    return train_dataloader, val_dataloader, test_dataloader, scaler
def preprocess(self, data):
    # random hue and saturation
    if len(data.shape) < 3 or data.shape[2] < 3:
        print()  # input does not have 3 channels; no handling beyond this blank print
    data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
    delta = (np.random.random() * 2 - 1) * 0.2
    data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)
    delta_sature = np.random.random() + 0.5
    data[:, :, 1] *= delta_sature
    data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
    data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)
    # adjust brightness
    delta = (np.random.random() * 2 - 1) * 0.3
    data += delta
    # adjust contrast
    mean = data.mean(axis=2, keepdims=True)
    data = (data - mean) * (np.random.random() + 0.5) + mean
    data = np.minimum(np.maximum(data, 0), 1)
    # cv2.imwrite('x.jpg', (data*255).astype(np.uint8))
    return data
def prepare(self):
    """
    Make torch Tensors from data and label files.

    Returns:
        (data, target) tensors, standardized if self.stardardize is set.
    """
    datafile = self.urls[0].rpartition('/')[2]
    data_filepath = os.path.join(self.root, datafile)
    data = []
    target = []
    with open(data_filepath) as data_f:
        for sample in data_f:
            x, y, label = tuple(map(float, sample.split()))
            data.append([x, y])
            target.append(int(label) - 1)
    data = torch.Tensor(data)
    target = torch.Tensor(target)
    if self.stardardize:
        data_mean = data.mean(dim=0, keepdim=True)
        data_std = data.std(dim=0, keepdim=True)
        data = (data - data_mean) / data_std
    return data, target
def get_data_ch7():
    data = np.genfromtxt('Datasets/airfoil_self_noise.dat', delimiter='\t')
    data = (data - data.mean(axis=0)) / data.std(axis=0)
    return (torch.tensor(data[:1500, :-1], dtype=torch.float32),
            torch.tensor(data[:1500, -1], dtype=torch.float32))
def normalize(data):
    man = data.mean(0)
    std = data.std(0)
    y = data - man
    z = y / std
    return z, man, std
def normalize_(data: torch.Tensor):
    mean = data.mean(0)
    std = data.std(0)
    return (data - mean) / std, mean, std
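# Hedged usage example for normalize_ with synthetic data (values made up):
import torch

x = torch.randn(128, 16) * 3.0 + 5.0            # 128 samples, 16 features
z, mu, sigma = normalize_(x)                     # column-wise standardization
print(z.mean(0).abs().max(), z.std(0).mean())    # roughly 0 and roughly 1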
def main(opt):
    opt.update({
        'feats_i': "/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_R101.hdf5",
        'feats_m': "/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_c3d_60_fc6.hdf5",
        # "/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_kinetics_60.hdf5",
        'feats_a': ["/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_vggish_60.hdf5",
                    "/home/yangbang/VideoCaptioning/MSRVTT/feats/fvdb_260.hdf5",
                    "/home/yangbang/VideoCaptioning/MSRVTT/feats/vtt_boaw256.hdf5"],
        'dim_i': 2048,
        'dim_m': 4096,
        'dim_a': 644
    })
    data_i = [load_database(opt["feats_i"]), opt["dim_i"]]
    data_m = [load_database(opt["feats_m"]), opt["dim_m"]]
    data_a = [load_database(opt["feats_a"]), opt["dim_a"]]
    length, n_frames, random_type, equally_sampling = 60, 8, None, True
    frames_idx = get_frames_idx(length, n_frames, random_type,
                                equally_sampling=equally_sampling)
    if opt['em'] == 'validate':
        begin, end = 6513, 7010
    elif opt['em'] == 'test':
        begin, end = 7010, 10000
    else:
        begin, end = 0, 6513
    feats_i, feats_m, feats_a = [], [], []
    for ix in range(begin, end):
        vid = 'video%d' % ix
        i = load_feats(data_i, vid, frames_idx)
        m = load_feats(data_m, vid, frames_idx)
        a = load_feats(data_a, vid, frames_idx)
        feats_i.append(i)
        feats_m.append(m)
        feats_a.append(a)
    feats_i = np.array(feats_i)
    feats_m = np.array(feats_m)
    feats_a = np.array(feats_a)
    mapping = {
        'a': feats_a,
        'm': feats_m,
        'i': feats_i
    }
    if opt['plot']:
        visualize(opt)
    elif opt['cal']:
        for modality in ['i', 'm', 'a', 'im', 'ia', 'ma', 'ima']:
            feats = []
            for char in modality:
                feats.append(mapping[char])
            data = np.concatenate(feats, axis=2)
            data = data.mean(1)
            intra, inter = cal_centers(opt['em'], torch.from_numpy(data).cuda())
            print('%4s\tIntra: %05.3f\tInter: %05.3f' % (modality, intra, inter))
    else:
        for modality in ['i', 'm', 'a', 'im', 'ia', 'ma', 'ima']:
            feats = []
            for char in modality:
                feats.append(mapping[char])
            data = np.concatenate(feats, axis=2)
            name = '%s.npy' % modality
            if opt['mean']:
                data = data.mean(1)
                pca = manifold.TSNE(n_components=2)
                collect = pca.fit_transform(data)  # project the samples to 2-D
            elif opt['all']:
                bsz, seq_len, dim = data.shape
                data = data.reshape(bsz * seq_len, dim)
                pca = manifold.TSNE(n_components=2)
                collect = pca.fit_transform(data)  # project the samples to 2-D
            else:
                assert len(data.shape) == 3
                seq_len = data.shape[1]
                collect = []
                for nf in range(seq_len):
                    x = data[:, nf, :]
                    pca = manifold.TSNE(n_components=2)
                    # pca = PCA(n_components=opt.pca)  # load PCA, keeping 2 principal components
                    reduced_x = pca.fit_transform(x)  # project the samples to 2-D
                    collect.append(reduced_x)
                collect = np.stack(collect, 1)
            print(name, collect.shape)
            np.save(os.path.join(opt['pca_path'], name), collect)