def get_sample_meanvar(train_files):

    # First pass: accumulate a running mean over all training files.
    size_acc = 0
    data = np.load(train_files[0])
    data = data[:, 0:-1]
    size_acc += np.shape(data)[0]
    mean_acc = data.mean(axis=0)

    print('%s: Getting mean of training samples...' % sys.argv[0])
    for i in range(1, len(train_files)):
        data = np.load(train_files[i])
        data = data[:, 0:-1]
        size_now = np.shape(data)[0]
        mean_now = data.mean(axis=0)
        mean_acc = (mean_now * size_now + mean_acc * size_acc) / (size_now + size_acc)
        size_acc += size_now

    # Second pass: accumulate squared deviations from the global mean.
    size_acc = 0
    data = np.load(train_files[0])
    data = data[:, 0:-1]
    size_acc += np.shape(data)[0]
    var_acc = np.sum(np.square(data - mean_acc), axis=0)

    print('%s: Getting variance of training samples...' % sys.argv[0])
    for i in range(1, len(train_files)):
        data = np.load(train_files[i])
        data = data[:, 0:-1]
        size_now = np.shape(data)[0]
        size_acc += size_now
        var_acc += np.sum(np.square(data - mean_acc), axis=0)
    var_acc = var_acc / size_acc

    return mean_acc, var_acc
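A minimal usage sketch for get_sample_meanvar (file names are illustrative; as above, each .npy array is expected to carry the label in its last column):

import numpy as np

train_files = ['train_000.npy', 'train_001.npy']   # illustrative paths
mean, var = get_sample_meanvar(train_files)
test = np.load('test_000.npy')[:, 0:-1]            # drop the trailing label column
test_std = (test - mean) / np.sqrt(var + 1e-8)     # epsilon guards against zero-variance features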
Example no. 2
    def normalize(self, data):
        '''
        Normalize data based on normalize_mode.
        '''

        assert len(data.shape) == 4
        if self.normalize_mode == '12':
            mean = data.mean(axis=(2, 3), dtype=np.float32, keepdims=True)
            std = data.std(axis=(2, 3), dtype=np.float32, keepdims=True)
            data = np.nan_to_num((data - mean)/std)

        elif self.normalize_mode == '3':
            shape = data.shape
            temp_data = data.reshape((-1, (192*224*192)//data.shape[2]//data.shape[3], 2, data.shape[2], data.shape[3]))
            mean = temp_data.mean(axis=1, dtype=np.float32, keepdims=True)
            std = temp_data.std(axis=1, dtype=np.float32, keepdims=True)
            data = np.nan_to_num((temp_data - mean)/std).reshape(shape)

        elif self.normalize_mode == '123':
            shape = data.shape
            temp_data = data.reshape((-1, (192*224*192)//data.shape[2]//data.shape[3], 2, data.shape[2], data.shape[3]))
            mean = temp_data.mean(axis=1, dtype=np.float32, keepdims=True)
            std = temp_data.std(axis=1, dtype=np.float32, keepdims=True)
            data = np.nan_to_num((temp_data - mean) / std).reshape(shape)
            mean = data.mean(axis=(2, 3), dtype=np.float32, keepdims=True)
            std = data.std(axis=(2, 3), dtype=np.float32, keepdims=True)
            data = np.nan_to_num((data - mean)/std)

        return data
Example no. 3
def remove_season(data, standardize=True, mean=None, std=None):
    # Remove the seasonal signal from data (an xarray object with a 'year' dimension).
    # If mean/std are not supplied they are computed from data, so they can be reused
    # on other periods; returns the de-seasonalized data with the same shape as the input.
    if mean is None:
        mean = data.mean(dim='year')
        std = data.std(dim='year')
    if standardize:
        data = (data - mean) / std
    else:
        data = data - mean

    return data, mean, std
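A small sketch of how the statistics returned by remove_season might be reused on a later period (assuming data is an xarray DataArray with a 'year' dimension; the dimensions and sizes are illustrative):

import numpy as np
import xarray as xr

da = xr.DataArray(np.random.randn(30, 10), dims=('year', 'station'))
train_anom, mean, std = remove_season(da.isel(year=slice(0, 20)))
# Reuse the training-period mean/std on the held-out years.
test_anom, _, _ = remove_season(da.isel(year=slice(20, 30)), mean=mean, std=std)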
def plot_dist_with_stats(data,
                         labels=None,
                         title='Distribution of ECG Signal',
                         ax=None,
                         stats=True):
    mean = data.mean(skipna=True)
    std = data.std(skipna=True)

    if ax is None:
        fig, ax = plt.subplots()

    sns.distplot(data,
                 bins=200,
                 fit=norm,
                 kde=True,
                 ax=ax,
                 norm_hist=True,
                 hist=True)

    if stats:
        ax.axvline(mean.item(), color='w', linestyle='dashed', linewidth=2)
        ax.axvline(std.item(), color='r', linestyle='dashed', linewidth=2)
        ax.axvline(-std.item(), color='r', linestyle='dashed', linewidth=2)

    ax.set_xlabel("Samples")
    ax.set_ylabel("Probability density")
    ax.set_title(title)
    ax.text(-7, 0.1, "Extreme negatives")
    ax.text(7, 0.1, "Extreme positives")
    if labels is not None:
        plt.legend(labels=labels)
    plt.show()

    return ax
Example no. 5
 def get_mean_map(self):
     data = self.data
     # N: samples, C: channels, T: frames, V: joints, M: persons (skeleton-data convention)
     N, C, T, V, M = data.shape
     self.mean_map = data.mean(axis=2, keepdims=True).mean(
         axis=4, keepdims=True).mean(axis=0)
     self.std_map = data.transpose((0, 2, 4, 1, 3)).reshape(
         (N * T * M, C * V)).std(axis=0).reshape((C, 1, V, 1))
def dataSetStatistics(data_dir, batch_size, num_data):
    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('Current device: '+str(device))

    transform = transforms.Compose([transforms.ToTensor()])
    img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]
    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)  
    total = dataset.__len__()
    print('length of entire dataset:', total)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=16)

    # calculate mean and std for training data
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1) # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    print('mean:',mean)
    print('std:',std)
    return mean, std
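Averaging per-image standard deviations, as above, approximates rather than exactly computes the dataset std. A sketch of an exact per-channel computation via running sums of x and x squared (assuming the same dataloader as in dataSetStatistics; variable names are illustrative):

channel_sum = 0.
channel_sq_sum = 0.
n_pixels = 0
for data, _ in dataloader:
    data = data.view(data.size(0), data.size(1), -1)
    channel_sum = channel_sum + data.sum(dim=(0, 2))
    channel_sq_sum = channel_sq_sum + (data ** 2).sum(dim=(0, 2))
    n_pixels += data.size(0) * data.size(2)
mean = channel_sum / n_pixels
std = (channel_sq_sum / n_pixels - mean ** 2).sqrt()    # Var[x] = E[x^2] - (E[x])^2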
Example no. 7
def dataSetStatistics(data_dir, batch_size, num_data):
    # Detect if we have a GPU available
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # print('Current device: '+str(device))

    transform = transforms.Compose([transforms.ToTensor()])
    # img_list = [f for f in listdir(data_dir) if isfile(join(data_dir, f))]

    img_list = []
    for item in listdir(
            data_dir
    ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/    item= 1 or 3
        if isfile(join(data_dir, item)
                  ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/1/ FALSE
            img_list.append(item)
        elif isdir(join(data_dir, item)
                   ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/1/ TRUE
            update_data_dir = join(data_dir, item)
            for f in listdir(
                    update_data_dir
            ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/1/    f= 5iune00 or 3ir5a00
                if isfile(
                        join(update_data_dir, f)
                ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/1/5iune00 FALSE
                    img_list.append(item + '/' + f)
                elif isdir(
                        join(update_data_dir, f)
                ):  # /var/scratch/jfeins1/resnet-binary/fold0/train/1/5iune00 TRUE
                    deeper_data_dir = join(
                        update_data_dir, f
                    )  # deeper = /var/scratch/jfeins1/resnet-binary/fold0/train/1/5iune00
                    for y in listdir(deeper_data_dir):
                        if isfile(join(deeper_data_dir, y)):
                            img_list.append(item + '/' + f + '/' + y)

    dataset = UnsuperviseDataset(data_dir, img_list, transform=transform)
    total = dataset.__len__()
    print('length of entire dataset:', total)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16)

    # calculate mean and std for training data
    mean = 0.
    std = 0.
    m = 0
    for data, _ in dataloader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        m = m + batch_samples
        if m > num_data:
            break
    mean = mean / m
    std = std / m
    #print('mean:',mean)
    #print('std:',std)
    return mean, std
Example no. 8
def calculate_mean_std_dataset(loader):
    mean_d = 0.
    std_d = 0.
    mean_l = 0.
    std_l = 0.
    nb_samples = 0.
    for data, label in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean_d += data.mean(2).sum(0)
        std_d += data.std(2).sum(0)
        nb_samples += batch_samples

        label = label.view(batch_samples, label.size(1), -1)
        mean_l += label.mean(2).sum(0)
        std_l += label.std(2).sum(0)

    mean_d /= nb_samples
    std_d /= nb_samples

    mean_l /= nb_samples
    std_l /= nb_samples
    print("Data Mean: ", mean_d)
    print("Data Std: ", std_d)
    print("Data Mean: ", mean_l)
    print("Data Std: ", std_l)
    return mean_d, std_d, mean_l, std_l
Example no. 9
    def prepare(self, *select):
        """

        Args:
            *select:

        Returns:

        """
        datafile, labelfile = self.files(*select)
        data_filepath = os.path.join(self.root, datafile)
        label_filepath = os.path.join(self.root, labelfile)
        data = []
        target = []
        with open(data_filepath) as data_f, open(label_filepath) as label_f:
            for x, y in zip(data_f, it.islice(label_f, self.sync_files, None)):
                data.append(list(map(int, x.split())))
                target.append(int(y))
        data = torch.Tensor(data)
        target = torch.Tensor(target)

        if self.stardardize:
            data_mean = data.mean(dim=0, keepdim=True)
            data_std = data.std(dim=0, keepdim=True)
            data = (data - data_mean) / data_std

        return data, target
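If a feature column is constant, data_std is zero and the division above yields NaN/inf. A guarded variant of the standardization step (a sketch, not the original code; clamp_min is a standard torch.Tensor method):

        if self.stardardize:
            data_mean = data.mean(dim=0, keepdim=True)
            data_std = data.std(dim=0, keepdim=True)
            # Hypothetical guard against zero-variance columns.
            data = (data - data_mean) / data_std.clamp_min(1e-8)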
Example no. 10
def get_data_ch11(batch_size=10, n=1500):
    data = np.genfromtxt(d2l.download('airfoil'),
                         dtype=np.float32, delimiter='\t')
    data = torch.from_numpy((data - data.mean(axis=0)) / data.std(axis=0))
    data_iter = d2l.load_array((data[:n, :-1], data[:n, -1]),
                               batch_size, is_train=True)
    return data_iter, data.shape[1]-1
Example no. 11
def load_sample(fname, normalize=True):
    from scipy.io.wavfile import read
    mat = read(fname)[1]
    mat = np.float32(mat)
    data = mat.squeeze()[None]
    if normalize:
        data = (data - data.mean()) / data.std()
    return data
Example no. 12
    def __getitem__(self, index):
        fpath = os.path.join(self.wav_dir, self.df.fname[index])
        y, sr = librosa.load(fpath, sr=self.sr)
        if sr is None:
            print('WARNING:', fpath)
            sr = 44100

        # Random crop
        y = random_crop(y, int(self.max_length * sr))

        # Feature extraction
        n_fft = int(self.window_size * sr)
        hop_length = int(self.hop_size * sr)

        if self.feature == 'mfcc':
            feature = librosa.feature.mfcc(y=y,
                                           sr=sr,
                                           n_fft=n_fft,
                                           hop_length=hop_length,
                                           n_mfcc=self.n_feature)
        elif self.feature == 'melgram':
            feature = librosa.feature.melspectrogram(y,
                                                     sr=sr,
                                                     n_fft=n_fft,
                                                     hop_length=hop_length,
                                                     n_mels=self.n_feature)
        else:
            print('Invalid feature name: %s' % self.feature)
            exit(1)

        data = torch.from_numpy(feature).float()
        s = data.size()

        if self.model_type == 'alex2d' or self.model_type == 'resnet':
            # For Conv2d the shape is (channel, features, frames)
            data.resize_(1, s[0], s[1])
        elif self.model_type == 'alex1d' or self.model_type == 'lstm':
            # For Conv1d the shape is (features, frames)
            data.resize_(s[0], s[1])
        else:
            print('Invalid conv type: %s' % self.model_type)
            exit(1)

        mean = data.mean()
        std = data.std()
        if std != 0:
            data.add_(-mean)
            data.div_(std)

        if self.test:
            # In test mode there is no ground-truth label, so return only the data
            return data
        else:
            # label
            label = self.df.label_idx[index]

            return data, label
Example no. 13
def normalize_dataset(data, normalizer, column_wise=False):
    if normalizer == 'max01':
        if column_wise:
            minimum = data.min(axis=0, keepdims=True)
            maximum = data.max(axis=0, keepdims=True)
        else:
            minimum = data.min()
            maximum = data.max()
        scaler = MinMax01Scaler(minimum, maximum)
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax01 Normalization')
    elif normalizer == 'max11':
        if column_wise:
            minimum = data.min(axis=0, keepdims=True)
            maximum = data.max(axis=0, keepdims=True)
        else:
            minimum = data.min()
            maximum = data.max()
        scaler = MinMax11Scaler(minimum, maximum)
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax11 Normalization')
    elif normalizer == 'std':
        if column_wise:
            mean = data.mean(axis=0, keepdims=True)
            std = data.std(axis=0, keepdims=True)
        else:
            mean = data.mean()
            std = data.std()
        scaler = StandardScaler(mean, std)
        data = scaler.transform(data)
        print('Normalize the dataset by Standard Normalization')
    elif normalizer == 'None':
        scaler = NScaler()
        data = scaler.transform(data)
        print('Does not normalize the dataset')
    elif normalizer == 'cmax':
        # column-wise min-max, to be deprecated
        # note: axis must be the spatial dimension, please check!
        scaler = ColumnMinMaxScaler(data.min(axis=0), data.max(axis=0))
        data = scaler.transform(data)
        print('Normalize the dataset by Column Min-Max Normalization')
    else:
        raise ValueError
    return data, scaler
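A minimal usage sketch for normalize_dataset (the array shape is illustrative, and it assumes the project's scaler classes also expose an inverse_transform that mirrors transform):

import numpy as np

raw = np.random.rand(1000, 207).astype(np.float32)            # illustrative (time, nodes) data
normed, scaler = normalize_dataset(raw, normalizer='std', column_wise=True)
restored = scaler.inverse_transform(normed)                    # assumed method; mirrors transform above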
def get_sample_meanvar(train_files):

    size_acc = 0
    data_dict = pickle.load(open(train_files[0], 'rb'))
    data_dim = get_data_dim(data_dict)
    data, labels = dict_2_data(data_dict, data_dim)
    size_acc += np.shape(data)[0]
    mean_acc = data.mean(axis=0)

    print('%s: Getting mean of training samples...' % sys.argv[0])
    for ind, file in enumerate(train_files):
        if ind == 0:
            continue
        data_dict = pickle.load(open(file, 'rb'))
        data, labels = dict_2_data(data_dict, data_dim)
        size_now = np.shape(data)[0]
        mean_now = data.mean(axis=0)
        mean_acc = (mean_now * size_now + mean_acc * size_acc) / (size_now +
                                                                  size_acc)
        size_acc += size_now

    size_acc = 0
    data_dict = pickle.load(open(train_files[0], 'rb'))
    data, labels = dict_2_data(data_dict, data_dim)
    size_acc += np.shape(data)[0]
    var_acc = np.sum(np.square(data - mean_acc), axis=0)

    print('%s: Getting variance of training samples...' % sys.argv[0])
    for ind, file in enumerate(train_files):
        if ind == 0:
            continue
        data_dict = pickle.load(open(file, 'rb'))
        data, labels = dict_2_data(data_dict, data_dim)
        size_now = np.shape(data)[0]
        size_acc += size_now
        var_acc += np.sum(np.square(data - mean_acc), axis=0)
    var_acc = var_acc / size_acc

    return mean_acc, var_acc
def calculate_mean_std_dataset(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples
    return mean, std
Example no. 16
def get_data_statistics(data_loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for (data, labels) in data_loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples
    return mean, std
Example no. 17
def get_mean_std(loader):
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data, _, _ in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples

    return mean, std
Example no. 18
    def loss_values_stat(self, loss_vales):
        """ 一组loss损失的统计分析

        :param loss_vales: 一次batch中,多份样本产生的误差数据
        :return: 统计信息文本字符串
        """
        if not loss_vales:
            raise ValueError

        data = np.array(loss_vales, dtype=float)
        n, sum_ = len(data), data.sum()
        mean, std = data.mean(), data.std()
        msg = f'total_loss={sum_:.3f}, mean±std={mean:.3f}±{std:.3f}({max(data):.3f}->{min(data):.3f})'
        if sum_ < self.min_total_loss:
            self.min_total_loss = sum_
            msg = '*' + msg
        return msg
def computeStatistics(loader):

    mean = 0.
    std = 0.
    nb_samples = 0.

    for data in loader:
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples

    return mean, std
 def mean_and_std(self) -> Tuple[float, float]:
     loader = DataLoader(self.subsets['train'],
                         batch_size=10,
                         num_workers=1,
                         shuffle=False)
     mean = torch.full((3, ), 0.0)
     std = torch.full((3, ), 0.0)
     nb_samples = 0.
     for data, gt in loader:
         batch_samples = data.size(0)
         data = data.view(batch_samples, data.size(1), -1)
         mean += data.mean(2).sum(0)
         std += data.std(2).sum(0)
         nb_samples += batch_samples
     mean /= nb_samples
     std /= nb_samples
     return mean, std
Example no. 21
 def process_mnist(self, mnist: torch.utils.data.Dataset, labels_keep: tuple):
     data = []
     targets = []
     for image, label_old in tqdm(mnist, desc=f"Preparing {self.__class__.__name__} dataset"):
         if label_old in labels_keep:
             label_new = labels_keep.index(label_old)
             targets.append(label_new)
             data.append(image)
     data = torch.cat(data, dim=0)
     data_mean = data.mean(dim=0)
     data_std = data.std(dim=0)
     data = (data - data_mean) / data_std
     targets = torch.LongTensor(targets)
     data_path = self.get_data_path()
     data_path.parent.mkdir(exist_ok=True, parents=True)
     with open(data_path, 'wb') as f:
         torch.save((data, targets), f)
     print(f"Saved preprocessed data to {data_path}")
    def preprocess(self, data):
        # random hue and saturation
        data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
        delta = (np.random.random() * 2 - 1) * 0.2
        data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)

        delta_sature = np.random.random() + 0.5
        data[:, :, 1] *= delta_sature
        data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
        data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)

        # adjust brightness
        delta = (np.random.random() * 2 - 1) * 0.3
        data += delta

        # adjust contrast
        mean = data.mean(axis=2, keepdims=True)
        data = (data - mean) * (np.random.random() + 0.5) + mean
        data = np.minimum(np.maximum(data, 0), 1)
        return data
Example no. 23
    def preprocess(self, data):
        # random hue and saturation
        data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
        delta = (np.random.random() * 2 - 1) * 0.2
        data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)

        delta_sature = np.random.random() + 0.5
        data[:, :, 1] *= delta_sature
        data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
        data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)

        # adjust brightness
        delta = (np.random.random() * 2 - 1) * 0.3
        data += delta

        # adjust contrast
        mean = data.mean(axis=2, keepdims=True)
        data = (data - mean) * (np.random.random() + 0.5) + mean
        data = np.minimum(np.maximum(data, 0), 1)
        #cv2.imwrite('x.jpg', (data*255).astype(np.uint8))
        return data
def dataSetStatistics(data_dir, batch_size):
    """
    Calculate the per-channel mean and std of the dataset
    """
    image_size = (256, 256)

    transform = transforms.Compose([transforms.Resize(image_size), transforms.ToTensor()])
    #transform = transforms.Compose([transforms.ToTensor()])

    dataset = torchvision.datasets.ImageFolder(data_dir,
                                               transform=transform,
                                               target_transform=None)

    m = dataset.__len__()
    print('length of entire dataset:', m)

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=16)

    # calculate mean and std for training data
    mean = 0.
    std = 0.
    # m = 0 # number of samples
    for data,data_label in dataloader:
        # print(data)
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1) # reshape
        mean = mean + data.mean(2).sum(0)
        std = std + data.std(2).sum(0)
        # m = m + batch_samples

    mean = mean / m
    std = std / m
    print('mean:',mean)
    print('std:',std)

    return mean, std
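The returned statistics are typically plugged into transforms.Normalize; a short usage sketch (the directory path is illustrative):

mean, std = dataSetStatistics('/path/to/train', batch_size=64)
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean.tolist(), std=std.tolist()),
])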
Example no. 25
def get_dataloader(dataset,
                   batch_size=128,
                   window=12,
                   horizon=1,
                   val_days=10,
                   test_days=10,
                   normalizer='max'):
    if dataset == 'SYDNEY':
        data = Load_Sydney_Demand_Data(
            os.path.join(base_dir, '1h_data_new3.csv'))
        print(data.shape)
        print('Load Sydney Dataset Successfully!')

    if normalizer == 'max':
        scaler = MinMaxScaler(data.min(), data.max())
        data = scaler.transform(data)
        print('Normalize the dataset by MinMax Normalization')
    elif normalizer == 'std':
        scaler = StandardScaler(data.mean(), data.std())
        data = scaler.transform(data)
        print('Normalize the dataset by Standard Normalization')
    else:
        scaler = None

    X, Y = Add_Window_Horizon(data, window, horizon)
    print(X.shape, Y.shape)

    x_tra, x_val, x_test = split_train_val_test(X, val_days, test_days)
    y_tra, y_val, y_test = split_train_val_test(Y, val_days, test_days)
    print(x_tra.shape, y_tra.shape)
    print(x_val.shape, y_val.shape)
    print(x_test.shape, y_test.shape)

    train_dataloader = data_loader(x_tra, y_tra, batch_size, 'train')
    val_dataloader = data_loader(x_val, y_val, batch_size, 'val')
    test_dataloader = data_loader(x_test, y_test, batch_size, 'test')
    dataloader = data_loader(X, Y, batch_size, 'all')
    return train_dataloader, val_dataloader, test_dataloader, scaler
Example no. 26
    def preprocess(self, data):
        # random hue and saturation
        if len(data.shape) < 3 or data.shape[2] < 3:
            print()
        data = cv2.cvtColor(data, cv2.COLOR_RGB2HSV)
        delta = (np.random.random() * 2 - 1) * 0.2
        data[:, :, 0] = np.mod(data[:, :, 0] + (delta * 360 + 360.), 360.)

        delta_sature = np.random.random() + 0.5
        data[:, :, 1] *= delta_sature
        data[:, :, 1] = np.maximum(np.minimum(data[:, :, 1], 1), 0)
        data = cv2.cvtColor(data, cv2.COLOR_HSV2RGB)

        # adjust brightness
        delta = (np.random.random() * 2 - 1) * 0.3
        data += delta

        # adjust contrast
        mean = data.mean(axis=2, keepdims=True)
        data = (data - mean) * (np.random.random() + 0.5) + mean
        data = np.minimum(np.maximum(data, 0), 1)
        #cv2.imwrite('x.jpg', (data*255).astype(np.uint8))
        return data
Example no. 27
    def prepare(self):
        """
        Make torch Tensors from data and label files.
        Returns:

        """
        datafile = self.urls[0].rpartition('/')[2]
        data_filepath = os.path.join(self.root, datafile)
        data = []
        target = []
        with open(data_filepath) as data_f:
            for sample in data_f:
                x, y, label = tuple(map(float, sample.split()))
                data.append([x, y])
                target.append(int(label) - 1)
        data = torch.Tensor(data)
        target = torch.Tensor(target)

        if self.stardardize:
            data_mean = data.mean(dim=0, keepdim=True)
            data_std = data.std(dim=0, keepdim=True)
            data = (data - data_mean) / data_std

        return data, target
Example no. 28
def get_data_ch7():
    data = np.genfromtxt('Datasets/airfoil_self_noise.dat', delimiter='\t')
    data = (data - data.mean(axis=0)) / data.std(axis=0)
    return torch.tensor(data[:1500, :-1],
                        dtype=torch.float32), torch.tensor(data[:1500, -1],
                                                           dtype=torch.float32)
Example no. 29
 def normalize(data):
     man = data.mean(0)
     std = data.std(0)
     y = data - man
     z = y / std
     return z, man, std
Example no. 30
    def normalize_(data: torch.Tensor):
        mean = data.mean(0)
        std = data.std(0)

        return (data - mean) / std, mean, std
Example no. 31
def main(opt):
    opt.update({
        'feats_i': "/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_R101.hdf5",
        'feats_m': "/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_c3d_60_fc6.hdf5", #"/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_kinetics_60.hdf5",
        'feats_a': ["/home/yangbang/VideoCaptioning/MSRVTT/feats/msrvtt_vggish_60.hdf5", "/home/yangbang/VideoCaptioning/MSRVTT/feats/fvdb_260.hdf5", "/home/yangbang/VideoCaptioning/MSRVTT/feats/vtt_boaw256.hdf5"],
        'dim_i': 2048,
        'dim_m': 4096,
        'dim_a': 644
    })
    data_i = [load_database(opt["feats_i"]), opt["dim_i"]]
    data_m = [load_database(opt["feats_m"]), opt["dim_m"]]
    data_a = [load_database(opt["feats_a"]), opt["dim_a"]]


    length, n_frames, random_type, equally_sampling = 60, 8, None, True
    frames_idx = get_frames_idx(length, n_frames, random_type, equally_sampling=equally_sampling)

    if opt['em'] == 'validate':
        begin, end = 6513, 7010 
    elif opt['em'] == 'test':
        begin, end = 7010, 10000
    else:
        begin, end = 0, 6513

    feats_i, feats_m, feats_a = [], [], []
    for ix in range(begin, end):
        vid = 'video%d' % ix
        i = load_feats(data_i, vid, frames_idx)
        m = load_feats(data_m, vid, frames_idx)
        a = load_feats(data_a, vid, frames_idx)

        feats_i.append(i)
        feats_m.append(m)
        feats_a.append(a)

    feats_i = np.array(feats_i)
    feats_m = np.array(feats_m)
    feats_a = np.array(feats_a)

    mapping = {
        'a': feats_a,
        'm': feats_m,
        'i': feats_i
    }

    if opt['plot']:
        visualize(opt)
    elif opt['cal']:
        for modality in ['i', 'm', 'a', 'im', 'ia', 'ma', 'ima']:
            feats = []
            for char in modality:
                feats.append(mapping[char])

            data = np.concatenate(feats, axis=2)
            data = data.mean(1)
            intra, inter = cal_centers(opt['em'], torch.from_numpy(data).cuda())
            print('%4s\tIntra: %05.3f\tInter: %05.3f' % (modality, intra, inter))


    else:
        for modality in ['i', 'm', 'a', 'im', 'ia', 'ma', 'ima']:
            feats = []
            for char in modality:
                feats.append(mapping[char])

            data = np.concatenate(feats, axis=2)
            name = '%s.npy' % modality
            
            if opt['mean']:
                data = data.mean(1)
                pca = manifold.TSNE(n_components=2)
                collect = pca.fit_transform(data)  # reduce the samples to 2-D
            elif opt['all']:
                bsz, seq_len, dim = data.shape
                data = data.reshape(bsz * seq_len, dim)
                pca = manifold.TSNE(n_components=2)
                collect = pca.fit_transform(data)  # reduce the samples to 2-D
            else:
                assert len(data.shape) == 3
                seq_len = data.shape[1]
                collect = []
                for nf in range(seq_len):
                    x = data[:, nf, :]
                    pca = manifold.TSNE(n_components=2)
                    # pca = PCA(n_components=opt.pca)  # load the PCA algorithm; set the number of principal components after reduction to 2
                    reduced_x = pca.fit_transform(x)  # reduce the samples to 2-D
                    collect.append(reduced_x)
                collect = np.stack(collect, 1)
            print(name, collect.shape)
            np.save(os.path.join(opt['pca_path'], name), collect)