コード例 #1
0
 def __getitem__(self, index):
     """Return the padded batch stored at position ``index``.

     When ``self.load`` is ``'text'`` only the padded labels are returned.
     Otherwise every file listed in ``self.X[index]`` is read from
     ``self.root`` with ``np.load``, converted to a ``FloatTensor`` and
     padded with ``pad_sequence(..., batch_first=True)``; the result is the
     tuple ``(x_pad_batch, y_pad_batch)``.

     NOTE(review): labels are only built for the ``'asr'`` and ``'text'``
     modes, so any other ``self.load`` value reaches the final ``return``
     with ``y_pad_batch`` unassigned and raises ``UnboundLocalError``.
     """
     mode = self.load

     # Labels: pad every target in the batch to the longest one.
     if mode in ('asr', 'text'):
         y_batch = list(self.Y[index])
         longest_label = max(map(len, y_batch))
         y_pad_batch = target_padding(y_batch, longest_label)
         if mode == 'text':
             return y_pad_batch

     # Acoustic features: one array file per entry, loaded then padded.
     x_batch = []
     for x_file in self.X[index]:
         feature_path = os.path.join(self.root, x_file)
         x_batch.append(torch.FloatTensor(np.load(feature_path)))
     x_pad_batch = pad_sequence(x_batch, batch_first=True)
     return x_pad_batch, y_pad_batch
コード例 #2
0
    def __init__(self,
                 run_mockingjay,
                 file_path,
                 sets,
                 bucket_size,
                 max_timestep=0,
                 max_label_len=0,
                 mock_config=None):
        """Load pickled inputs/labels, sort them by length and pre-pad buckets.

        For every name in ``sets`` the files ``<name>_x.pkl`` and
        ``<name>_y.pkl`` under ``file_path`` are unpickled and concatenated.
        The items are then ordered from longest to shortest input, cut into
        consecutive groups of ``bucket_size`` and padded group by group with
        ``zero_padding`` (inputs) and ``target_padding`` (labels). The padded
        groups are stored in ``self.X`` and ``self.Y``.

        Args:
            run_mockingjay: Stored on the instance unchanged; not used here.
            file_path: Directory containing the ``*_x.pkl`` / ``*_y.pkl``
                files.
            sets: Iterable of file-name prefixes to load and concatenate.
            bucket_size: Number of items per bucket (the last bucket may be
                smaller).
            max_timestep: Upper bound for the padded input length of a
                bucket. Values ``<= 0`` (the default) mean "no cap": the
                bucket is padded to its longest input.
            max_label_len: Upper bound for the padded label length of a
                bucket. Values ``<= 0`` (the default) mean "no cap".
            mock_config: Stored on the instance unchanged; not used here.

        Raises:
            AssertionError: If the number of inputs and labels differ.
        """
        self.run_mockingjay = run_mockingjay
        self.mock_config = mock_config
        self.class_num = 63

        # Open dataset.
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file -- only point ``file_path`` at trusted data.
        x = []
        y = []
        for s in sets:
            with open(os.path.join(file_path, s + '_x.pkl'), 'rb') as fp:
                x += pickle.load(fp)
            with open(os.path.join(file_path, s + '_y.pkl'), 'rb') as fp:
                y += pickle.load(fp)
        assert len(x) == len(y), 'number of inputs and labels differ'

        # Sort data w.r.t. length, longest first. The permutation is computed
        # once and applied to both lists so inputs and labels stay aligned.
        order = np.argsort([len(t) for t in x])[::-1]
        sorted_x = [x[idx] for idx in order]
        sorted_y = [y[idx] for idx in order]

        # Bucketing
        self.X = []
        self.Y = []
        for b in range(int(np.ceil(len(sorted_x) / bucket_size))):
            offset = b * bucket_size
            bound = min((b + 1) * bucket_size, len(sorted_x))
            bucket_x = sorted_x[offset:bound]
            bucket_y = sorted_y[offset:bound]

            # Inputs are sorted in descending length, so the first item of
            # the bucket is its longest one.
            longest_x = len(sorted_x[offset])
            longest_y = max(len(v) for v in bucket_y)

            # A non-positive limit means "unlimited"; taking min() with it
            # would otherwise force every bucket to length 0.
            if max_timestep > 0:
                bucket_max_timestep = min(max_timestep, longest_x)
            else:
                bucket_max_timestep = longest_x
            if max_label_len > 0:
                bucket_max_label_len = min(max_label_len, longest_y)
            else:
                bucket_max_label_len = longest_y

            self.X.append(zero_padding(bucket_x, bucket_max_timestep))
            self.Y.append(target_padding(bucket_y, bucket_max_label_len))