Ejemplo n.º 1
0
 def process_y_dataset(self,
                       data: List[List[str]],
                       max_len: Optional[int] = None,
                       subset: Optional[List[int]] = None) -> np.ndarray:
     """Convert label samples into a NumPy array.

     Args:
         data: Label samples to convert.
         max_len: Not used by this implementation.
         subset: When given, ``data`` is first narrowed with
             ``utils.get_list_subset(data, subset)`` (presumably the
             indices of the samples to keep -- confirm against the helper).

     Returns:
         ``np.array`` built from the selected samples.
     """
     if subset is None:
         # No subset requested: convert a full slice of the input.
         selected = data[:]
     else:
         selected = utils.get_list_subset(data, subset)
     return np.array(selected)
Ejemplo n.º 2
0
 def process_y_dataset(self,
                       data: List[str],
                       max_len: Optional[int] = None,
                       subset: Optional[List[int]] = None) -> np.ndarray:
     """Encode label samples into the target array for the model.

     Args:
         data: Label samples to encode.
         max_len: Not used by this implementation.
         subset: When given, ``data`` is first narrowed with
             ``utils.get_list_subset(data, subset)``.

     Returns:
         When ``self.multi_label`` is truthy, the result of
         ``self.multi_label_binarizer.fit_transform``; otherwise the
         numerized labels passed through ``to_categorical`` with
         ``len(self.label2idx)`` classes.
     """
     labels = data if subset is None else utils.get_list_subset(data, subset)

     if self.multi_label:
         # NOTE(review): fit_transform runs on every call -- confirm that
         # refitting the binarizer here is intended.
         return self.multi_label_binarizer.fit_transform(labels)

     label_ids = self.numerize_label_sequences(labels)
     num_classes = len(self.label2idx)
     return to_categorical(label_ids, num_classes)
Ejemplo n.º 3
0
    def process_x_dataset(self,
                          data: List[List[str]],
                          max_len: Optional[int] = None,
                          subset: Optional[List[int]] = None) -> np.ndarray:
        """Numerize token sequences and pad them to a common length.

        Args:
            data: Token sequences to process.
            max_len: Length handed to ``pad_sequences``; when ``None``,
                ``self.sequence_length`` is used instead.
            subset: When given, ``data`` is first narrowed with
                ``utils.get_list_subset(data, subset)``.

        Returns:
            The result of ``pad_sequences`` applied with ``'post'`` padding
            and ``'post'`` truncating.
        """
        target_len = self.sequence_length if max_len is None else max_len
        samples = data if subset is None else utils.get_list_subset(data, subset)
        token_ids = self.numerize_token_sequences(samples)
        return pad_sequences(token_ids,
                             target_len,
                             padding='post',
                             truncating='post')
Ejemplo n.º 4
0
 def process_y_dataset(self,
                       data: List[List[str]],
                       max_len: Optional[int] = None,
                       subset: Optional[List[int]] = None) -> np.ndarray:
     """Numerize label sequences, pad them, and categorical-encode the result.

     Args:
         data: Label sequences to process.
         max_len: Length handed to ``pad_sequences``.
         subset: When given, ``data`` is first narrowed with
             ``utils.get_list_subset(data, subset)``; otherwise a full
             slice of ``data`` is used.

     Returns:
         The padded label ids passed through ``to_categorical`` with
         ``len(self.label2idx)`` classes.
     """
     if subset is None:
         sequences = data[:]
     else:
         sequences = utils.get_list_subset(data, subset)

     label_ids = self.numerize_label_sequences(sequences)
     # NOTE(review): max_len is forwarded unchanged, so None reaches
     # pad_sequences as-is -- confirm no fallback length is expected here.
     padded = pad_sequences(label_ids, max_len, padding='post', truncating='post')
     num_classes = len(self.label2idx)
     return to_categorical(padded, num_classes)
Ejemplo n.º 5
0
 def test_get_list_subset(self):
     """Check get_list_subset on 0..99 with the indices 10..19."""
     source = list(range(100))
     wanted = list(range(10, 20))
     picked = get_list_subset(source, wanted)
     assert picked == [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]