Esempio n. 1
0
    def __call__(self, batch: List[List[str]], *args,
                 **kwargs) -> Union[List[np.ndarray], np.ndarray]:
        """
        Compute ELMo embeddings for a batch of tokenized samples.

        A batch no longer than ``self.mini_batch_size`` is embedded with a
        single ``_mini_batch_fit`` call. A longer batch is split with
        ``chunk_generator`` and the per-chunk results are collected into one
        list.

        Args:
            batch: A list of tokenized text samples.
            *args: Forwarded unchanged to ``_mini_batch_fit``.
            **kwargs: Forwarded unchanged to ``_mini_batch_fit``.

        Returns:
            A batch of ELMo embeddings, passed through ``zero_pad`` first
            when ``self.pad_zero`` is truthy.
        """
        fits_in_one_pass = len(batch) <= self.mini_batch_size
        if fits_in_one_pass:
            embeddings = self._mini_batch_fit(batch, *args, **kwargs)
        else:
            embeddings = []
            for piece in chunk_generator(batch, self.mini_batch_size):
                embeddings.extend(self._mini_batch_fit(piece, *args, **kwargs))

        return zero_pad(embeddings) if self.pad_zero else embeddings
 def _line_generator(self, shard_generator):
     """
     Yield the id sequences of every line found in the incoming shards.

     Each shard is walked with ``chunk_generator`` using a chunk size of 1;
     the first element of every chunk is converted with ``self._line2ids``.

     Args:
         shard_generator: An iterable of shards.

     Yields:
         A 4-tuple ``(char_ids, reversed_char_ids, token_ids,
         reversed_token_ids)`` unpacked from the ``self._line2ids`` result.
     """
     for shard in shard_generator:
         for single_line_chunk in chunk_generator(shard, 1):
             # Chunks are requested with size 1, so only element 0 is used.
             ids = self._line2ids(single_line_chunk[0])
             fwd_chars, bwd_chars, fwd_tokens, bwd_tokens = ids
             yield fwd_chars, bwd_chars, fwd_tokens, bwd_tokens
Esempio n. 3
0
    def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None)\
            -> Iterator[Tuple[str, str]]:
        """
        Generate batches of lines, shard by shard, from ``self.data[data_type]``.

        Args:
            batch_size: Chunk size handed to ``chunk_generator``. When it is
                falsy (e.g. ``0`` or ``None``), the length of the current
                shard is used as the chunk size instead.
            data_type: Key selecting the dataset inside ``self.data``.
            shuffle: Forwarded to ``self._shard_generator``. ``None`` means
                "use ``self.shuffle``".

        Yields:
            Pairs ``(lines, targets)`` where ``lines`` is one chunk of a shard
            and ``targets`` is a list of ``None`` with the same length.
        """
        if shuffle is None:
            shuffle = self.shuffle

        tgt_data = self.data[data_type]
        shard_generator = self._shard_generator(tgt_data, shuffle=shuffle)

        for shard in shard_generator:
            # The chunk size must be set on every iteration: previously it was
            # only assigned for a falsy batch_size, so any real batch size
            # raised UnboundLocalError at the chunk_generator call.
            bs = batch_size if batch_size else len(shard)
            for lines in chunk_generator(shard, bs):
                yield (lines, [None] * len(lines))
Esempio n. 4
0
    def gen_batches(self, batch_size: int, data_type: str = 'train', shuffle: Optional[bool] = None) \
            -> Iterator[Tuple[str, str]]:
        """
        Generate batches of lines, shard by shard, from ``self.data[data_type]``.

        Args:
            batch_size: Chunk size handed to ``chunk_generator``. When it is
                falsy (e.g. ``0`` or ``None``), the length of the current
                shard is used as the chunk size instead.
            data_type: Key selecting the dataset inside ``self.data``.
            shuffle: Forwarded to ``self._shard_generator``. ``None`` means
                "use ``self.shuffle``".

        Yields:
            Pairs ``(lines, targets)`` where ``lines`` is one chunk of a shard
            and ``targets`` is a list of ``None`` with the same length.
        """
        if shuffle is None:
            shuffle = self.shuffle

        tgt_data = self.data[data_type]
        shard_generator = self._shard_generator(tgt_data, shuffle=shuffle)

        for shard in shard_generator:
            # Resolve the chunk size for every shard. The earlier code left
            # ``bs`` unassigned whenever batch_size was truthy, which made the
            # chunk_generator call fail with UnboundLocalError.
            bs = batch_size or len(shard)
            for lines in chunk_generator(shard, bs):
                yield (lines, [None] * len(lines))
Esempio n. 5
0
    def __call__(self, batch: List[List[str]],
                 *args, **kwargs) -> Union[List[np.ndarray], np.ndarray]:
        """
        Embed the sentences of a batch with ELMo.

        When the batch holds more samples than ``self.mini_batch_size``, it is
        split with ``chunk_generator`` and the outputs of ``_mini_batch_fit``
        for every chunk are gathered into one flat list. Otherwise the whole
        batch goes through ``_mini_batch_fit`` at once.

        Args:
            batch: A list of tokenized text samples.
            *args: Passed through to ``_mini_batch_fit``.
            **kwargs: Passed through to ``_mini_batch_fit``.

        Returns:
            A batch of ELMo embeddings; the result of ``zero_pad`` on them if
            ``self.pad_zero`` is truthy.
        """
        if len(batch) > self.mini_batch_size:
            outputs = [
                sample_out
                for mini_batch in chunk_generator(batch, self.mini_batch_size)
                for sample_out in self._mini_batch_fit(mini_batch, *args, **kwargs)
            ]
        else:
            outputs = self._mini_batch_fit(batch, *args, **kwargs)

        if not self.pad_zero:
            return outputs
        return zero_pad(outputs)