Exemple #1
0
 def __call__(self, markups):
     """Yield one batch per chunk of shuffled sequences.

     Items are produced by ``self.items(markups)``, cut into sequences by
     ``chop_drop`` with ``self.seq_len``, reordered by ``self.shuffle`` and
     grouped by ``chop`` with ``self.batch_size``. Every group is passed to
     ``self.batch`` and the result is yielded.
     """
     pieces = chop_drop(self.items(markups), self.seq_len)
     shuffled = self.shuffle(pieces)
     for group in chop(shuffled, self.batch_size):
         yield self.batch(group)
Exemple #2
0
 def __call__(self, texts):
     """Yield one batch per chunk of shuffled sequences built from texts.

     ``self.items`` turns the texts into items, ``self.seqs`` turns items
     into sequences, ``self.shuffle`` reorders them and ``chop`` groups them
     using ``self.batch_size``. Each group goes through ``self.batch``.
     """
     sequences = self.seqs(self.items(texts))
     shuffled = self.shuffle(sequences)
     for group in chop(shuffled, self.batch_size):
         yield self.batch(group)
Exemple #3
0
 def __call__(self, markups):
     """Yield batches of items whose ``word_ids`` fit within ``self.seq_len``.

     Markups are ordered by ``self.sort``, converted one by one with
     ``self.item``, filtered by length, grouped by ``chop`` using
     ``self.batch_size`` and finally passed to ``self.batch``.
     """
     ordered = self.sort(markups)
     encoded = (self.item(markup) for markup in ordered)
     # Over-long sentences are rare (original note: 0.02% are longer than
     # 128), so anything above self.seq_len is simply dropped.
     fitting = (
         item for item in encoded
         if len(item.word_ids) <= self.seq_len
     )
     for group in chop(fitting, self.batch_size):
         yield self.batch(group)
Exemple #4
0
 def __call__(self, items):
     """Yield ``self.input(chunk)`` for each chunk of converted items.

     Every incoming item is converted with ``self.item``; the converted
     stream is grouped by ``chop`` using ``self.batch_size``.
     """
     converted = map(self.item, items)
     for group in chop(converted, self.batch_size):
         yield self.input(group)
Exemple #5
0
def test_chop():
    """Check that chop(range(10), 3) gives three full groups and a short tail."""
    expected = [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    actual = list(chop(range(10), 3))
    assert expected == actual
Exemple #6
0
 def __call__(self, markups):
     """Yield one batch per chunk of sorted, converted markups.

     Markups are ordered by ``self.sort``, converted with ``self.item``,
     grouped by ``chop`` using ``self.batch_size`` and each group is handed
     to ``self.batch``.
     """
     ordered = self.sort(markups)
     encoded = map(self.item, ordered)
     for group in chop(encoded, self.batch_size):
         yield self.batch(group)