Example #1
0
    def get_homogenous_batch_iter(self):
        end_of_iter = False
        while True:
            k_batches = 10
            batch_size = self.batch_size
            x = []
            y = []
            for k in xrange(k_batches):
                try:
                    dx, dy = PytablesBitextIterator.next(self)
                except StopIteration:
                    end_of_iter = True
                    break
                if dx is None or dy is None:
                    break
                x += dx
                y += dy
            if len(x) <= 0 or len(y) <= 0:
                raise StopIteration
            lens = numpy.asarray([map(len, x), map(len, y)])
            order = numpy.argsort(lens.max(axis=0)) if k_batches > 1 else numpy.arange(len(x))
            for k in range(k_batches):
                if k * batch_size > len(order):
                    break
                indices = order[k * batch_size:(k + 1) * batch_size]
                yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

            if end_of_iter:
                raise StopIteration
Example #2
0
    def get_homogenous_batch_iter(self):
        end_of_iter = False
        while True:
            k_batches = 10
            batch_size = self.batch_size
            x = []
            y = []
            for k in xrange(k_batches):
                try:
                    dx, dy = PytablesBitextIterator.next(self)
                except StopIteration:
                    end_of_iter = True
                    break
                if dx == None or dy == None:
                    break
                x += dx
                y += dy
            if len(x) <= 0 or len(y) <= 0:
                raise StopIteration
            lens = numpy.asarray([map(len, x), map(len, y)])
            order = numpy.argsort(lens.max(axis=0)) if k_batches > 1 else numpy.arange(len(x))
            for k in range(k_batches):
                if k * batch_size > len(order):
                    break
                indices = order[k * batch_size:(k + 1) * batch_size]
                yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

            if end_of_iter:
                raise StopIteration