Example #1
0
    def get_homogenous_batch_iter(self):
        """Yield minibatches whose examples have similar lengths.

        Up to ``k_batches`` raw batches are pulled from
        ``PytablesBitextIterator.next`` and pooled. The pool is ordered by
        the longer of each x/y pair and handed back in slices of
        ``self.batch_size``, so every yielded batch holds examples of
        comparable length.

        Yields:
            ``[x_batch, y_batch]``: two lists selected with the same indices.
        """
        k_batches = 10
        batch_size = self.batch_size
        end_of_iter = False
        while True:
            x = []
            y = []
            for _ in range(k_batches):
                try:
                    dx, dy = PytablesBitextIterator.next(self)
                except StopIteration:
                    # Remember that the source is drained, but still emit
                    # whatever was pooled before it ran out.
                    end_of_iter = True
                    break
                if dx is None or dy is None:
                    break
                x += dx
                y += dy
            if not x or not y:
                # A plain return ends the generator; raising StopIteration
                # here turns into RuntimeError under PEP 479.
                return
            # Explicit lists: on Python 3 ``map`` is lazy and numpy would wrap
            # the iterator objects instead of the lengths.
            lens = numpy.asarray([[len(seq) for seq in x],
                                  [len(seq) for seq in y]])
            if k_batches > 1:
                order = numpy.argsort(lens.max(axis=0))
            else:
                order = numpy.arange(len(x))
            for k in range(k_batches):
                # ``>=`` rather than ``>``: when the pool size is an exact
                # multiple of batch_size the next slice would be empty.
                if k * batch_size >= len(order):
                    break
                indices = order[k * batch_size:(k + 1) * batch_size]
                yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

            if end_of_iter:
                return
Example #2
0
    def get_homogenous_batch_iter(self):
        """Yield minibatches whose examples have similar lengths.

        Up to ``k_batches`` raw batches are pulled from
        ``PytablesBitextIterator.next`` and pooled. The pool is ordered by
        the longer of each x/y pair and handed back in slices of
        ``self.batch_size``, so every yielded batch holds examples of
        comparable length.

        Yields:
            ``[x_batch, y_batch]``: two lists selected with the same indices.
        """
        k_batches = 10
        batch_size = self.batch_size
        end_of_iter = False
        while True:
            x = []
            y = []
            for _ in range(k_batches):
                try:
                    dx, dy = PytablesBitextIterator.next(self)
                except StopIteration:
                    # Remember that the source is drained, but still emit
                    # whatever was pooled before it ran out.
                    end_of_iter = True
                    break
                # Identity test: ``== None`` would compare element-wise if the
                # iterator ever hands back an array.
                if dx is None or dy is None:
                    break
                x += dx
                y += dy
            if not x or not y:
                # A plain return ends the generator; raising StopIteration
                # here turns into RuntimeError under PEP 479.
                return
            # Explicit lists: on Python 3 ``map`` is lazy and numpy would wrap
            # the iterator objects instead of the lengths.
            lens = numpy.asarray([[len(seq) for seq in x],
                                  [len(seq) for seq in y]])
            if k_batches > 1:
                order = numpy.argsort(lens.max(axis=0))
            else:
                order = numpy.arange(len(x))
            for k in range(k_batches):
                # ``>=`` rather than ``>``: when the pool size is an exact
                # multiple of batch_size the next slice would be empty.
                if k * batch_size >= len(order):
                    break
                indices = order[k * batch_size:(k + 1) * batch_size]
                yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

            if end_of_iter:
                return
Example #3
0
def load_data(batch_size=128):
    """Create the data iterators for the small Europarl v7 en-fr files.

    Args:
        batch_size: Forwarded as the first argument of
            ``PytablesBitextIterator``.

    Returns:
        A ``(train, valid, test)`` tuple. Only ``train`` is an iterator;
        ``valid`` and ``test`` are always ``None``.
    """
    # NOTE(review): the "train" iterator reads the bin_test.* files -- confirm
    # this is intended.
    path_src = '../data/vocab_and_data_small_europarl_v7_enfr/bin_test.fr.h5'
    path_trg = '../data/vocab_and_data_small_europarl_v7_enfr/bin_test.en.h5'

    #############
    # LOAD DATA #
    #############

    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print('... initializing data iterators')

    # NOTE(review): the target path is passed before the source path; verify
    # against the PytablesBitextIterator signature.
    train = PytablesBitextIterator(batch_size,
                                   path_trg,
                                   path_src,
                                   use_infinite_loop=False)
    valid = None
    test = None

    return train, valid, test
Example #4
0
 def __init__(self, *args, **kwargs):
     """Run PytablesBitextIterator's initialiser and clear ``batch_iter``.

     All positional and keyword arguments are forwarded unchanged to
     ``PytablesBitextIterator.__init__``.
     """
     PytablesBitextIterator.__init__(self, *args, **kwargs)
     # Set after the parent initialiser so the attribute always starts as
     # None. Presumably filled later with a batch generator -- TODO confirm
     # against the rest of the class.
     self.batch_iter = None
Example #5
0
 def __init__(self, *args, **kwargs):
     """Run PytablesBitextIterator's initialiser and clear ``batch_iter``.

     All positional and keyword arguments are forwarded unchanged to
     ``PytablesBitextIterator.__init__``.
     """
     PytablesBitextIterator.__init__(self, *args, **kwargs)
     # Set after the parent initialiser so the attribute always starts as
     # None. Presumably filled later with a batch generator -- TODO confirm
     # against the rest of the class.
     self.batch_iter = None