Exemple #1
0
 def __init__(self,
              train_path,
              valid_path=None,
              test_path=None,
              train_size=None,
              cached=False,
              post_processing=None,
              shuffle_memory=False,
              curriculum=None):
     """Hold dataset file paths and optionally preload training data in memory.

     :param train_path: path to the pickled training stream (required)
     :param valid_path: optional path to validation data
     :param test_path: optional path to test data
     :param train_size: number of training samples; recomputed from the
         cached data when ``cached`` is True
     :param cached: when True, load the entire training set into memory now
     :param post_processing: callable applied to each cached sample;
         defaults to the identity function
     :param shuffle_memory: shuffle the in-memory training data after loading
     :param curriculum: optional callable implementing curriculum learning;
         requires ``cached=True``
     :raises Exception: if ``curriculum`` is not callable, or if a
         curriculum is given without ``cached=True``
     """
     self._train_path = train_path
     self._valid_path = valid_path
     self._test_path = test_path
     self._train_size = train_size
     self._cache_on_memory = cached
     self._cached_train_data = None
     self._post_processing = post_processing if post_processing else lambda x: x
     self._shuffle_memory = shuffle_memory
     self._curriculum = curriculum
     self._curriculum_count = 0
     if curriculum and not callable(curriculum):
         raise Exception("curriculum function must be callable")
     if curriculum and not cached:
         raise Exception(
             "curriculum learning needs training data to be cached")
     if self._cache_on_memory:
         logging.info("Cache on memory")
         # Use a context manager so the training file is closed promptly;
         # the previous bare open() leaked the file handle.
         with open(self._train_path) as train_file:
             self._cached_train_data = list(
                 map(self._post_processing,
                     StreamPickler.load(train_file)))
         self._train_size = len(self._cached_train_data)
         if self._shuffle_memory:
             logging.info("Shuffle on-memory data")
             global_rand.shuffle(self._cached_train_data)
Exemple #2
0
 def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
              cached=False, post_processing=None, shuffle_memory=False):
     """Hold dataset file paths and optionally preload training data in memory.

     :param train_path: path to the pickled training stream (required)
     :param valid_path: optional path to validation data
     :param test_path: optional path to test data
     :param train_size: optional number of training samples
     :param cached: when True, load the entire training set into memory now
     :param post_processing: callable applied to each cached sample;
         defaults to the identity function
     :param shuffle_memory: shuffle the in-memory training data after loading
     """
     self._train_path = train_path
     self._valid_path = valid_path
     self._test_path = test_path
     self._train_size = train_size
     self._cache_on_memory = cached
     self._cached_train_data = None
     self._post_processing = post_processing if post_processing else lambda x: x
     self._shuffle_memory = shuffle_memory
     if self._cache_on_memory:
         logging.info("Cache on memory")
         # Use a context manager so the training file is closed promptly;
         # the previous bare open() leaked the file handle.
         with open(self._train_path) as train_file:
             self._cached_train_data = list(
                 map(self._post_processing, StreamPickler.load(train_file)))
         if self._shuffle_memory:
             logging.info("Shuffle on-memory data")
             global_rand.shuffle(self._cached_train_data)
Exemple #3
0
    def _yield_data(self, subset):
        """Shuffle *subset* and yield fixed-length (x, y) training pieces.

        Samples are greedily packed onto ``self.size`` parallel stacks
        (always extending the currently shortest one), zero-padded to a
        common length, then cut into fragments of ``self.fragment_length``.

        :param subset: iterable of (x, y) sequence pairs
        :yield: tuples of (piece_x, piece_y)
        """
        subset = list(subset)
        global_rand.shuffle(subset)

        bunch_stack_x = [[] for _ in range(self.size)]
        bunch_stack_y = [[] for _ in range(self.size)]

        for x, y in subset:
            # BUG FIX: in Python 3, map() returns an iterator which has no
            # .index() method; materialize the lengths as a list first.
            stack_lens = list(map(len, bunch_stack_x))
            shortest_i = stack_lens.index(min(stack_lens))
            bunch_stack_x[shortest_i].extend(x)
            bunch_stack_y[shortest_i].extend(y)
        self._pad_zeros(bunch_stack_x)
        self._pad_zeros(bunch_stack_y)
        pieces_x = self._cut_to_pieces(bunch_stack_x)
        pieces_y = self._cut_to_pieces(bunch_stack_y)
        logging.info("%d pieces this time" % int(float(len(bunch_stack_x[0])) / self.fragment_length))
        for piece in zip(pieces_x, pieces_y):
            yield piece
Exemple #4
0
    def _yield_data(self, subset):
        """Shuffle *subset* and yield fixed-length (x, y) training pieces.

        Samples are greedily packed onto ``self.size`` parallel stacks
        (always extending the currently shortest one), zero-padded to a
        common length, then cut into fragments of ``self.fragment_length``.

        :param subset: iterable of (x, y) sequence pairs
        :yield: tuples of (piece_x, piece_y)
        """
        subset = list(subset)
        global_rand.shuffle(subset)

        bunch_stack_x = [[] for _ in range(self.size)]
        bunch_stack_y = [[] for _ in range(self.size)]

        for x, y in subset:
            # BUG FIX: the original did `map(len, ...).index(...)`, which
            # fails on Python 3 because map() returns an iterator without
            # an .index() method. Pick the shortest stack via min(key=...).
            shortest_i = min(range(len(bunch_stack_x)),
                             key=lambda i: len(bunch_stack_x[i]))
            bunch_stack_x[shortest_i].extend(x)
            bunch_stack_y[shortest_i].extend(y)
        self._pad_zeros(bunch_stack_x)
        self._pad_zeros(bunch_stack_y)
        pieces_x = self._cut_to_pieces(bunch_stack_x)
        pieces_y = self._cut_to_pieces(bunch_stack_y)
        logging.info("%d pieces this time" % int(float(len(bunch_stack_x[0])) / self.fragment_length))
        for piece in zip(pieces_x, pieces_y):
            yield piece
Exemple #5
0
 def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
              cached=False, post_processing=None, shuffle_memory=False, curriculum=None):
     """Hold dataset file paths and optionally preload training data in memory.

     :param train_path: path to the pickled training stream (required)
     :param valid_path: optional path to validation data
     :param test_path: optional path to test data
     :param train_size: optional number of training samples
     :param cached: when True, load the entire training set into memory now
     :param post_processing: callable applied to each cached sample;
         defaults to the identity function
     :param shuffle_memory: shuffle the in-memory training data after loading
     :param curriculum: optional callable implementing curriculum learning;
         requires ``cached=True``
     :raises Exception: if ``curriculum`` is not callable, or if a
         curriculum is given without ``cached=True``
     """
     self._train_path = train_path
     self._valid_path = valid_path
     self._test_path = test_path
     self._train_size = train_size
     self._cache_on_memory = cached
     self._cached_train_data = None
     self._post_processing = post_processing if post_processing else lambda x: x
     self._shuffle_memory = shuffle_memory
     self._curriculum = curriculum
     self._curriculum_count = 0
     if curriculum and not callable(curriculum):
         raise Exception("curriculum function must be callable")
     if curriculum and not cached:
         raise Exception("curriculum learning needs training data to be cached")
     if self._cache_on_memory:
         logging.info("Cache on memory")
         # Use a context manager so the training file is closed promptly;
         # the previous bare open() leaked the file handle.
         # NOTE(review): unlike the sibling variants, this one does not
         # update self._train_size from the cached data — confirm intent.
         with open(self._train_path) as train_file:
             self._cached_train_data = list(
                 map(self._post_processing, StreamPickler.load(train_file)))
         if self._shuffle_memory:
             logging.info("Shuffle on-memory data")
             global_rand.shuffle(self._cached_train_data)
Exemple #6
0
 def __init__(self,
              train_path,
              valid_path=None,
              test_path=None,
              train_size=None,
              cache_on_memory=False,
              post_processing=None,
              shuffle_memory=False):
     """Hold dataset file paths and optionally preload training data in memory.

     :param train_path: path to the pickled training stream (required)
     :param valid_path: optional path to validation data
     :param test_path: optional path to test data
     :param train_size: optional number of training samples
     :param cache_on_memory: when True, load the entire training set into
         memory now
     :param post_processing: callable applied to each cached sample;
         defaults to the identity function
     :param shuffle_memory: shuffle the in-memory training data after loading
     """
     self._train_path = train_path
     self._valid_path = valid_path
     self._test_path = test_path
     self._train_size = train_size
     self._cache_on_memory = cache_on_memory
     self._cached_train_data = None
     self._post_processing = post_processing if post_processing else lambda x: x
     self._shuffle_memory = shuffle_memory
     if self._cache_on_memory:
         logging.info("Cache on memory")
         # Use a context manager so the training file is closed promptly;
         # the previous bare open() leaked the file handle.
         with open(self._train_path) as train_file:
             self._cached_train_data = list(
                 map(self._post_processing,
                     StreamPickler.load(train_file)))
         if self._shuffle_memory:
             logging.info("Shuffle on-memory data")
             global_rand.shuffle(self._cached_train_data)
Exemple #7
0
 def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
              cached=False, post_processing=None, shuffle_memory=False, data_processor=None):
     """Hold dataset file paths and optionally preload training data in memory.

     :param train_path: path to the pickled training stream (required)
     :param valid_path: optional path to validation data
     :param test_path: optional path to test data
     :param train_size: number of training samples; recomputed from the
         cached data when ``cached`` is True
     :param cached: when True, load the entire training set into memory now
     :param post_processing: callable applied to each cached sample;
         defaults to the identity function
     :param shuffle_memory: shuffle the in-memory training data after loading
     :type data_processor: DataProcessor
     :raises Exception: if ``data_processor`` is not a DataProcessor instance
     """
     self._train_path = train_path
     self._valid_path = valid_path
     self._test_path = test_path
     self._train_size = train_size
     self._cache_on_memory = cached
     self._cached_train_data = None
     self._post_processing = post_processing if post_processing else lambda x: x
     self._shuffle_memory = shuffle_memory
     self._epoch = 0
     self._data_processor = data_processor
     if data_processor and not isinstance(data_processor, DataProcessor):
         raise Exception("data_processor must be an instance of DataProcessor.")
     if self._cache_on_memory:
         logging.info("Cache on memory")
         # Use a context manager so the training file is closed promptly;
         # the previous bare open() leaked the file handle.
         with open(self._train_path) as train_file:
             self._cached_train_data = list(
                 map(self._post_processing, StreamPickler.load(train_file)))
         self._train_size = len(self._cached_train_data)
         if self._shuffle_memory:
             logging.info("Shuffle on-memory data")
             global_rand.shuffle(self._cached_train_data)
# Default path of the serialized model checkpoint, stored next to this script.
default_model = os.path.join(os.path.dirname(__file__), "models", "mlp_distortion1.gz")

mnist = MnistDataset()

logging.info("transforming images with elastic distortion")

# Augment the training set: keep every original image and add one
# elastically distorted copy of it, doubling the amount of training data.
expanded_train_set = []

for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    # Rescale to pixel range and reshape for the distortion, then invert
    # both transforms so the distorted sample matches the original format.
    # NOTE(review): assumes 28x28 images normalized by 256 — confirm against
    # MnistDataset.
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))

# Shuffle so originals and their distorted copies are no longer adjacent.
global_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set, valid=mnist.valid_set(), test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    # 2-hidden-layer MLP classifier over flattened 28x28 inputs.
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'),
                Dense(256, 'relu'),
                Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    # NOTE(review): `annealer` is created but never used within the visible
    # source; the training call presumably follows beyond this chunk.
    annealer = LearningRateAnnealer()
# Default path of the serialized model checkpoint, stored next to this script.
default_model = os.path.join(os.path.dirname(__file__), "models",
                             "mlp_distortion1.gz")

mnist = MnistDataset()

logging.info("transforming images with elastic distortion")

# Augment the training set: keep every original image and add one
# elastically distorted copy of it, doubling the amount of training data.
expanded_train_set = []

for img, label in mnist.train_set():
    expanded_train_set.append((img, label))
    # Rescale to pixel range and reshape for the distortion, then invert
    # both transforms so the distorted sample matches the original format.
    # NOTE(review): assumes 28x28 images normalized by 256 — confirm against
    # MnistDataset.
    original_img = (img * 256).reshape((28, 28))
    transformed_img = (elastic_distortion(original_img) / 256).flatten()
    expanded_train_set.append((transformed_img, label))

# Shuffle so originals and their distorted copies are no longer adjacent.
global_rand.shuffle(expanded_train_set)

expanded_mnist = BasicDataset(train=expanded_train_set,
                              valid=mnist.valid_set(),
                              test=mnist.test_set())

logging.info("expanded training data size: %d" % len(expanded_train_set))

if __name__ == '__main__':
    # 2-hidden-layer MLP classifier over flattened 28x28 inputs.
    model = NeuralClassifier(input_dim=28 * 28)
    model.stack(Dense(256, 'relu'), Dense(256, 'relu'), Dense(10, 'linear'),
                Softmax())

    trainer = MomentumTrainer(model)

    # NOTE(review): `annealer` is created but never used within the visible
    # source; the training call presumably follows beyond this chunk.
    annealer = LearningRateAnnealer()