Example no. 1
 def __init__(self, batch_size=256, seed=-1, shuffle_level=2,
              allow_rollback=True, labels=None,
              log_path=None, verbose=3):
   super(MainLoop, self).__init__()
   self._labels = labels
   self._main_task = None
   self._task = []
   self._subtask = []
   self._evaltask = []
   self._task_when = {} # mapping from `Task` to `Timer`
   self._task_freq = {} # mapping from `Task` to `Timer`
   self._allow_rollback = bool(allow_rollback)
   self._verbose = int(verbose)
   # create default RNG (no randomization)
   self._rng = struct()
   self._rng.randint = lambda *args, **kwargs: None
   # set batch
   self.set_batch(batch_size=batch_size, seed=seed,
                  shuffle_level=shuffle_level)
   self._callback = CallbackList()
   # ====== for the checkpoint ====== #
   self._save_path = None
   self._save_obj = None
   self._save_variables = []
   self._best_object = None
   self._save_history = True
   # ====== maximum stored checkpoint ====== #
   self._checkpoint_increasing = True
   self._checkpoint_max = -1
   self._current_checkpoint_count = 0
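
The struct() trick above builds a do-nothing RNG: randint returns None, so a downstream call such as set_batch(seed=rng.randint(10e8), ...) effectively passes seed=None and no shuffling takes place. Below is a minimal sketch of the idea, assuming odin.utils.struct behaves like a simple attribute container; the class here is a stand-in, not the library's implementation.

# Stand-in for odin.utils.struct (assumption: a dict that allows attribute access).
class struct(dict):
  __getattr__ = dict.__getitem__
  __setattr__ = dict.__setitem__

rng = struct()
rng.randint = lambda *args, **kwargs: None      # any randint(...) call returns None
rng.permutation = lambda x: slice(None, None)   # "permutation" keeps the original order

print(rng.randint(10e8))                    # None -> downstream set_batch(seed=None): no shuffling
print(list(range(5))[rng.permutation(5)])   # [0, 1, 2, 3, 4] -> order unchanged
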
Example no. 2
 def __init__(self, batch_size=256, seed=-1, shuffle_level=0,
              allow_rollback=True, labels=None,
              log_path=None, verbose=3):
   super(MainLoop, self).__init__()
   self._labels = labels
   self._main_task = None
   self._task = []
   self._subtask = []
   self._evaltask = []
   self._task_when = {} # mapping from `Task` to `Timer`
   self._task_freq = {} # mapping from `Task` to `Timer`
   self._allow_rollback = bool(allow_rollback)
   self._verbose = int(verbose)
   # create default RNG (no randomization)
   self._rng = struct()
   self._rng.randint = lambda *args, **kwargs: None
   # set batch
   self.set_batch(batch_size=batch_size, seed=seed,
                  shuffle_level=shuffle_level)
   self._callback = CallbackList()
   # ====== for the checkpoint ====== #
   self._save_path = None
   self._save_obj = None
   self._save_variables = []
   self._best_object = None
   self._save_history = True
   # ====== maximum stored checkpoint ====== #
   self._checkpoint_increasing = True
   self._checkpoint_max = -1
   self._current_checkpoint_count = 0
Example no. 3
 def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
   if batch_size is not None:
     self._batch_size = batch_size
   if seed is None or seed >= 0:
     if seed is not None:
       self._rng = np.random.RandomState(seed)
     else:
       self._rng = struct()
       self._rng.randint = lambda x: None
       self._rng.rand = get_rng().rand
   if shuffle_level is not None:
     self._shuffle_level = min(max(int(shuffle_level), 0), 2)
   return self
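
A short usage sketch of the update rule above: arguments left at their defaults do not disturb the current settings, shuffle_level is clamped to [0, 2], and returning self allows chaining. The loop object is hypothetical and assumes only the MainLoop / set_batch signatures shown in these examples.

# Hypothetical usage; assumes only the constructor and set_batch signatures shown above.
loop = MainLoop(batch_size=256, seed=-1, shuffle_level=0)

loop.set_batch(batch_size=128)     # seed defaults to -1: the current RNG is left untouched
loop.set_batch(seed=5218)          # seed >= 0: replace the RNG with np.random.RandomState(5218)
loop.set_batch(seed=None)          # seed is None: fall back to the non-randomizing struct() RNG
loop.set_batch(shuffle_level=5).set_batch(batch_size=64)   # level clamped to 2; calls chain via return self
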
Example no. 4
 def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
   if batch_size is not None:
     self._batch_size = batch_size
   if seed is None or seed >= 0:
     if seed is not None:
       self._rng = np.random.RandomState(seed)
     else:
       self._rng = struct()
       self._rng.randint = lambda x: None
       self._rng.rand = get_rng().rand
   if shuffle_level is not None:
     self._shuffle_level = min(max(int(shuffle_level), 0), 2)
   return self
Example no. 5
 def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
     if batch_size is not None:
         self._batch_size = batch_size
         self._nb_samples_per_epoch = min([len(i) for i in self._data])
     if seed is None or seed >= 0:
         if seed is not None:
             self._rng = np.random.RandomState(seed)
         else:
             self._rng = struct()
             self._rng.randint = lambda x: None
             self._rng.rand = RNG_GENERATOR.rand
     if shuffle_level is not None:
         self._shuffle_level = min(max(int(shuffle_level), 0), 2)
     return self
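
This variant also recomputes the number of samples per epoch whenever the batch size changes: one epoch is bounded by the shortest attached data object. A tiny worked illustration with made-up lengths:

data_lengths = [10000, 8000, 12000]        # hypothetical len(i) for each attached data object
nb_samples_per_epoch = min(data_lengths)   # 8000: the shortest source bounds one epoch
nb_batches = nb_samples_per_epoch // 256   # with batch_size=256 -> 31 full batches plus a remainder
print(nb_samples_per_epoch, nb_batches)
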
Example no. 6
    def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
        """
        Parameters
        ----------
        batch_size: int
            size of each batch returned when iterating this Data
        seed: None, int
            if None, no shuffling is performed while iterating,
            if < 0, do not change the current seed,
            if >= 0, enable randomization with the given seed
        shuffle_level: int
            0: only shuffle the order of each batch
            1: shuffle the order of batches and inside each batch as well.
            2: includes level 0 and 1, and custom shuffling (strongest form)
        """
        if batch_size is not None:
            self._batch_size = batch_size
        if seed is None or seed >= 0:
            if seed is not None:
                self._rng = np.random.RandomState(seed)
            else:
                self._rng = struct()
                self._rng.randint = lambda *args, **kwargs: None
        if shuffle_level is not None:
            shuffle_level = min(max(int(shuffle_level), 0), 2)
            self._shuffle_level = shuffle_level
        # ====== set_batch for Tasks ====== #
        if self._task is not None:
            self._task.set_batch(batch_size=batch_size,
                                 seed=seed,
                                 shuffle_level=shuffle_level)
        for i in self._subtask.values():
            i.set_batch(batch_size=batch_size,
                        seed=seed,
                        shuffle_level=shuffle_level)
        for i in self._crosstask.values():
            i.set_batch(batch_size=batch_size,
                        seed=seed,
                        shuffle_level=shuffle_level)

        return self
Example no. 7
    def __init__(self, batch_size=256, seed=-1, shuffle_level=0):
        super(MainLoop, self).__init__()
        self._task = None
        self._subtask = {}  # run 1 epoch after a given frequency
        self._crosstask = {}  # randomly run 1 iteration with a given probability

        # create default RNG (no randomization)
        self._rng = struct()
        self._rng.randint = lambda *args, **kwargs: None
        # set batch
        self.set_batch(batch_size=batch_size,
                       seed=seed,
                       shuffle_level=shuffle_level)

        self._callback = CallbackList()

        self._save_path = None
        self._save_hist = None
        self._save_obj = None
Example no. 8
 def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
   """
   Parameters
   ----------
   batch_size: int
       size of each batch returned when iterating this Data
   seed: None, int
       if None, no shuffling is performed while iterating,
       if < 0, do not change the current seed,
       if >= 0, enable randomization with the given seed
   shuffle_level: int
       0: only shuffle the order of each batch
       1: shuffle the order of batches and inside each batch as well.
       2: includes level 0 and 1, and custom shuffling (strongest form)
   """
   if batch_size is not None:
     self._batch_size = batch_size
   if seed is None or seed >= 0:
     if seed is not None:
       self._rng = np.random.RandomState(seed)
     else:
       self._rng = struct()
       self._rng.randint = lambda *args, **kwargs: None
   if shuffle_level is not None:
     shuffle_level = min(max(int(shuffle_level), 0), 2)
     self._shuffle_level = shuffle_level
   # ====== set_batch for Tasks ====== #
   for i in self._task:
     i.set_batch(batch_size=batch_size, seed=seed,
                 shuffle_level=shuffle_level)
   for i in self._subtask:
     i.set_batch(batch_size=batch_size, seed=seed,
                 shuffle_level=shuffle_level)
   return self
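
A minimal sketch of the fan-out above: one loop-level call forwards the same (batch_size, seed, shuffle_level) triple to every registered task, and None again means "leave that setting unchanged". DummyTask is a hypothetical stand-in, not a class from odin.

class DummyTask(object):          # hypothetical stand-in for odin's Task
  def __init__(self, name):
    self.name = name
    self.batch_size = 256

  def set_batch(self, batch_size=None, seed=-1, shuffle_level=None):
    if batch_size is not None:    # None means: keep the current value
      self.batch_size = batch_size
    print(self.name, '->', self.batch_size, seed, shuffle_level)
    return self

tasks = [DummyTask('train'), DummyTask('valid')]
subtasks = [DummyTask('eval')]
for t in tasks + subtasks:        # same fan-out pattern as MainLoop.set_batch
  t.set_batch(batch_size=64, seed=1234, shuffle_level=2)
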
Example no. 9
File: data.py Project: imito/odin
 def create_iteration():
   seed = self._seed; self._seed = None
   if seed is not None:
     rng = np.random.RandomState(seed)
   else: # fallback: a fake RNG that performs no randomization
     rng = struct()
     rng.randint = lambda x: None
     rng.permutation = lambda x: slice(None, None)
   # ====== easy access many private variables ====== #
   sequential = self._sequential
   start, end = self._start, self._end
   batch_size = self._batch_size
   distribution = np.asarray(self._distribution)
   # shuffle order of data (good for sequential mode)
   idx = rng.permutation(len(self._data))
   data = self._data[idx] if isinstance(idx, slice) else [self._data[i] for i in idx]
   distribution = distribution[idx]
   shape = [i.shape[0] for i in data]
   # ====== prepare distribution information ====== #
   # number of samples that should be traversed
   n = np.asarray([i * (_apply_approx(j, end) - _apply_approx(j, start))
                   for i, j in zip(distribution, shape)])
   n = np.round(n).astype(int)
   # normalize the distribution (based on the new sample count n of each data)
   distribution = n / n.sum()
   distribution = _approximate_continuos_by_discrete(distribution)
   # somewhat heuristic: rescale the distribution to get more benefit from the cache
   if distribution.sum() <= len(data):
     distribution = distribution * 3
   # distribution is now the actual batch size of each data
   distribution = (batch_size * distribution).astype(int)
   assert distribution.sum() % batch_size == 0, 'wrong distribution size!'
   # predefined (start, end) pairs for each batch (e.g. (0, 256), (256, 512))
   idx = list(range(0, batch_size + distribution.sum(), batch_size))
   idx = list(zip(idx, idx[1:]))
   # Dummy return to initialize everything
   yield None
   #####################################
   # 1. optimized parallel code.
   if not sequential:
     # first iterators
     it = [iter(dat.set_batch(bs, seed=rng.randint(10e8),
                              start=start, end=end,
                              shuffle_level=self._shuffle_level))
           for bs, dat in zip(distribution, data)]
     # iterator
     while sum(n) > 0:
       batch = []
       for i, x in enumerate(it):
         if n[i] <= 0:
           continue
         try:
           x = next(x)[:n[i]]
           n[i] -= x.shape[0]
           batch.append(x)
         except StopIteration: # one iterator stopped
           it[i] = iter(data[i].set_batch(distribution[i],
               seed=rng.randint(10e8), start=start, end=end,
               shuffle_level=self._shuffle_level))
           x = next(it[i])[:n[i]]
           n[i] -= x.shape[0]
           batch.append(x)
       # got final batch
       batch = np.vstack(batch)
       # no idea why random permutation is much faster than shuffle
       if self._shuffle_level > 0:
         batch = batch[rng.permutation(batch.shape[0])]
       # return the iterations
       for i, j in idx[:int(ceil(batch.shape[0] / batch_size))]:
         yield batch[i:j]
   #####################################
   # 2. optimized sequential code.
   else:
     # first iterators
     batch_size = distribution.sum()
     it = [iter(dat.set_batch(batch_size, seed=rng.randint(10e8),
                              start=start, end=end,
                              shuffle_level=self._shuffle_level))
           for dat in data]
     current_data = 0
     # iterator
     while sum(n) > 0:
       if n[current_data] <= 0:
         current_data += 1
       try:
         x = next(it[current_data])[:n[current_data]]
         n[current_data] -= x.shape[0]
       except StopIteration: # one iterator stopped
         it[current_data] = iter(data[current_data].set_batch(batch_size, seed=rng.randint(10e8),
                             start=start, end=end,
                             shuffle_level=self._shuffle_level))
         x = next(it[current_data])[:n[current_data]]
         n[current_data] -= x.shape[0]
       # shuffle x
       if self._shuffle_level > 0:
         x = x[rng.permutation(x.shape[0])]
       for i, j in idx[:int(ceil(x.shape[0] / self._batch_size))]:
         yield x[i:j]
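
The generator above mixes several data sources according to a distribution, restarts exhausted iterators, and optionally shuffles inside each merged block before re-cutting it into batches. Below is a simplified, self-contained sketch of that interleaving idea (interleaved_batches is a hypothetical helper, not the library's API); it omits the iterator-restart and distribution-rounding machinery.

import numpy as np

def interleaved_batches(sources, per_source, batch_size, rng=None):
  # sources: list of 2-D arrays; per_source: rows drawn from each source per step.
  offsets = [0] * len(sources)
  while any(off < len(src) for off, src in zip(offsets, sources)):
    block = []
    for i, (src, k) in enumerate(zip(sources, per_source)):
      chunk = src[offsets[i]:offsets[i] + k]
      offsets[i] += len(chunk)
      if len(chunk):
        block.append(chunk)
    block = np.vstack(block)
    if rng is not None:                              # shuffle inside the merged block
      block = block[rng.permutation(len(block))]
    for start in range(0, len(block), batch_size):   # re-cut into batch_size pieces
      yield block[start:start + batch_size]

rng = np.random.RandomState(12)
a = np.arange(10).reshape(10, 1)         # source 1: 10 samples
b = np.arange(100, 106).reshape(6, 1)    # source 2: 6 samples
for batch in interleaved_batches([a, b], per_source=[4, 2], batch_size=3, rng=rng):
  print(batch.ravel())
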
Example no. 10
def _check_tag(var):
    if not hasattr(var, 'tag'):
        var.tag = struct()
Example no. 11
from six.moves import range, zip, cPickle

import numpy as np

from odin import (SIG_TRAIN_ROLLBACK, SIG_TRAIN_SAVE, SIG_TRAIN_STOP)
from odin.config import RNG_GENERATOR
from odin import fuel
from odin.fuel.dataset import Dataset
from odin.utils import struct, as_tuple, is_number

from .callbacks import *

# ===========================================================================
# Helper
# ===========================================================================
_SAVE_TASK = struct()
_SAVE_TASK.name = "save"


def __format_string(nb_of_float):
    x = ["{:.4f}"] * int(nb_of_float)
    return ";".join(x)


def standard_trainer(train_data,
                     valid_data,
                     X,
                     y_train,
                     y_score,
                     y_target,
                     parameters,