Example #1
    def __init__(self,
                 data,
                 buffer_size=8,
                 mode='threaded',
                 workers=None,
                 on_batch_loaded=None):
        valid = (
            'threaded',
            'multiprocessing',
        )
        utils.assert_raise(mode in valid, ValueError,
                           'mode must be one of: ' + ', '.join(valid))
        utils.assert_raise(buffer_size >= 2, ValueError,
                           'buffer_size must be greater or equal to 2')
        if mode == 'threaded':
            self._executor = C.ThreadPoolExecutor(workers)
        else:
            self._executor = C.ProcessPoolExecutor(workers)

        if on_batch_loaded is None:
            on_batch_loaded = _identity

        self._queue = PriorityQueue(buffer_size)
        self._data = data
        self._thread = None
        self._on_batch_loaded = on_batch_loaded
        self._cache_buffer = []
        self._caching = False
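
A note on the guard pattern above: the constructor rejects unknown modes and undersized buffers before any executor is allocated. The sketch below reproduces the mode check in a self-contained way, assuming C aliases concurrent.futures and using a plain raise in place of utils.assert_raise:

    from concurrent import futures

    def make_executor(mode='threaded', workers=None):
        # fail fast on unknown modes, mirroring the assert_raise call above
        valid = ('threaded', 'multiprocessing')
        if mode not in valid:
            raise ValueError('mode must be one of: ' + ', '.join(valid))
        if mode == 'threaded':
            return futures.ThreadPoolExecutor(workers)
        return futures.ProcessPoolExecutor(workers)

    executor = make_executor('threaded')      # ok
    # make_executor('serial')                 # raises ValueError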
Example #2
    def __init__(self,
                 data,
                 batch_size=1,
                 shuffle=True,
                 drop_last=False,
                 total_samples=None):
        utils.assert_raise(isinstance(data, (list, tuple)), ValueError,
                           '"data" must be a list or a tuple')

        self._batch_size = batch_size
        self._shuffle = shuffle
        self._drop_last = drop_last
        self._total_samples = total_samples

        self._container = self._create_container(data)

        l0 = len(self._container[0])
        for c in self._container[1:]:
            utils.assert_raise(
                len(c) == l0, ValueError,
                'All data must have the same length!')

        indices = self._container[0].indices

        for c in self._container:
            c._indices = indices

        self.reset()
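
The loop above guarantees that every container holds the same number of samples before their index arrays are shared. A toy reproduction of that check, with plain lists standing in for the data holders:

    x = [1, 2, 3, 4]
    y = ['a', 'b', 'c', 'd']
    z = ['too', 'short']

    containers = [x, y, z]
    l0 = len(containers[0])
    for c in containers[1:]:
        # same condition the assert_raise call enforces above
        if len(c) != l0:
            raise ValueError('All data must have the same length!')  # triggered by z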
Example #3
    def split(self, ratio):
        """Check :meth:`cogitare.data.AbsDataHolder.split`

        Split the :class:`~cogitare.data.DataSet` into two :class:`~cogitare.data.DataSet`.

        Args:
            ratio (:obj:`float`): ratio of the split. Must be between 0 and 1.

        Returns:
            (data1, data2): two :class:`~cogitare.data.DataSet`.

        Example::

            >>> print(dataset)
            DataSet with:
                containers: [
                    TensorHolder with 1094x64 samples
                    TensorHolder with 1094x64 samples
                ],
                batch size: 64

            >>> ds1, ds2 = dataset.split(0.8)
            >>> print(ds1)
            DataSet with:
                containers: [
                    TensorHolder with 875x64 samples
                    TensorHolder with 875x64 samples
                ],
                batch size: 64

            >>> print(ds2)
            DataSet with:
                containers: [
                    TensorHolder with 219x64 samples
                    TensorHolder with 219x64 samples
                ],
                batch size: 64
        """
        utils.assert_raise(0 < ratio < 1, ValueError,
                           '"ratio" must be between 0 and 1')

        d1, d2 = [], []

        for c in self.container:
            a, b = c.split(ratio)
            d1.append(a)
            d2.append(b)

        data1 = self.__class__(d1,
                               batch_size=self._batch_size,
                               shuffle=self._shuffle,
                               drop_last=self._drop_last)
        data2 = self.__class__(d2,
                               batch_size=self._batch_size,
                               shuffle=self._shuffle,
                               drop_last=self._drop_last)

        return data1, data2
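
The method delegates the split to each container and regroups the halves, so all containers stay aligned on the same cut point. A toy version with plain lists in place of the data holders:

    def split_all(containers, ratio):
        d1, d2 = [], []
        for c in containers:
            pos = int(len(c) * ratio)   # every container is cut at the same position
            d1.append(c[:pos])
            d2.append(c[pos:])
        return d1, d2

    print(split_all([[1, 2, 3, 4, 5], ['a', 'b', 'c', 'd', 'e']], 0.8))
    # ([[1, 2, 3, 4], ['a', 'b', 'c', 'd']], [[5], ['e']])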
Example #4
File: model.py  Project: wubizhi/cogitare
    def evaluate_with_metrics(self, dataset, metrics, *args, **kwargs):
        """
        Iterate over batches in the dataset using metrics defined in the ``metrics``
        argument, and then return a dict mapping {metric_name -> list of results}.

        This method does not affect training variables and can be used to evaluate the
        model's performance on different data (such as validation and test sets).

        The ``metrics`` must be defined as:

            - key: a name for this metric. The metric name must follow the variable naming convention.

            - value: a callable object, that accepts two parameters as input. The first parameter
              will be the model output, and the second parameter will be the batch data.

        Args:
            dataset: batch iterator
            metrics (dict): a dict mapping metric name to a callable.
            args/kwargs: :meth:`~cogitare.Model.forward` arguments. If provided, the
                forward will receive these parameters.

        Returns:

            output (dict): a dict mapping the metric name with a list containing the metric
            output for each batch in the dataset.


        Example::

            >>> metrics = {
            ...     'loss': model.metric_loss,
            ...     'precision': metrics.precision
            ... }

            >>> model.evaluate_with_metrics(validation_dataset, metrics)
            {'loss': [1.0, 0.8, 0.9], 'precision': [0.6, 0.55, 0.58]}
        """

        utils.assert_raise(
            isinstance(metrics, dict), ValueError,
            '"metrics" must be a dict with metric_name -> metric_function')
        result = dict()

        for sample in dataset:
            output = self.predict(sample)

            for key, call in metrics.items():
                holder = result.get(key, list())
                holder.append(call(output, sample))

                result[key] = holder

        return result
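
The loop above simply appends one value per metric per batch. A toy reproduction with stand-in predict and metric functions (none of these names are part of the cogitare API):

    def predict(sample):
        return sum(sample)                      # stand-in for self.predict

    metrics = {
        'total': lambda output, sample: output,
        'size': lambda output, sample: len(sample),
    }

    dataset = [[1, 2], [3, 4, 5]]
    result = {}
    for sample in dataset:
        output = predict(sample)
        for key, call in metrics.items():
            result.setdefault(key, []).append(call(output, sample))

    print(result)   # {'total': [3, 12], 'size': [2, 3]}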
Example #5
    def __init__(self, monitor='epoch', desc=None, freq=1):
        super(ProgressBar, self).__init__(freq)
        if desc is None:
            desc = monitor
        utils.assert_raise(monitor in ('epoch', 'batch'), ValueError,
                           'Monitor must be one of: "epoch", "batch"')

        self._monitor = monitor
        self._desc = desc
        self._total = None
        self._bar = None
        self._var = None
Example #6
    def total_samples(self, value):
        if hasattr(self._data, '__len__'):
            size = len(self._data)
        else:
            size = None

        if size is not None:
            utils.assert_raise(
                value <= size, ValueError,
                'The value must be less than or equal to the '
                'length of the input data')
        utils.assert_raise(value >= 1, ValueError,
                           'number of samples must be greater or equal to 1')
        self._total_samples = value
        self._remaining_samples = value
        self._requires_reset = True
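
Note that the upper-bound check only applies when the data exposes __len__; for a generator, the setter goes straight to the lower-bound check. A quick illustration of that branch:

    def upper_bound(data):
        # same hasattr test the setter uses above
        return len(data) if hasattr(data, '__len__') else None

    print(upper_bound([1, 2, 3]))               # 3
    print(upper_bound(x for x in range(3)))     # None: generators have no __len__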
Example #7
File: model.py  Project: wubizhi/cogitare
    def _apply_plugin(self, plugin, hook, override):
        utils.assert_raise(
            hook in self.valid_hooks, ValueError,
            'Invalid hook {}. Expected one of the following: {}'.format(
                hook, ', '.join(self.valid_hooks)))
        plugin = utils._ntuple(plugin, 1)

        container = self._plugins[hook]

        for p in plugin:
            if not isinstance(p, PluginInterface):
                p = PluginInterface.from_function(p)

            if p.name in container and not override:
                raise ValueError(
                    'A plugin with name "{}" already exists'.format(p.name))

            container[p.name] = p
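
A simplified registry showing the same duplicate-name rule, with a bare function standing in for PluginInterface and a tuple check approximating utils._ntuple (both stand-ins are assumptions about the real helpers):

    def register(container, plugins, override=False):
        if not isinstance(plugins, (list, tuple)):   # rough analogue of _ntuple(plugin, 1)
            plugins = (plugins,)
        for p in plugins:
            if p.__name__ in container and not override:
                raise ValueError('A plugin with name "{}" already exists'.format(p.__name__))
            container[p.__name__] = p

    hooks = {}
    def on_end_batch(*args, **kwargs):
        pass

    register(hooks, on_end_batch)                     # registered
    # register(hooks, on_end_batch)                   # would raise ValueError
    register(hooks, on_end_batch, override=True)      # replaces silently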
Example #8
    def __init__(self,
                 data,
                 batch_size=1,
                 shuffle=True,
                 drop_last=False,
                 total_samples=None,
                 mode='sequential',
                 single=False,
                 on_sample_loaded=None,
                 on_batch_loaded=None):
        valid_modes = ['threaded', 'multiprocessing', 'sequential']
        utils.assert_raise(mode in valid_modes, ValueError,
                           '"mode" must be one of: ' + ', '.join(valid_modes))

        if on_sample_loaded is None:
            on_sample_loaded = _identity
        if on_batch_loaded is None:
            on_batch_loaded = _identity

        self._indices = None
        self._single = single
        self._mode = mode
        self._total_samples = total_samples
        self._remaining_samples = None
        self._on_sample_loaded = on_sample_loaded
        self._on_batch_loaded = on_batch_loaded

        self._data = data
        self._batch_size = batch_size

        self._current_batch = 0
        self._drop_last = drop_last
        self._shuffle = shuffle

        self._requires_reset = True

        if mode == 'sequential':
            self._get = None
        elif mode == 'threaded':
            self._get = threaded.get
        else:
            self._get = multiprocessing.get
Example #9
    def __init__(self,
                 input_size,
                 num_classes=2,
                 dropout=0.0,
                 bias=True,
                 use_cuda=False):
        super(LogisticRegression, self).__init__()
        self.use_cuda = use_cuda
        utils.assert_raise(num_classes >= 2, ValueError,
                           '"num_classes" must be greater than or equal 2')
        utils.assert_raise(0 <= dropout < 1, ValueError,
                           '"dropout" value must be between 0 and 1')
        utils.assert_raise(
            input_size >= 1, ValueError,
            '"input_size" value must be greater than or equal 1')
        self.arguments = {
            'input_size': input_size,
            'num_classes': num_classes,
            'dropout': dropout,
            'bias': bias,
        }

        self.linear = nn.Linear(input_size, num_classes, bias)

        if use_cuda:
            self.cuda()
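
After the argument checks, the model is a single linear layer mapping input_size features to num_classes scores. A quick shape check of that layer (assumes PyTorch is installed; the sizes mirror the parameters above):

    import torch
    import torch.nn as nn

    linear = nn.Linear(10, 2, bias=True)    # input_size=10, num_classes=2
    x = torch.randn(4, 10)                  # batch of 4 samples
    print(linear(x).shape)                  # torch.Size([4, 2])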
Example #10
    def split(self, ratio):
        """Split the data holder into two data holders.

        The first one will receive *total_samples * ratio* samples, and the second
        data holder will receive the remaining samples.

        Args:
            ratio (:obj:`float`): ratio of the split. Must be between 0 and 1.

        Returns:
            (data1, data2): two data holders, of the same type as the original.

        Example::

            >>> print(data)
            TensorHolder with 875x64 samples
            >>> data1, data2 = data.split(0.8)
            >>> print(data1)
            TensorHolder with 700x64 samples
            >>> print(data2)
            TensorHolder with 175x64 samples
        """
        utils.assert_raise(0 < ratio < 1, ValueError,
                           '"ratio" must be between 0 and 1')

        pos = int(math.floor(self.total_samples * ratio))

        data1 = self._clone()
        data2 = self._clone()

        data1._indices = self.indices[:pos]
        data2._indices = self.indices[pos:]
        data1._total_samples = pos
        data2._total_samples = self.total_samples - pos

        return data1, data2
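
The cut point is simply floor(total_samples * ratio), and the second holder keeps the remainder. Checking the numbers quoted in the docstrings:

    import math

    def split_sizes(total_samples, ratio):
        pos = int(math.floor(total_samples * ratio))   # same arithmetic as above
        return pos, total_samples - pos

    print(split_sizes(875, 0.8))     # (700, 175), as in this docstring
    print(split_sizes(1094, 0.8))    # (875, 219), as in Example #3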
Example #11
    def test_assert_raise(self):
        for exp in (ValueError, IndexError, Exception):
            with pytest.raises(exp) as info:
                utils.assert_raise(1 == 2, exp, 'test message')
            self.assertIn('test message', str(info.value))
        utils.assert_raise(1 == 1, ValueError, 'not raises')
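
This test pins down the contract of utils.assert_raise: raise the given exception with the given message when the condition is false, and do nothing otherwise. A minimal implementation consistent with the test (the real cogitare helper may differ in details):

    def assert_raise(condition, exception, message):
        # raise exception(message) only when the condition does not hold
        if not condition:
            raise exception(message)

    assert_raise(1 == 1, ValueError, 'never raised')
    # assert_raise(1 == 2, ValueError, 'test message')   # -> ValueError: test message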
Example #12
    def __init__(self,
                 input_size,
                 num_layers,
                 hidden_size,
                 activation=None,
                 in_dropout=0.0,
                 hidden_dropout=0.0,
                 loss_function=None,
                 bias=True,
                 use_cuda=None):
        super(FeedForward, self).__init__()
        if activation is None:
            activation = [nn.Tanh()] * (num_layers - 1)
            activation.append(nn.LogSoftmax(dim=1))
        if loss_function is None:
            loss_function = nn.NLLLoss()

        utils.assert_raise(input_size >= 1, ValueError,
                           '"input_size" must be greater or equal to 1')
        utils.assert_raise(num_layers >= 1, ValueError,
                           '"num_layers" must be greater or equal to 1')

        self.input_size = input_size
        self.num_layers = num_layers
        self.in_dropout = in_dropout
        self.loss_function = loss_function
        self.use_cuda = use_cuda

        self.hidden_size = utils._ntuple(hidden_size, num_layers)
        self.activation = utils._ntuple(activation, num_layers)
        self.hidden_dropout = utils._ntuple(hidden_dropout, num_layers)
        self.bias = utils._ntuple(bias, num_layers)

        # make some assertions before continuing
        utils.assert_raise(
            len(self.hidden_size) == num_layers, ValueError,
            '"hidden_size" must have the same length as "num_layers"')
        utils.assert_raise(
            len(self.activation) == num_layers, ValueError,
            '"activation" must have the same length as "num_layers"')
        utils.assert_raise(
            len(self.hidden_dropout) == num_layers, ValueError,
            '"hidden_dropout" must have the same length as "num_layers"')
        utils.assert_raise(
            len(self.bias) == num_layers, ValueError,
            '"bias" must have the same length as "num_layers"')

        self._mlp = self._make_model()

        if use_cuda:
            self.cuda()
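
The length assertions above rely on utils._ntuple broadcasting scalar arguments to one value per layer while passing sequences through unchanged. A plausible stand-in showing that behavior (an assumption about the real helper):

    import collections.abc

    def _ntuple(value, n):
        # broadcast a scalar to an n-tuple; keep sequences as given so their
        # length can still be validated against num_layers afterwards
        if isinstance(value, collections.abc.Sequence) and not isinstance(value, str):
            return tuple(value)
        return (value,) * n

    print(_ntuple(128, 3))           # (128, 128, 128)
    print(_ntuple([64, 32, 2], 3))   # (64, 32, 2)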