def data_generator(x, y, batch_size=50):
    index_array = np.arange(len(x))
    while True:
        # the original sliced len(x_test), a name not defined in this scope;
        # the generator should batch over its own input x
        batches = make_batches(len(x), batch_size)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            x_batch = x[batch_ids]
            y_batch = y[batch_ids]
            yield (x_batch, y_batch)
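Every snippet on this page leans on a make_batches helper that turns a sample count into a list of (start, end) index pairs. A minimal sketch of that helper, consistent with how it is called above (the real implementation may differ):

import numpy as np

def make_batches(size, batch_size):
    """Return a list of (start, end) index tuples covering `size` samples."""
    num_batches = (size + batch_size - 1) // batch_size  # ceiling division
    return [(i * batch_size, min(size, (i + 1) * batch_size))
            for i in range(num_batches)]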
Example #2
    def evaluate(self, x, y, batch_size):
        """Evaluate the given data in test mode

        :param x: input data to use for evaluation
        :type x: torch.Tensor
        :param y: target data to use for evaluation
        :type y: torch.Tensor
        :param batch_size: number of samples to use per evaluation step
        :type batch_size: int
        :return: metric values averaged over all batches, weighted by batch
         size
        :rtype: list[float]
        """

        self._assert_compiled()

        if self.device:
            self.network.to(self.device)

        batches = make_batches(x.shape[0], batch_size)
        metric_values_per_batch = []
        batch_sizes = []
        for idx_start, idx_end in batches:
            inputs = x[idx_start:idx_end]
            if self.n_outputs > 1:
                targets = []
                for idx_output in range(self.n_outputs):
                    targets.append(y[idx_output][idx_start:idx_end])
            else:
                targets = y[idx_start:idx_end]

            n_obs = inputs.shape[0]
            batch_sizes.append(n_obs)

            test_outputs = self.test_on_batch(inputs, targets)
            metric_values_per_batch.append(test_outputs)

        # every batch reports the same number of metrics, so read the count
        # from the first batch rather than relying on the leaked loop
        # variable `test_outputs`
        n_metrics = len(metric_values_per_batch[0])
        validation_outputs = []
        for idx_value in range(n_metrics):
            validation_outputs.append(
                np.average([
                    metric_values[idx_value]
                    for metric_values in metric_values_per_batch
                ],
                           weights=batch_sizes))
        return validation_outputs
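The final loop computes a weighted average so that a smaller trailing batch does not count as much as the full-sized ones. A standalone illustration of that step, with made-up metric values:

import numpy as np

# two metrics (e.g. loss and accuracy) measured over batches of 32, 32 and 8
metric_values_per_batch = [(0.50, 0.90), (0.40, 0.92), (0.80, 0.75)]
batch_sizes = [32, 32, 8]

averaged = [
    np.average([values[idx] for values in metric_values_per_batch],
               weights=batch_sizes)
    for idx in range(2)
]
print(averaged)  # the small final batch pulls each mean far less than 1/3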
Example #3
    def predict(self, x, batch_size):
        """Generate output predictions for the input samples

        :param x: input data to predict on
        :type x: torch.Tensor
        :param batch_size: number of samples to predict on at one time
        :type batch_size: int
        :return: predictions for each sample, concatenated over batches
        :rtype: torch.Tensor
        """

        batches = make_batches(len(x), batch_size)
        predictions_per_batch = []
        for idx_start, idx_end in batches:
            inputs = x[idx_start:idx_end]
            # call the module directly rather than .forward so that any
            # registered hooks also run
            predictions = self.network(inputs)
            predictions_per_batch.append(predictions)

        return torch.cat(predictions_per_batch)
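In plain PyTorch you would normally run such a loop in evaluation mode and with gradient tracking disabled, which saves memory and avoids building an autograd graph. A hedged sketch of the same loop with those additions (standalone, so the network is passed in rather than read from self):

import torch

def predict_in_batches(network, x, batch_size):
    """Batched inference without gradient tracking."""
    network.eval()  # e.g. disables dropout, uses running batch-norm stats
    predictions_per_batch = []
    with torch.no_grad():  # no autograd graph is built during inference
        for idx_start in range(0, len(x), batch_size):
            inputs = x[idx_start:idx_start + batch_size]
            predictions_per_batch.append(network(inputs))
    return torch.cat(predictions_per_batch)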
Example #4
    def __getitem__(self, index):
        assert 0 <= index < self.length

        if index == 0:
            self.epoch += 1

            if self.shuffle == "batch":
                self.index_array = batch_shuffle(self.index_array,
                                                 self.batch_size)
            elif self.shuffle:
                np.random.shuffle(self.index_array)

            batches = make_batches(self.num_samples, self.batch_size)
            self.batch_enumerator = enumerate(batches)

        batch_index, (batch_start, batch_end) = next(self.batch_enumerator)
        batch_ids = self.index_array[batch_start:batch_end]

        try:
            x_batch = slice_arrays(self.x, batch_ids)[0]
            y_batch = slice_arrays(self.y, batch_ids)[0]
            sample_weights_batch = slice_arrays(self.sample_weights,
                                                batch_ids)[0]
        except TypeError:
            raise TypeError('TypeError while preparing batch. '
                            'If using HDF5 input data, '
                            'pass shuffle="batch".')

        if self.incremental is False:
            # no ramp-up: always perturb at full strength
            x_batch_adv = self.attack.perturb(x_batch, y_batch, 1)
        elif self.incremental[0] <= self.epoch:
            # ramp the perturbation strength linearly from 0 to 1 between
            # the start and end epochs given by `incremental`; the min()
            # caps the strength at 1 once the end epoch is passed
            x_batch_adv = self.attack.perturb(
                x_batch, y_batch,
                min((self.epoch - self.incremental[0]) /
                    (self.incremental[1] - self.incremental[0]), 1))
        else:
            # before the ramp starts, leave the batch unperturbed
            x_batch_adv = x_batch

        return x_batch_adv, y_batch, sample_weights_batch
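The branching above reduces to a small schedule: incremental is False means full-strength perturbation from the first epoch; otherwise the strength ramps linearly from 0 to 1 between the two epochs in incremental. A standalone sketch of that schedule (the function name is ours, not the source's):

def perturbation_strength(epoch, incremental):
    """Strength of the adversarial perturbation at a given epoch.

    Returns None when the batch should be left clean.
    """
    if incremental is False:
        return 1  # full strength from the start
    start, end = incremental
    if epoch < start:
        return None  # ramp has not begun yet
    return min((epoch - start) / (end - start), 1)

# with incremental=(5, 10): epoch 3 -> None, epoch 7 -> 0.4, epoch 12 -> 1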
Example #5
def predict_loop(model, f, ins, batch_size=32, verbose=0, steps=None):
    """Abstract method to loop over some data in batches.

    # Arguments
        model: Keras model instance.
        f: Keras function returning a list of tensors.
        ins: list of tensors to be fed to `f`.
        batch_size: integer batch size.
        verbose: verbosity mode.
        steps: Total number of steps (batches of samples)
            before declaring `predict_loop` finished.
            Ignored with the default value of `None`.

    # Returns
        Array of predictions (if the model has a single output)
        or list of arrays of predictions
        (if the model has multiple outputs).
    """
    num_samples = check_num_samples(ins,
                                    batch_size=batch_size,
                                    steps=steps,
                                    steps_name='steps')
    if verbose == 1:
        if steps is not None:
            progbar = Progbar(target=steps)
        else:
            progbar = Progbar(target=num_samples)

    indices_for_conversion_to_dense = []
    is_sparse = False
    for i in range(len(model._feed_inputs)):
        if issparse(ins[i]) and not K.is_sparse(model._feed_inputs[i]):
            indices_for_conversion_to_dense.append(i)
        elif issparse(ins[i]) and K.is_sparse(model._feed_inputs[i]):
            is_sparse = True
    if steps is not None:
        # Step-based predictions.
        # Since we do not know how many samples
        # we will see, we cannot pre-allocate
        # the returned Numpy arrays.
        # Instead, we store one array per batch seen
        # and concatenate them upon returning.
        unconcatenated_outs = []
        for step in range(steps):
            batch_outs = f(ins)
            batch_outs = to_list(batch_outs)
            if step == 0:
                for batch_out in batch_outs:
                    unconcatenated_outs.append([])
            for i, batch_out in enumerate(batch_outs):
                unconcatenated_outs[i].append(batch_out)
            if verbose == 1:
                progbar.update(step + 1)
        if is_sparse:
            if len(unconcatenated_outs) == 1:
                return vstack(unconcatenated_outs[0], 'csr')
            return [
                vstack(unconcatenated_outs[i], 'csr')
                for i in range(len(unconcatenated_outs))
            ]
        if len(unconcatenated_outs) == 1:
            return np.concatenate(unconcatenated_outs[0], axis=0)
        return [
            np.concatenate(unconcatenated_outs[i], axis=0)
            for i in range(len(unconcatenated_outs))
        ]
    else:
        # Sample-based predictions.
        outs = []
        batches = make_batches(num_samples, batch_size)
        index_array = np.arange(num_samples)
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            if ins and isinstance(ins[-1], float):
                # Do not slice the training phase flag.
                ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]]
            else:
                ins_batch = slice_arrays(ins, batch_ids)
            for i in indices_for_conversion_to_dense:
                ins_batch[i] = ins_batch[i].toarray()

            batch_outs = f(ins_batch)
            batch_outs = to_list(batch_outs)
            if batch_index == 0:
                # Pre-allocate the results arrays.
                for batch_out in batch_outs:
                    shape = (num_samples, ) + batch_out.shape[1:]
                    if is_sparse:
                        outs.append(lil_matrix(shape, dtype=batch_out.dtype))
                    else:
                        outs.append(np.zeros(shape, dtype=batch_out.dtype))
            for i, batch_out in enumerate(batch_outs):
                outs[i][batch_start:batch_end] = batch_out
            if verbose == 1:
                progbar.update(batch_end)
        if is_sparse:
            return unpack_singleton(list(map(lambda oo: oo.tocsr(), outs)))
        return unpack_singleton(outs)
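Besides make_batches, predict_loop relies on two small Keras helpers: to_list normalizes a single output into a one-element list so every model can be treated as multi-output internally, and unpack_singleton undoes that on the way out. Minimal sketches consistent with their usage above:

def to_list(x):
    """Wrap a single value in a list; leave lists untouched."""
    if isinstance(x, list):
        return x
    return [x]

def unpack_singleton(x):
    """Return the sole element of a one-element sequence, else x itself."""
    if len(x) == 1:
        return x[0]
    return x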
Example #6
    def fit(self,
            x,
            y,
            batch_size,
            n_epochs=1,
            callbacks=None,
            validation_data=None):
        """Trains the network on the given data for a fixed number of epochs

        :param x: input data to train on
        :type x: torch.Tensor
        :param y: target data to train on
        :type y: torch.Tensor
        :param batch_size: number of samples to use per forward and backward
         pass
        :type batch_size: int
        :param n_epochs: number of epochs (iterations of the dataset) to train
         the model
        :type n_epochs: int
        :param callbacks: callbacks to be used during training
        :type callbacks: list[object]
        :param validation_data: data on which to evaluate the loss and metrics
         at the end of each epoch
        :type validation_data: tuple(torch.Tensor)
        """

        default_callbacks = self._load_default_callbacks()
        default_callbacks.append(ProgbarLogger(count_mode='samples'))
        if callbacks:
            default_callbacks.extend(callbacks)
        callbacks = CallbackList(default_callbacks)

        self._assert_compiled()

        if self.device:
            self.network.to(self.device)

        metrics = ['loss']
        if self.n_outputs > 1:
            for idx_output in range(1, self.n_outputs + 1):
                metrics.append('loss{}'.format(idx_output))
        if validation_data is not None:
            metrics.append('val_loss')
            if self.n_outputs > 1:
                for idx_output in range(1, self.n_outputs + 1):
                    metrics.append('val_loss{}'.format(idx_output))
        for metric_name in self.metric_names:
            metrics.append(metric_name)
            if validation_data is not None:
                metrics.append('val_{}'.format(metric_name))

        index_array = np.arange(x.shape[0])

        callbacks.set_params({
            'batch_size': batch_size,
            'epochs': n_epochs,
            'metrics': metrics,
            'steps': None,
            'samples': x.shape[0],
            'verbose': True
        })
        callbacks.set_model(self)

        callbacks.on_train_begin()
        for idx_epoch in range(n_epochs):
            if self.stop_training:
                break

            epoch_logs = {}
            callbacks.on_epoch_begin(idx_epoch)

            np.random.shuffle(index_array)
            batches = make_batches(len(index_array), batch_size)
            for idx_batch, (idx_start, idx_end) in enumerate(batches):
                batch_logs = {'batch': idx_batch, 'size': idx_end - idx_start}
                callbacks.on_batch_begin(idx_batch, batch_logs)

                inputs = x[index_array[idx_start:idx_end]]
                if self.n_outputs > 1:
                    targets = []
                    for idx_output in range(self.n_outputs):
                        targets.append(
                            y[idx_output][index_array[idx_start:idx_end]])
                else:
                    targets = y[index_array[idx_start:idx_end]]
                train_outputs = self.train_on_batch(inputs, targets)

                batch_logs['loss'] = train_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        batch_logs['loss{}'.format(idx_output)] = (
                            train_outputs[idx_output])

                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, train_outputs[idx_metric_values:])
                for metric_name, train_output in it:
                    batch_logs[metric_name] = train_output
                callbacks.on_batch_end(idx_batch, batch_logs)

                if self.stop_training:
                    break

            if validation_data:
                val_outputs = self.evaluate(validation_data[0],
                                            validation_data[1], batch_size)

                epoch_logs['val_loss'] = val_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        epoch_logs['val_loss{}'.format(idx_output)] = (
                            val_outputs[idx_output])

                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, val_outputs[idx_metric_values:])
                for metric_name, val_output in it:
                    metric_name = 'val_{}'.format(metric_name)
                    epoch_logs[metric_name] = val_output
            callbacks.on_epoch_end(idx_epoch, epoch_logs)
        callbacks.on_train_end()
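Putting the pieces together, a training run with this class might look like the sketch below. The Trainer name and the compile step are assumptions for illustration; only fit, evaluate, and predict appear in the source above.

import torch
from torch import nn

# toy regression setup with random data
network = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 1))
x, y = torch.randn(1000, 20), torch.randn(1000, 1)
x_val, y_val = torch.randn(200, 20), torch.randn(200, 1)

# hypothetical wiring; the actual constructor and compile signature are
# not shown in the snippets above
# trainer = Trainer(network, n_outputs=1)
# trainer.compile(loss=nn.MSELoss(),
#                 optimizer=torch.optim.Adam(network.parameters()))
# trainer.fit(x, y, batch_size=32, n_epochs=10,
#             validation_data=(x_val, y_val))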