Example 1
    def fit(self,
            s,
            a,
            s_next,
            r,
            absorbing,
            theta,
            batch_size=32,
            nb_epoch=10,
            shuffle=True,
            theta_metrics={}):
        """

        Args:
            s (numpy.array): the samples of the state (nsamples, state_dim)
            a (numpy.array): the samples of the action (nsamples, action_dim)
            s_next (numpy.array): the samples of the next (reached) state (nsamples, state_dim)
            r (numpy.array): the samples of the reward (nsamples, )
            absorbing (numpy.array): the samples of the absorbing-state flag (nsamples, )
            theta (numpy.array): the samples of the Q-function parameters (1, n_params)
            batch_size (int): dimension of the batch used for a single gradient step
            nb_epoch (int): number of epochs
            shuffle (boolean): whether to shuffle the training data before each epoch.
            theta_metrics (dict): dictionary of (name: callable) pairs.
                Each callable is used to evaluate the Q-function parameters
                at each iteration. The signature of the callable is f(theta),
                e.g. theta_metrics={'k': lambda theta: evaluate(theta)}

        Returns:
            A dictionary storing the training information (theta, rho and any
            theta_metrics values collected at each batch)
        """
        s, a, s_next, r, absorbing, theta = self._standardize_user_data(
            s, a, s_next, r, absorbing, theta, check_batch_dim=False)

        all_actions = standardize_input_data(
            self.discrete_actions, ['all_actions'],
            [(None, self.action_dim)] if self.action_dim is not None else None,
            exception_prefix='discrete_actions')

        n_updates = 0
        history = {"theta": [], 'rho': []}
        for k in theta_metrics.keys():
            history.update({k: []})

        ins = s + a + s_next + [r, absorbing]
        self._make_train_function()
        f = self.train_function

        nb_train_sample = ins[0].shape[0]
        index_array = np.arange(nb_train_sample)

        # append evolution of theta for independent case
        for _ in range(len(self.theta_list) - 1):
            if self.incremental:
                tmp = theta[-1] + self.bellman_model.predict(theta[-1])
            else:
                tmp = self.bellman_model.predict(theta[-1])
            theta += [tmp]

        term_condition = self.term_condition
        stop = False
        old_theta = theta

        for epoch in range(nb_epoch):
            if stop:
                break

            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)
            batches = make_batches(nb_train_sample, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):

                history["theta"].append(theta[0])
                if hasattr(self.bellman_model, '_model'):
                    history["rho"].append(
                        self.bellman_model._model.get_weights())
                else:
                    history["rho"].append(self.bellman_model.get_weights())
                for k, v in iteritems(theta_metrics):
                    history[k].append(v(theta))

                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                inp = ins_batch + theta + all_actions
                outs = f(*inp)
                n_updates += 1

                if self.update_theta_every > 0 and n_updates % self.update_theta_every == 0:
                    tmp = self.apply_bo(theta[0],
                                        n_times=self.steps_per_theta_update)
                    theta = [tmp]
                    for _ in range(len(self.theta_list) - 1):
                        if self.incremental:
                            tmp = tmp + self.bellman_model.predict(tmp)
                        else:
                            tmp = self.bellman_model.predict(tmp)
                        theta += [tmp]

                    if term_condition is not None:
                        stop = term_condition(old_theta, theta)
                        if stop:
                            break
                        old_theta = theta

        # finally apply the bellman operator K-times to get the final point
        self.learned_theta_value = self.apply_bo(theta[0], n_times=100)
        if self.verbose > 1:
            print('learned theta: {}'.format(self.learned_theta_value))

        self.history = history
        return history
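The theta_metrics argument above is only described abstractly; a minimal, hypothetical call could look like the following sketch (the pbo object, the shapes and the metric name are assumptions for illustration, not part of the source):

import numpy as np

# Hypothetical transition set: 100 samples, 2-d state, 1-d action, 3 Q-parameters.
n, state_dim, action_dim, n_params = 100, 2, 1, 3
s = np.random.randn(n, state_dim)
a = np.random.randn(n, action_dim)
s_next = np.random.randn(n, state_dim)
r = np.random.randn(n)
absorbing = np.zeros(n)            # 1.0 would mark a terminal transition
theta0 = np.zeros((1, n_params))   # initial Q-function parameters

# The metric receives the current list of theta arrays at every batch.
theta_metrics = {'theta_norm': lambda theta: float(np.linalg.norm(theta[0]))}

# `pbo` is assumed to be an instance exposing the fit() shown above:
# history = pbo.fit(s, a, s_next, r, absorbing, theta0,
#                   batch_size=32, nb_epoch=10, shuffle=True,
#                   theta_metrics=theta_metrics)
# history['theta'], history['rho'] and history['theta_norm'] then hold one entry per batch.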
Example 2
    def fit(self,
            sast,
            r,
            batch_size=32,
            nb_epoch=10,
            shuffle=True,
            theta_metrics={}):
        """

        Args:
            sast (numpy.array): the samples of (state, action, next state,
                absorbing flag) packed row-wise, with shape
                (nsamples, 2 * state_dim + action_dim + 1)
            r (numpy.array): the samples of the reward (nsamples, )
            batch_size (int): dimension of the batch used for a single gradient step
            nb_epoch (int): number of epochs
            shuffle (boolean): whether to shuffle the training data before each epoch.
            theta_metrics (dict): dictionary of (name: callable) pairs.
                Each callable is used to evaluate the Q-function parameters
                at each iteration. The signature of the callable is f(theta),
                e.g. theta_metrics={'k': lambda theta: evaluate(theta)}

        Returns:
            A dictionary storing the training information (theta and any
            theta_metrics values collected at each batch)
        """
        sast = standardize_input_data(
            sast, ['sast'], (None, 2 * self.state_dim + self.action_dim + 1),
            exception_prefix='sast')[0]

        next_states_idx = self.state_dim + self.action_dim
        sa = sast[:, :next_states_idx]
        s_next = sast[:, next_states_idx:-1]
        absorbing = sast[:, -1]

        n_updates = 0

        maxq, maxa = self.maxQA(s_next, absorbing)

        if hasattr(self._estimator, 'adapt'):
            # update estimator structure
            self._estimator.adapt(iteration=self._iteration)

        # y = np.reshape(r + self.gamma * maxq, (-1, 1))
        y = r + self.gamma * maxq

        ins = [sa, y]
        self._make_train_function()
        f = self.train_function

        nb_train_sample = sa.shape[0]
        index_array = np.arange(nb_train_sample)
        history = {"theta": []}
        for k in theta_metrics.keys():
            history.update({k: []})

        for epoch in range(nb_epoch):
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(nb_train_sample, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):

                if hasattr(self._estimator, '_model'):
                    ltheta = self._estimator._model.get_weights()
                    history["theta"].append(ltheta)
                else:
                    ltheta = self._estimator.get_weights()
                    history["theta"].append(ltheta)
                for k, v in iteritems(theta_metrics):
                    history[k].append(v(ltheta))

                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                outs = f(*ins_batch)
                n_updates += 1

                if self.update_theta_every > 0 \
                        and n_updates % self.update_theta_every == 0:
                    maxq, maxa = self.maxQA(s_next, absorbing)

                    if hasattr(self._estimator, 'adapt'):
                        # update estimator structure
                        self._estimator.adapt(iteration=self._iteration)

                    # y = np.reshape(r + self.gamma * maxq, (-1, 1))
                    y = r + self.gamma * maxq
                    ins = [ins[0], y]

        if self._verbose > 1:
            print('learned theta: {}'.format(self._estimator.get_weights()))

        return history
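From the slicing at the top of this fit(), the packed sast matrix is expected to have the column layout [s | a | s_next | absorbing]. A minimal sketch of assembling it (the shapes and the fitted_q name are assumptions):

import numpy as np

n, state_dim, action_dim = 100, 2, 1
s = np.random.randn(n, state_dim)
a = np.random.randn(n, action_dim)
s_next = np.random.randn(n, state_dim)
absorbing = np.zeros((n, 1))   # 1.0 marks terminal transitions
r = np.random.randn(n)

# Columns: [ s | a | s_next | absorbing ], width = 2*state_dim + action_dim + 1.
sast = np.column_stack([s, a, s_next, absorbing])
assert sast.shape == (n, 2 * state_dim + action_dim + 1)
# fitted_q.fit(sast, r)   # `fitted_q` is an assumed instance exposing the fit() above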
Example 3
    def fit(self, sast, r, batch_size=32, nb_epoch=10, shuffle=True, theta_metrics={}):
        """

        Args:
            sast (numpy.array): the samples of (state, action, next state,
                absorbing flag) packed row-wise, with shape
                (nsamples, 2 * state_dim + action_dim + 1)
            r (numpy.array): the samples of the reward (nsamples, )
            batch_size (int): dimension of the batch used for a single gradient step
            nb_epoch (int): number of epochs
            shuffle (boolean): whether to shuffle the training data before each epoch.
            theta_metrics (dict): dictionary of (name: callable) pairs.
                Each callable is used to evaluate the Q-function parameters
                at each iteration. The signature of the callable is f(theta),
                e.g. theta_metrics={'k': lambda theta: evaluate(theta)}

        Returns:
            A dictionary storing the training information (theta and any
            theta_metrics values collected at each batch)
        """
        sast = standardize_input_data(
            sast, ["sast"], (None, 2 * self.state_dim + self.action_dim + 1), exception_prefix="sast"
        )[0]

        next_states_idx = self.state_dim + self.action_dim
        sa = sast[:, :next_states_idx]
        s_next = sast[:, next_states_idx:-1]
        absorbing = sast[:, -1]

        n_updates = 0

        maxq, maxa = self.maxQA(s_next, absorbing)

        if hasattr(self._estimator, "adapt"):
            # update estimator structure
            self._estimator.adapt(iteration=self._iteration)

        # y = np.reshape(r + self.gamma * maxq, (-1, 1))
        y = r + self.gamma * maxq

        ins = [sa, y]
        self._make_train_function()
        f = self.train_function

        nb_train_sample = sa.shape[0]
        index_array = np.arange(nb_train_sample)
        history = {"theta": []}
        for k in theta_metrics.keys():
            history.update({k: []})

        for epoch in range(nb_epoch):
            if shuffle == "batch":
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(nb_train_sample, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):

                if hasattr(self._estimator, "_model"):
                    ltheta = self._estimator._model.get_weights()
                    history["theta"].append(ltheta)
                else:
                    ltheta = self._estimator.get_weights()
                    history["theta"].append(ltheta)
                for k, v in iteritems(theta_metrics):
                    history[k].append(v(ltheta))

                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception(
                        "TypeError while preparing batch. " "If using HDF5 input data, " 'pass shuffle="batch".'
                    )

                outs = f(*ins_batch)
                n_updates += 1

                if self.update_theta_every > 0 and n_updates % self.update_theta_every == 0:
                    maxq, maxa = self.maxQA(s_next, absorbing)

                    if hasattr(self._estimator, "adapt"):
                        # update estimator structure
                        self._estimator.adapt(iteration=self._iteration)

                    # y = np.reshape(r + self.gamma * maxq, (-1, 1))
                    y = r + self.gamma * maxq
                    ins = [ins[0], y]

        if self._verbose > 1:
            print("learned theta: {}".format(self._estimator.get_weights()))

        return history
Example 4
def _fit_loop(self, f, ins, out_labels=None, batch_size=32,
              nb_epoch=100, verbose=1, callbacks=None,
              val_f=None, val_ins=None, shuffle=True,
              callback_metrics=None, initial_epoch=0):
    """Abstract fit function for f(ins).
    Assume that f returns a list, labeled by out_labels.

    # Arguments
        f: Keras function returning a list of tensors
        ins: list of tensors to be fed to `f`
        out_labels: list of strings, display names of
            the outputs of `f`
        batch_size: integer batch size
        nb_epoch: number of times to iterate over the data
        verbose: verbosity mode, 0, 1 or 2
        callbacks: list of callbacks to be called during training
        val_f: Keras function to call for validation
        val_ins: list of tensors to be fed to `val_f`
        shuffle: whether to shuffle the data at the beginning of each epoch
        callback_metrics: list of strings, the display names of the metrics
            passed to the callbacks. They should be the
            concatenation of list the display names of the outputs of
             `f` and the list of display names of the outputs of `f_val`.
        initial_epoch: epoch at which to start training
            (useful for resuming a previous training run)

    # Returns
        `History` object.

    [A tweaked version.]
    """
    do_validation = False
    if val_f and val_ins:
        do_validation = True
        if verbose:
            print('Train on %d samples, validate on %d samples' %
                  (ins[0].shape[0], val_ins[0].shape[0]))

    nb_train_sample = ins[0].shape[0]
    index_array = np.arange(nb_train_sample)

    self.history = cbks.History()
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
    if verbose:
        callbacks += [cbks.ProgbarLogger()]
    callbacks = cbks.CallbackList(callbacks)
    out_labels = out_labels or []

    # it's possible to callback a different model than self
    # (used by Sequential models)
    if hasattr(self, 'callback_model') and self.callback_model:
        callback_model = self.callback_model
    else:
        callback_model = self

    callbacks.set_model(callback_model)
    callbacks.set_params({
        'batch_size': batch_size,
        'nb_epoch': nb_epoch,
        'nb_sample': nb_train_sample,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callbacks.on_train_begin()
    callback_model.stop_training = False
    self.validation_data = val_ins

    for epoch in range(initial_epoch, nb_epoch):
        callbacks.on_epoch_begin(epoch)
        if shuffle == 'batch':
            index_array = batch_shuffle(index_array, batch_size)
        elif shuffle:
            np.random.shuffle(index_array)

        batches = make_batches(nb_train_sample, batch_size)
        epoch_logs = {}
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            try:
                if isinstance(ins[-1], float):
                    # do not slice the training phase flag
                    ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                else:
                    ins_batch = slice_X(ins, batch_ids)
            except TypeError:
                raise TypeError('TypeError while preparing batch. '
                                'If using HDF5 input data, '
                                'pass shuffle="batch".')
            batch_logs = {}
            batch_logs['batch'] = batch_index
            batch_logs['size'] = len(batch_ids)
            batch_logs['ids'] = batch_ids
            callbacks.on_batch_begin(batch_index, batch_logs)
            outs = f(ins_batch)
            if not isinstance(outs, list):
                outs = [outs]
            for l, o in zip(out_labels, outs):
                batch_logs[l] = o

            callbacks.on_batch_end(batch_index, batch_logs)

            if batch_index == len(batches) - 1:  # last batch
                # validation
                if do_validation:
                    # replace with self._evaluate
                    val_outs = self._test_loop(val_f, val_ins,
                                               batch_size=batch_size,
                                               verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # same labels assumed
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o
        callbacks.on_epoch_end(epoch, epoch_logs)
        if callback_model.stop_training:
            break
    callbacks.on_train_end()
    return self.history
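This loop leans on the Keras 1.x helpers make_batches and slice_X; the following is a rough sketch of the behaviour it assumes from them (illustrative re-implementations, not the library source):

import numpy as np

def make_batches_sketch(size, batch_size):
    # (start, end) index pairs covering `size` samples; the last batch may be smaller.
    nb_batch = int(np.ceil(size / float(batch_size)))
    return [(i * batch_size, min(size, (i + 1) * batch_size)) for i in range(nb_batch)]

def slice_rows_sketch(arrays, batch_ids):
    # What slice_X(ins, batch_ids) is used for above: pick the same rows from every input.
    return [x[batch_ids] for x in arrays]

index_array = np.arange(10)
print(make_batches_sketch(10, 4))                        # [(0, 4), (4, 8), (8, 10)]
batch = slice_rows_sketch([np.eye(10), np.arange(10)], index_array[4:8])
print(batch[0].shape, batch[1])                          # (4, 10) [4 5 6 7]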
Example 5
File: ebrm.py Project: teopir/ifqi
    def _fit_loop(self, f, ins, discrete_actions, out_labels=[],
                  batch_size=32, nb_epoch=100, verbose=1, callbacks=[],
                  val_f=None, val_ins=None, shuffle=True, callback_metrics=[],
                  theta_metrics={}):
        do_validation = False
        if val_f and val_ins:
            do_validation = True
            if verbose:
                print('Train on %d samples, validate on %d samples' %
                      (ins[0].shape[0], val_ins[0].shape[0]))

        nb_train_sample = ins[0].shape[0]
        print(nb_train_sample)
        index_array = np.arange(nb_train_sample)

        history = PBOHistory()
        callbacks = [history] + callbacks
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)
        callback_model = self

        callbacks._set_model(callback_model)
        callbacks._set_params({
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'nb_sample': nb_train_sample,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics + [el for el in theta_metrics.keys()],
            'theta': self.q_model.trainable_weights[0].eval()
        })
        callbacks.on_train_begin()
        callback_model.stop_training = False

        for epoch in range(nb_epoch):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(nb_train_sample, batch_size)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                batch_logs['theta'] = self.q_model.trainable_weights[0].eval()
                for k in theta_metrics.keys():
                    batch_logs[k] = theta_metrics[k](self.q_model.trainable_weights)
                callbacks.on_batch_begin(batch_index, batch_logs)

                inp = ins_batch + discrete_actions
                outs = f(*inp)

                if type(outs) != list:
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                if batch_index == len(batches) - 1:  # last batch
                    # validation
                    if do_validation:
                        # replace with self._evaluate
                        val_outs = self._test_loop(val_f, val_ins,
                                                   batch_size=batch_size,
                                                   verbose=0)
                        if type(val_outs) != list:
                            val_outs = [val_outs]
                        # same labels assumed
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)

        callbacks.on_train_end()
        return history
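Note that in this loop each theta_metrics callable receives self.q_model.trainable_weights (a list of backend variables), not a plain numpy array, so it has to evaluate the tensors itself. A hypothetical metric consistent with that call site:

import numpy as np

def theta_l2_norm(trainable_weights):
    # .eval() mirrors the usage above and assumes the backend session is available.
    values = [w.eval() for w in trainable_weights]
    return float(np.sqrt(sum(np.sum(v ** 2) for v in values)))

# passed as: theta_metrics={'theta_l2': theta_l2_norm}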
Example 6
    def fit(self, s, a, s_next, r, absorbing, theta,
            batch_size=32, nb_epoch=10, shuffle=True,
            theta_metrics={}):
        """

        Args:
            s (numpy.array): the samples of the state (nsamples, state_dim)
            a (numpy.array): the samples of the action (nsamples, action_dim)
            s_next (numpy.array): the samples of the next (reached) state (nsamples, state_dim)
            r (numpy.array): the samples of the reward (nsamples, )
            absorbing (numpy.array): the samples of the absorbing-state flag (nsamples, )
            theta (numpy.array): the samples of the Q-function parameters (1, n_params)
            batch_size (int): dimension of the batch used for a single gradient step
            nb_epoch (int): number of epochs
            shuffle (boolean): whether to shuffle the training data before each epoch.
            theta_metrics (dict): dictionary of (name: callable) pairs.
                Each callable is used to evaluate the Q-function parameters
                at each iteration. The signature of the callable is f(theta),
                e.g. theta_metrics={'k': lambda theta: evaluate(theta)}

        Returns:
            A dictionary storing the training information (theta, rho and any
            theta_metrics values collected at each batch)
        """
        s, a, s_next, r, absorbing, theta = self._standardize_user_data(
            s, a, s_next, r, absorbing, theta,
            check_batch_dim=False
        )

        all_actions = standardize_input_data(
            self.discrete_actions, ['all_actions'],
            [(None, self.action_dim)] if self.action_dim is not None else None,
            exception_prefix='discrete_actions')

        n_updates = 0
        history = {"theta": [], 'rho': []}
        for k in theta_metrics.keys():
            history.update({k: []})

        ins = s + a + s_next + [r, absorbing]
        self._make_train_function()
        f = self.train_function

        nb_train_sample = ins[0].shape[0]
        index_array = np.arange(nb_train_sample)

        # append evolution of theta for independent case
        for _ in range(len(self.theta_list) - 1):
            if self.incremental:
                tmp = theta[-1] + self.bellman_model.predict(theta[-1])
            else:
                tmp = self.bellman_model.predict(theta[-1])
            theta += [tmp]

        term_condition = self.term_condition
        stop = False
        old_theta = theta

        for epoch in range(nb_epoch):
            if stop:
                break

            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)
            batches = make_batches(nb_train_sample, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):

                history["theta"].append(theta[0])
                if hasattr(self.bellman_model, '_model'):
                    history["rho"].append(
                        self.bellman_model._model.get_weights())
                else:
                    history["rho"].append(self.bellman_model.get_weights())
                for k, v in iteritems(theta_metrics):
                    history[k].append(v(theta))

                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                inp = ins_batch + theta + all_actions
                outs = f(*inp)
                n_updates += 1

                if self.update_theta_every > 0 and n_updates % self.update_theta_every == 0:
                    tmp = self.apply_bo(theta[0],
                                        n_times=self.steps_per_theta_update)
                    theta = [tmp]
                    for _ in range(len(self.theta_list) - 1):
                        if self.incremental:
                            tmp = tmp + self.bellman_model.predict(tmp)
                        else:
                            tmp = self.bellman_model.predict(tmp)
                        theta += [tmp]

                    if term_condition is not None:
                        stop = term_condition(old_theta, theta)
                        if stop:
                            break
                        old_theta = theta

        # finally apply the bellman operator K-times to get the final point
        self.learned_theta_value = self.apply_bo(theta[0], n_times=100)
        if self.verbose > 1:
            print('learned theta: {}'.format(self.learned_theta_value))

        self.history = history
        return history
Example 7
    def _fit_loop(self,
                  f,
                  ins,
                  discrete_actions,
                  out_labels=[],
                  batch_size=32,
                  nb_epoch=100,
                  verbose=1,
                  callbacks=[],
                  val_f=None,
                  val_ins=None,
                  shuffle=True,
                  callback_metrics=[],
                  theta_metrics={}):
        do_validation = False
        if val_f and val_ins:
            do_validation = True
            if verbose:
                print('Train on %d samples, validate on %d samples' %
                      (ins[0].shape[0], val_ins[0].shape[0]))

        nb_train_sample = ins[0].shape[0]
        print(nb_train_sample)
        index_array = np.arange(nb_train_sample)

        history = PBOHistory()
        callbacks = [history] + callbacks
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)
        callback_model = self

        callbacks._set_model(callback_model)
        callbacks._set_params({
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'nb_sample': nb_train_sample,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics + [el for el in theta_metrics.keys()],
            'theta': self.q_model.trainable_weights[0].eval()
        })
        callbacks.on_train_begin()
        callback_model.stop_training = False

        for epoch in range(nb_epoch):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(nb_train_sample, batch_size)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if type(ins[-1]) is float:
                        # do not slice the training phase flag
                        ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                    else:
                        ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                batch_logs['theta'] = self.q_model.trainable_weights[0].eval()
                for k in theta_metrics.keys():
                    batch_logs[k] = theta_metrics[k](
                        self.q_model.trainable_weights)
                callbacks.on_batch_begin(batch_index, batch_logs)

                inp = ins_batch + discrete_actions
                outs = f(*inp)

                if type(outs) != list:
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                if batch_index == len(batches) - 1:  # last batch
                    # validation
                    if do_validation:
                        # replace with self._evaluate
                        val_outs = self._test_loop(val_f,
                                                   val_ins,
                                                   batch_size=batch_size,
                                                   verbose=0)
                        if type(val_outs) != list:
                            val_outs = [val_outs]
                        # same labels assumed
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)

        callbacks.on_train_end()
        return history
Example 8
    def _fit_loop(self,
                  f,
                  ins,
                  nb_train_samples,
                  out_labels=None,
                  batch_size=32,
                  nb_epoch=100,
                  verbose=1,
                  callbacks=None,
                  shuffle=True,
                  callback_metrics=None,
                  initial_epoch=0):
        """The core loop that fits the data."""

        index_array = np.arange(nb_train_samples)

        self.history = keras.callbacks.History()
        callbacks = [keras.callbacks.BaseLogger()] + (callbacks or [])
        callbacks += [self.history]
        if verbose:
            callbacks += [keras.callbacks.ProgbarLogger()]
        callbacks = keras.callbacks.CallbackList(callbacks)

        out_labels = out_labels or []

        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self

        callbacks._set_model(callback_model)
        callbacks._set_params({
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'nb_sample': nb_train_samples,
            'verbose': verbose,
            'do_validation': False,
            'metrics': callback_metrics or [],
        })

        callbacks.on_train_begin()
        callback_model.stop_training = False
        self.validation_data = None

        for epoch in range(initial_epoch, nb_epoch):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = keras_training.batch_shuffle(
                    index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = keras_training.make_batches(nb_train_samples, batch_size)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]

                try:
                    if isinstance(ins[-1], float):
                        ins_batch = get_batch(ins[:-1], batch_ids)
                        ins_batch += [ins[-1]]
                    else:
                        ins_batch = get_batch(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
            callbacks.on_epoch_end(epoch, epoch_logs)
            if callback_model.stop_training:
                break
        callbacks.on_train_end()
        return self.history
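The callbacks argument in this last variant takes standard Keras callbacks; the loop feeds them 'batch' and 'size' entries through on_batch_begin/on_batch_end. A minimal custom callback compatible with that contract (the class name and its purpose are illustrative, not part of the source):

import keras

class BatchCounter(keras.callbacks.Callback):
    # Hypothetical callback: counts processed samples via the 'size' entry set above.
    def on_train_begin(self, logs=None):
        self.seen = 0

    def on_batch_end(self, batch, logs=None):
        self.seen += (logs or {}).get('size', 0)

# passed as: callbacks=[BatchCounter()] when calling _fit_loop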