Code example #1
File: DQNdrone.py  Project: BrainsGarden/AirSim
def huber_loss(y, y_hat, delta):
    """ Compute the Huber Loss as part of the model graph

    Huber Loss is more robust to outliers. It is defined as:
     if |y - y_hat| < delta :
        0.5 * (y - y_hat)**2
    else :
        delta * |y - y_hat| - 0.5 * delta**2

    Attributes:
        y (Tensor[-1, 1]): Target value
        y_hat(Tensor[-1, 1]): Estimated value
        delta (float): Outliers threshold

    Returns:
        CNTK Graph Node
    """
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared
    loss_per_sample = element_select(less(abs_error, delta), less_than, more_than)

    return reduce_sum(loss_per_sample, name='loss')
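As a quick sanity check of the piecewise definition in the docstring, the same computation can be written in plain NumPy. This reference sketch is not part of the project above; the name np_huber_loss is chosen here only for illustration.

import numpy as np

def np_huber_loss(y, y_hat, delta):
    # Elementwise Huber loss, summed over the batch, mirroring reduce_sum above
    error = np.asarray(y, dtype=float) - np.asarray(y_hat, dtype=float)
    abs_error = np.abs(error)
    quadratic = 0.5 * np.square(error)              # used where |error| < delta
    linear = delta * abs_error - 0.5 * delta ** 2   # used everywhere else
    return np.sum(np.where(abs_error < delta, quadratic, linear))

# Small errors are penalised quadratically, large ones only linearly:
print(np_huber_loss([0.2, 3.0], [0.0, 0.0], delta=1.0))  # 0.02 + 2.5 = 2.52 (up to float rounding)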
Code example #2
def huber_loss(y, y_hat, delta):
    """ Compute the Huber Loss as part of the model graph

    Huber Loss is more robust to outliers. It is defined as:
     if |y - y_hat| < delta :
        0.5 * (y - y_hat)**2
    else :
        delta * |y - y_hat| - 0.5 * delta**2

    Attributes:
        y (Tensor[-1, 1]): Target value
        y_hat(Tensor[-1, 1]): Estimated value
        delta (float): Outliers threshold

    Returns:
        CNTK Graph Node
    """
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared
    loss_per_sample = element_select(less(abs_error, delta), less_than, more_than)

    return reduce_sum(loss_per_sample, name='loss')
Code example #3
File: base.py  Project: debidatta/malmo-ai
def huber_loss(y_hat, y, delta):
    """
    Compute the Huber Loss as part of the model graph

    Huber Loss is more robust to outliers. It is defined as:
     if |y - y_hat| < delta :
        0.5 * (y - y_hat)**2
    else :
        delta * |y - y_hat| - 0.5 * delta**2

    :param y: Target value
    :param y_hat: Estimated value
    :param delta: Outliers threshold
    :return: float
    """
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared

    loss_per_sample = element_select(less(abs_error, delta), less_than,
                                     more_than)

    return reduce_sum(loss_per_sample, name='loss')
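Note that this variant reverses the first two parameters (y_hat, y instead of y, y_hat). Because the loss depends only on the absolute difference |y - y_hat|, the result is identical to the version in examples #1 and #2.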
Code example #4
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)
Code example #5
File: DQNdrone.py  Project: BrainsGarden/AirSim
        def criterion(pre_states, actions, post_states, rewards, terminals):
            # Compute the q_targets
            q_targets = compute_q_targets(post_states, rewards, terminals)

            # actions is a 1-hot encoding of the action done by the agent
            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            # Define training criterion as the Huber Loss function
            return huber_loss(q_targets, q_acted, 1.0)
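The helper compute_q_targets is not part of this listing. Under the standard DQN formulation it would combine the reward, the discount factor and the frozen target network's maximum Q-value, roughly as in the sketch below, using the same CNTK ops that appear in example #13 (reduce_max, stop_gradient). The names self._target_net and gamma are assumptions for illustration, not the project's actual identifiers.

        def compute_q_targets(post_states, rewards, terminals):
            # Standard DQN target: r + gamma * max_a Q_target(s', a), zeroed on terminal steps
            post_q = reduce_max(self._target_net(post_states), axis=0)
            post_q = (1.0 - terminals) * post_q
            return stop_gradient(rewards + gamma * post_q)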
Code example #6
File: cntk_utils.py  Project: AllanYiin/CNTK
def huber_loss(output, target):
    r"""See https://en.wikipedia.org/wiki/Huber_loss for definition.

    \delta is set to 1. This is not the right definition if output and target
    differ in more than one dimension.
    """
    a = target - output
    return C.reduce_sum(C.element_select(
        C.less(C.abs(a), 1), C.square(a) * 0.5, C.abs(a) - 0.5))
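In the notation of the referenced article, with a = target - output, the loss is

    L_delta(a) = 0.5 * a**2                    if |a| < delta
               = delta * (|a| - 0.5 * delta)   otherwise

so fixing delta = 1 reduces the two branches to 0.5 * a**2 and |a| - 0.5, which are exactly the expressions passed to element_select above.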
Code example #7
def huber_loss(y, y_hat, delta):
    half_delta_squared = 0.5 * delta * delta
    error = y - y_hat
    abs_error = abs(error)

    less_than = 0.5 * square(error)
    more_than = (delta * abs_error) - half_delta_squared
    loss_per_sample = element_select(less(abs_error, delta), less_than, more_than)

    return reduce_sum(loss_per_sample, name='loss')
Code example #8
def huber_loss(output, target):
    r"""See https://en.wikipedia.org/wiki/Huber_loss for definition.

    \delta is set to 1. This is not the right definition if output and target
    differ in more than one dimension.
    """
    a = target - output
    return C.reduce_sum(
        C.element_select(C.less(C.abs(a), 1),
                         C.square(a) * 0.5,
                         C.abs(a) - 0.5))
Code example #9
    def __init__(self,
                 in_shape,
                 output_shape,
                 device_id=None,
                 learning_rate=0.00025,
                 momentum=0.9,
                 minibatch_size=32,
                 update_interval=10000,
                 n_workers=1,
                 visualizer=None):
        """
        Q Neural Network following Mnih et al.'s implementation and default options.

        The network has the following topology:
        Convolution(32, (8, 8))
        Convolution(64, (4, 4))
        Convolution(64, (2, 2))
        Dense(512)

        :param in_shape: Shape of the observations perceived by the learner (the neural net input)
        :param output_shape: Size of the action space (mapped to the number of output neurons)

        :param device_id: Use None to let CNTK select the best available device,
                          -1 for CPU, >= 0 for GPU
                          (default: None)

        :param learning_rate: Learning rate
                              (default: 0.00025, as per Mnih et al.)

        :param momentum: Momentum value used for averaging gradients
                         (without the unit-gain filter).
                         Note that CNTK does not currently provide an implementation
                         of Graves' RMSProp with momentum; the AdamSGD optimizer
                         is used instead.
                         (default: 0.9)

        :param minibatch_size: Minibatch size
                               (default: 32, as per Mnih et al.)

        :param n_workers: Number of concurrent workers for distributed training.
                          (default: 1, not distributed)

        :param visualizer: Optional visualizer allowing the model to save summary data
                           (default: None, no visualization)

        Ref: Mnih et al.: "Human-level control through deep reinforcement learning."
        Nature 518.7540 (2015): 529-533.
        """

        assert learning_rate > 0, 'learning_rate should be > 0'
        assert 0. <= momentum < 1, 'momentum should be 0 <= momentum < 1'

        QModel.__init__(self, in_shape, output_shape)
        CntkModel.__init__(self, device_id, False, n_workers, visualizer)

        self._nb_actions = output_shape
        self._steps = 0
        self._target_update_interval = update_interval
        self._target = None

        # Input vars
        self._environment = input(in_shape,
                                  name='env',
                                  dynamic_axes=(Axis.default_batch_axis()))
        self._q_targets = input(1,
                                name='q_targets',
                                dynamic_axes=(Axis.default_batch_axis()))
        self._actions = input(output_shape,
                              name='actions',
                              dynamic_axes=(Axis.default_batch_axis()))

        # Define the neural network graph
        self._model = self._build_model()(self._environment)
        self._target = self._model.clone(
            CloneMethod.freeze, {self._environment: self._environment})

        # Define the learning rate
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

        # AdamSGD optimizer
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._model.parameters,
                     lr_schedule,
                     momentum=m_schedule,
                     unit_gain=True,
                     variance_momentum=vm_schedule)

        if self.distributed_training:
            raise NotImplementedError('ASGD not implemented yet.')

        # _actions is a sparse 1-hot encoding of the actions done by the agent
        q_acted = reduce_sum(self._model * self._actions, axis=0)

        # Define the trainer with Huber Loss function
        criterion = huber_loss(q_acted, self._q_targets, 1.0)

        self._learner = l_sgd
        self._trainer = Trainer(self._model, (criterion, None), l_sgd)
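The _build_model helper referenced above is not included in this listing. Based on the topology listed in the docstring, a CNTK sketch would look roughly like the following; the strides, the ReLU activations and the final linear output layer are assumptions (only the filter counts and kernel sizes come from the docstring), and build_model is a hypothetical standalone name for what is a method in the original class.

from cntk.initializer import he_uniform
from cntk.layers import Convolution2D, Dense, Sequential, default_options
from cntk.ops import relu

def build_model(nb_actions):
    # Docstring topology: Convolution(32, (8, 8)), Convolution(64, (4, 4)),
    # Convolution(64, (2, 2)), Dense(512), plus one output neuron per action.
    with default_options(activation=relu, init=he_uniform()):
        return Sequential([
            Convolution2D((8, 8), 32, strides=4, name='conv1'),   # stride assumed
            Convolution2D((4, 4), 64, strides=2, name='conv2'),   # stride assumed
            Convolution2D((2, 2), 64, strides=1, name='conv3'),   # stride assumed
            Dense(512, name='dense1'),
            Dense(nb_actions, activation=None, name='q_values')
        ])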
Code example #10
def negative_of_entropy_with_softmax(p):
    """See https://en.wikipedia.org/wiki/Entropy_(information_theory)."""
    return C.reduce_sum(C.softmax(p) * p) - C.reduce_log_sum_exp(p)
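The identity behind this one-liner: writing q = softmax(p), each log(q_i) equals p_i - log(sum_j exp(p_j)), so

    sum_i q_i * log(q_i) = sum_i q_i * p_i - log(sum_j exp(p_j))

because the q_i sum to 1. The right-hand side is exactly reduce_sum(softmax(p) * p) - reduce_log_sum_exp(p), i.e. the negative of the entropy of softmax(p).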
Code example #11
        def criterion(pre_states, actions, post_states, rewards, terminals):
            q_targets = compute_q_targets(post_states, rewards, terminals)

            q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

            return huber_loss(q_targets, q_acted, 1.0)
Code example #12
File: cntk_utils.py  Project: AllanYiin/CNTK
def negative_of_entropy_with_softmax(p):
    """See https://en.wikipedia.org/wiki/Entropy_(information_theory)."""
    return C.reduce_sum(C.softmax(p) * p) - C.reduce_log_sum_exp(p)
Code example #13
    def __init__(self,
                 state_dim,
                 action_dim,
                 gamma=0.99,
                 learning_rate=1e-4,
                 momentum=0.95):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.gamma = gamma

        with default_options(activation=relu, init=he_uniform()):
            # Convolution filter counts were halved to save on memory, no gpu :(
            self.model = Sequential([
                Convolution2D((8, 8), 16, strides=4, name='conv1'),
                Convolution2D((4, 4), 32, strides=2, name='conv2'),
                Convolution2D((3, 3), 32, strides=1, name='conv3'),
                Dense(256, init=he_uniform(scale=0.01), name='dense1'),
                Dense(action_dim,
                      activation=None,
                      init=he_uniform(scale=0.01),
                      name='actions')
            ])
            self.model.update_signature(Tensor[state_dim])

        # Create the target model as a copy of the online model
        self.target_model = None
        self.update_target()

        self.pre_states = input_variable(state_dim, name='pre_states')
        self.actions = input_variable(action_dim, name='actions')
        self.post_states = input_variable(state_dim, name='post_states')
        self.rewards = input_variable((), name='rewards')
        self.terminals = input_variable((), name='terminals')
        self.is_weights = input_variable((), name='is_weights')

        predicted_q = reduce_sum(self.model(self.pre_states) * self.actions,
                                 axis=0)

        # DQN - calculate target q values
        # post_q = reduce_max(self.target_model(self.post_states), axis=0)

        # DDQN - calculate target q values
        online_selection = one_hot(
            argmax(self.model(self.post_states), axis=0), self.action_dim)
        post_q = reduce_sum(self.target_model(self.post_states) *
                            online_selection,
                            axis=0)

        post_q = (1.0 - self.terminals) * post_q
        target_q = stop_gradient(self.rewards + self.gamma * post_q)

        # Huber loss
        delta = 1.0
        self.td_error = minus(predicted_q, target_q, name='td_error')
        abs_error = abs(self.td_error)
        errors = element_select(less(abs_error, delta),
                                square(self.td_error) * 0.5,
                                delta * (abs_error - 0.5 * delta))
        loss = errors * self.is_weights

        # Adam based SGD
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)

        self._learner = adam(self.model.parameters,
                             lr_schedule,
                             m_schedule,
                             variance_momentum=vm_schedule)
        self.writer = TensorBoardProgressWriter(log_dir='metrics',
                                                model=self.model)
        self.trainer = Trainer(self.model, (loss, None), [self._learner],
                               self.writer)
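The commented-out reduce_max line above is the plain DQN target; the lines that follow it implement the Double DQN target, selecting the greedy action with the online network and evaluating it with the frozen target network:

    target_q = r + gamma * (1 - terminal) * Q_target(s', argmax_a Q_online(s', a))

stop_gradient keeps the target out of the backward pass, and multiplying the elementwise errors by is_weights applies the importance-sampling correction typically paired with prioritized experience replay (the replay buffer itself is not shown in this listing).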
Code example #14
File: qlearning.py  Project: petrosgk/RL_experiments
    def __init__(self, in_shape, output_shape, device_id=None,
                 learning_rate=0.00025, momentum=0.9,
                 minibatch_size=32, update_interval=10000,
                 n_workers=1, visualizer=None):

        """
        Q Neural Network following Mnih et al.'s implementation and default options.

        The network has the following topology:
        Convolution(32, (8, 8))
        Convolution(64, (4, 4))
        Convolution(64, (2, 2))
        Dense(512)

        :param in_shape: Shape of the observations perceived by the learner (the neural net input)
        :param output_shape: Size of the action space (mapped to the number of output neurons)

        :param device_id: Use None to let CNTK select the best available device,
                          -1 for CPU, >= 0 for GPU
                          (default: None)

        :param learning_rate: Learning rate
                              (default: 0.00025, as per Mnih et al.)

        :param momentum: Momentum value used for averaging gradients
                         (without the unit-gain filter).
                         Note that CNTK does not currently provide an implementation
                         of Graves' RMSProp with momentum; the AdamSGD optimizer
                         is used instead.
                         (default: 0.9)

        :param minibatch_size: Minibatch size
                               (default: 32, as per Mnih et al.)

        :param n_workers: Number of concurrent workers for distributed training.
                          (default: 1, not distributed)

        :param visualizer: Optional visualizer allowing the model to save summary data
                           (default: None, no visualization)

        Ref: Mnih et al.: "Human-level control through deep reinforcement learning."
        Nature 518.7540 (2015): 529-533.
        """

        assert learning_rate > 0, 'learning_rate should be > 0'
        assert 0. <= momentum < 1, 'momentum should be 0 <= momentum < 1'

        QModel.__init__(self, in_shape, output_shape)
        CntkModel.__init__(self, device_id, False, n_workers, visualizer)

        self._nb_actions = output_shape
        self._steps = 0
        self._target_update_interval = update_interval
        self._target = None

        # Input vars
        self._environment = input(in_shape, name='env',
                                  dynamic_axes=(Axis.default_batch_axis()))
        self._q_targets = input(1, name='q_targets',
                                dynamic_axes=(Axis.default_batch_axis()))
        self._actions = input(output_shape, name='actions',
                              dynamic_axes=(Axis.default_batch_axis()))

        # Define the neural network graph
        self._model = self._build_model()(self._environment)
        self._target = self._model.clone(
            CloneMethod.freeze, {self._environment: self._environment}
        )

        # Define the learning rate
        lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)

        # AdamSGD optimizer
        m_schedule = momentum_schedule(momentum)
        vm_schedule = momentum_schedule(0.999)
        l_sgd = adam(self._model.parameters, lr_schedule,
                     momentum=m_schedule,
                     unit_gain=True,
                     variance_momentum=vm_schedule)

        if self.distributed_training:
            raise NotImplementedError('ASGD not implemented yet.')

        # _actions is a sparse 1-hot encoding of the actions done by the agent
        q_acted = reduce_sum(self._model * self._actions, axis=0)

        # Define the trainer with Huber Loss function
        criterion = huber_loss(q_acted, self._q_targets, 1.0)

        self._learner = l_sgd
        self._trainer = Trainer(self._model, (criterion, None), l_sgd)
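Once the trainer has been constructed, a training step feeds the three input variables declared above. A minimal usage sketch, assuming an instance named model, an observation shape of (4, 84, 84), six actions and a batch of 32 transitions (all of these are illustrative values, not taken from the project):

import numpy as np

observations = np.zeros((32, 4, 84, 84), dtype=np.float32)   # pre-states
actions = np.zeros((32, 6), dtype=np.float32)                # 1-hot encoded actions
q_targets = np.zeros((32, 1), dtype=np.float32)              # targets from the frozen clone

model._trainer.train_minibatch({
    model._environment: observations,
    model._actions: actions,
    model._q_targets: q_targets,
})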