Example #1
def _spectral_norm(self, call_method, inputs, **kwargs):
  w_shape = K.int_shape(self.kernel)
  w = tf.reshape(self.kernel, [-1, w_shape[-1]])

  u = self.u

  u_hat  = self.u
  v_hat  = None

  # Power iteration; usually one iteration is enough.
  for _ in range(1):
    v_ = K.dot(u_hat, tf.transpose(w))
    v_hat = K.l2_normalize(v_)

    u_ = K.dot(v_hat, w)
    u_hat = K.l2_normalize(u_)

  u_hat = K.stop_gradient(u_hat)
  v_hat = K.stop_gradient(v_hat)

  sigma = K.dot(K.dot(v_hat, w), K.transpose(u_hat))
  with tf.control_dependencies([u.assign(u_hat)]):
    w_norm = w / sigma
    w_norm = K.reshape(w_norm, w_shape)

  kernel = self.kernel
  self.kernel = w_norm
  result = call_method(inputs, **kwargs)
  self.kernel = kernel
  return result
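For reference, a minimal NumPy sketch of the same power iteration (not part of the example; the names and the 64x32 test matrix are made up):

import numpy as np

rng = np.random.default_rng(0)
W = rng.normal(size=(64, 32))
u = rng.normal(size=(32,))
for _ in range(100):
    v = W @ u                 # converges to the leading left singular vector
    v /= np.linalg.norm(v)
    u = W.T @ v               # converges to the leading right singular vector
    u /= np.linalg.norm(u)
sigma = v @ W @ u             # approximates the spectral norm of W
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # nearly identical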
Example #2
    def _encoder(x):
        # x = tf.keras.layers.Dropout(rate)(x)

        # Two Embeddings (3 for classes, 10 for degrees)
        cls = K.expand_dims(K.arange(3), axis=0)
        cls = K.stop_gradient(cls)
        cls = tf.keras.layers.Embedding(3, d_model)(cls)
        cls = K.expand_dims(cls, axis=2)  # (1, 3, 1, d_model)

        direct = K.expand_dims(K.arange(10), axis=0)
        direct = K.stop_gradient(direct)
        direct = tf.keras.layers.Embedding(10, d_model)(direct)
        direct = K.expand_dims(direct, axis=1)  # (1, 1, 10, d_model)

        embedding = tf.keras.layers.Reshape((30, d_model))(cls + direct)

        for i in range(n_layers):
            x = transformer_layer(d_model, n_heads, dff, rate)(x)

        x = multi_head_attention(d_model, n_heads,
                                 perm_and_reshape=False)(embedding, x, x)
        x = tf.keras.layers.Dropout(rate)(x)
        x = tf.keras.layers.BatchNormalization()(x)

        if softmax:
            x = tf.keras.layers.Softmax()(x)

        return x
Example #3
    def style_loss(style_features,
                   combination_features,
                   image_shape,
                   style_mask=None,
                   content_mask=None):

        if content_mask is not None:
            mask_tensor = K.variable(
                process_mask(content_mask, combination_features.shape))
            combination_features = combination_features * K.stop_gradient(
                mask_tensor)
            del mask_tensor

        if style_mask is not None:
            mask_tensor = K.variable(
                process_mask(style_mask, style_features.shape))
            style_features = style_features * K.stop_gradient(mask_tensor)
            if content_mask is not None:
                combination_features = combination_features * K.stop_gradient(
                    mask_tensor)
            del mask_tensor

        style_gram = gram_matrix(style_features, use_shifted_activations)
        content_gram = gram_matrix(combination_features,
                                   use_shifted_activations)
        size = image_shape[0] * image_shape[1]
        number_of_channels = 3
        loss = tf.reduce_sum(
            tf.square(style_gram - content_gram)) / (4.0 *
                                                     (number_of_channels**2) *
                                                     (size**2))
        return loss
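The gram_matrix helper is not shown in this example. A plausible sketch, assuming features of shape (height, width, channels); the activation shift is an assumption, modeled on the "shifted activations" trick for style transfer:

def gram_matrix(features, use_shifted_activations=False):
    if use_shifted_activations:
        features = features - 1.0
    # Flatten the spatial dimensions: (channels, height * width)
    flat = K.batch_flatten(K.permute_dimensions(features, (2, 0, 1)))
    # (channels, channels) matrix of feature correlations
    return K.dot(flat, K.transpose(flat))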
Example #4
    def _init_models(self):
        # make sure that the policy loss is set to 'sac'
        if self.policy.update_strategy != 'sac':
            self.policy.update_strategy = 'sac'
            self.logger.warn("policy.update_strategy has been set to 'sac'")

        # inputs
        S, A = self.policy.train_model.inputs[:2]
        G = keras.Input(name='G', shape=(1,), dtype='float')

        # construct log(pi(a_sampled, s))
        A_sampled = self.policy.dist.sample()  # differentiable
        log_pi = self.policy.dist.log_proba(A_sampled)

        # use target models for q-values, because they're non-trainable
        Q1 = self._get_q_value(self.q_func1, S, A_sampled)
        Q2 = self._get_q_value(self.q_func2, S, A_sampled)
        Q_both = keras.layers.Concatenate()([Q1, Q2])
        check_tensor(Q_both, ndim=2, axis_size=2, axis=1)

        # construct entropy-corrected target for state value function
        Q_min = keras.layers.Lambda(lambda x: K.min(x, axis=1))(Q_both)
        V_target = K.stop_gradient(Q_min - self.policy.entropy_beta * log_pi)
        check_tensor(V_target, ndim=1)

        # compute advantages from q-function
        V = self.v_func.predict_model(S)
        check_tensor(V, axis_size=1, axis=1)
        V = K.stop_gradient(K.squeeze(V, axis=1))
        Q = keras.layers.Lambda(lambda x: K.mean(x, axis=1))(Q_both)
        Adv = Q - self.policy.entropy_beta * log_pi - V

        # update loss with advantage coming directly from graph
        policy_loss, metrics = self.policy.policy_loss_with_metrics(Adv)
        v_loss = self.v_func.train_model([S, V_target])
        q_loss1 = self.q_func1.train_model([S, A, G])
        q_loss2 = self.q_func2.train_model([S, A, G])
        value_loss = (v_loss + q_loss1 + q_loss2) / 3.

        # add losses to metrics dict
        metrics.update({
            'policy/loss': policy_loss,
            'v_func/loss': v_loss,
            'q_func1/loss': q_loss1,
            'q_func2/loss': q_loss2,
            'value/loss': value_loss,
        })

        # combined loss function
        loss = policy_loss + self.value_loss_weight * value_loss
        check_tensor(loss, ndim=0)  # should be a scalar

        # joint model
        self.train_model = keras.Model([S, A, G], loss)
        self.train_model.add_loss(loss)
        for name, metric in metrics.items():
            self.train_model.add_metric(metric, name=name, aggregation='mean')
        self.train_model.compile(optimizer=self.policy.train_model.optimizer)
Example #5
    def call(self, inputs, **kwargs):
        boxes = K.stop_gradient(inputs[0])
        fpn = K.stop_gradient(inputs[1])

        time_distributed = K.ndim(boxes) == 4

        if time_distributed:
            boxes_shape = K.shape(boxes)
            fpn_shape = K.shape(fpn)

            new_boxes_shape = [-1] + [
                boxes_shape[i] for i in range(2, K.ndim(boxes))
            ]
            new_fpn_shape = [-1] + [
                fpn_shape[i] for i in range(2, K.ndim(fpn))
            ]

            boxes = K.reshape(boxes, new_boxes_shape)
            fpn = K.reshape(fpn, new_fpn_shape)

        image_shape = K.cast(K.shape(fpn), K.floatx())

        def _roi_align(args):
            boxes = args[0]
            fpn = args[1]  # process the feature map
            x1 = boxes[:, 0]
            y1 = boxes[:, 1]
            x2 = boxes[:, 2]
            y2 = boxes[:, 3]

            fpn_shape = K.cast(K.shape(fpn), dtype=K.floatx())
            norm_boxes = K.stack([
                (y1 / image_shape[1] * fpn_shape[0]) / (fpn_shape[0] - 1),
                (x1 / image_shape[2] * fpn_shape[1]) / (fpn_shape[1] - 1),
                (y2 / image_shape[1] * fpn_shape[0] - 1) / (fpn_shape[0] - 1),
                (x2 / image_shape[2] * fpn_shape[1] - 1) / (fpn_shape[1] - 1)
            ],
                                 axis=1)

            rois = tf.image.crop_and_resize(
                K.expand_dims(fpn, axis=0), norm_boxes,
                tf.zeros((K.shape(norm_boxes)[0], ), dtype='int32'),
                self.crop_size)

            return rois

        roi_batch = tf.map_fn(_roi_align,
                              elems=[boxes, fpn],
                              dtype=K.floatx(),
                              parallel_iterations=self.parallel_iterations)

        if time_distributed:
            roi_shape = tf.shape(roi_batch)
            new_roi_shape = [boxes_shape[0], boxes_shape[1]] + \
                            [roi_shape[i] for i in range(1, K.ndim(roi_batch))]
            roi_batch = tf.reshape(roi_batch, new_roi_shape)

        return roi_batch
Example #6
    def call(self, inputs):

        binary_kernel = self.kernel + K.stop_gradient(
            K.sign(self.kernel) - self.kernel)
        binary_kernel = binary_kernel + K.stop_gradient(
            binary_kernel * self.multiplier - binary_kernel)

        outputs = K.conv2d(inputs,
                           binary_kernel,
                           strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)

        return outputs
Example #7
 def round_through(self, x, rounding_method=None):
     '''Element-wise rounding to the closest integer with full gradient propagation.
     A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
     '''
     def ceil_fn():
         return tf.ceil(x)
     
     def floor_fn():
         return tf.floor(x)
     
     if rounding_method is None:
         rounding_method=self.rounding_method
     
     if rounding_method == 'nearest':
         rounded = tf.math.rint(x)
     elif rounding_method == 'down':
         rounded = tf.floor(x)
     elif rounding_method == 'stochastic':
         rounded=tf.cond(tf.greater(tf.reduce_mean(x-tf.floor(x)), 0.5), ceil_fn, floor_fn)
     elif rounding_method == 'zero':
         neg_alter=tf.add(tf.multiply(tf.cast(tf.less(x,0),'float32'),-2.0),1.0)
         rounded=tf.multiply(tf.floor(tf.multiply(x,neg_alter)),neg_alter)
     else:
         raise ValueError("Wrong rounding method '%s'; choose between "
                          "'nearest', 'down', 'zero' and 'stochastic'"
                          % rounding_method)
         
     rounded_through = x + K.stop_gradient(rounded - x)
     return rounded_through
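The x + K.stop_gradient(rounded - x) line is the straight-through estimator: the forward value is the rounded tensor, while the backward pass sees the identity. A small eager-mode sketch (TF2, hypothetical values):

import tensorflow as tf

x = tf.Variable([0.3, 1.7, 2.5])
with tf.GradientTape() as tape:
    y = x + tf.stop_gradient(tf.round(x) - x)
print(y.numpy())                    # [0. 2. 2.] (tf.round rounds half to even)
print(tape.gradient(y, x).numpy())  # [1. 1. 1.], the gradient of the identity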
Example #8
def loss_uncertainty_gaussian_likelihood(y_true, y_pred):
    """
    Loss function that calculates something similar to a Gaussian Likelihood.
    Requires that y_pred contains only one predicted value (label).
    y_true & y_pred are expected to contain the predicted/true label and
    the predicted std for the label.
    L = ln(std ** 2) + (y_label_pred - y_label_true) ** 2 / (std ** 2)

    Returns
    -------
    loss : Gaussian Likelihood loss

    """
    # order in y_pred: 1) pred label 2) pred label error
    # prevent that the gradient flows back over the label network:
    y_pred_label = K.stop_gradient(y_pred[:, 0])
    y_pred_label_std = y_pred[:, 1]
    y_true_label = y_true[:, 0]

    # equal to a lower std limit of 3.16 e-2
    eps = tf.constant(1e-3, dtype="float32")
    # y_pred_label_std += eps

    loss = K.log(K.pow(y_pred_label_std, 2) + eps) + K.pow(
        y_pred_label - y_true_label, 2) / (K.pow(y_pred_label_std, 2) + eps)
    return loss
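A quick sanity check with hypothetical values, assuming eager TF2 and K = tf.keras.backend; column 0 of y_pred is the predicted label, column 1 its predicted std:

y_true = tf.constant([[1.0, 0.0]])
y_pred = tf.constant([[1.5, 0.5]])
# ln(0.25 + 1e-3) + 0.25 / (0.25 + 1e-3) ~ -1.382 + 0.996 ~ -0.386
print(loss_uncertainty_gaussian_likelihood(y_true, y_pred).numpy())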
Example #9
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(
                K.maximum(step_size_p_bound, lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #10
    def _train(self, map_input, reward, policy_mask, policy_one_hot):
        _entropy = _policy_loss = _value_loss = 0.

        policy_mask = policy_mask.astype('float32')
        with tf.GradientTape() as tape:
            policy, value = self.model(map_input)
            value = K.squeeze(value, axis=1)
            policy = K.exp(policy) / (K.sum(K.exp(policy)))

            value_loss = .5 * K.square(reward - value)
            # Should I use policy * policy_mask here?
            entropy = -K.sum(policy * K.log(policy + 1e-10), axis=[1, 2, 3])

            log_prob = K.log(K.sum(policy * policy_one_hot, axis=[1, 2, 3]) + 1e-10)
            advantage = reward - K.stop_gradient(value)

            policy_loss = -log_prob * advantage - entropy * ENTROPY_RATE

            total_loss = policy_loss + value_loss

            _entropy = K.mean(entropy)
            _policy_loss = K.mean(K.abs(policy_loss))
            _value_loss = K.mean(value_loss)

        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        gradients, _ = tf.clip_by_global_norm(gradients, GRADIENT_CLIP_MAX)
        self.opt.apply_gradients(zip(gradients, self.model.trainable_variables))

        return [float(_value_loss), float(_policy_loss), float(_entropy)]
Example #11
    def __call__(self, layer):
        if self.flatten:
            flatten = layers.Flatten()(layer)
        else:
            flatten = layer
        outputs = []

        x = DenseBlock(units=128, **self.kwargs)(flatten)
        x = DenseBlock(units=32, **self.kwargs)(x)

        for name in self.output_names:
            output_label = layers.Dense(units=1, name=name)(x)
            outputs.append(output_label)

        # Network for the errors of the labels
        x_err = layers.Lambda(lambda a: K.stop_gradient(a))(flatten)

        x_err = DenseBlock(units=128, **self.kwargs)(x_err)
        x_err = DenseBlock(units=64, **self.kwargs)(x_err)
        x_err = DenseBlock(units=32, **self.kwargs)(x_err)

        for i, name in enumerate(self.output_names):
            output_label_error = layers.Dense(units=1,
                                              activation='linear',
                                              name=name + '_err_temp')(x_err)
            # Predicted label gets concatenated with its error (needed for loss function)
            output_label_merged = layers.Concatenate(name=name + '_err')(
                [outputs[i], output_label_error])
            outputs.append(output_label_merged)
        return outputs
Example #12
def cnn(config):
    if config['batch_norm']:
        conv = conv2d_bn
    else:
        conv = conv2d
    if config['ds_type'] == 'samples':
        x = keras.Input(shape=(16000, ), name='input')
        genc = genc_model(dim_z=40)
        y = genc(x)
        if config['load_genc']:
            genc.load_weights(str(Path.cwd() / 'genc.h5'))
        if not config['train_genc']:
            y = backend.stop_gradient(y)
        y = keras.layers.Reshape((-1, 40, 1))(y)
        y = keras.layers.Cropping2D(((5, 0), (0, 0)))(y)
        y = conv(y, 16)
    else:
        x = keras.Input(shape=(None, 40, 1), name='input')
        y = conv(x, 16)
    y = conv(y, 16)
    y = pool(y)
    y = conv(y, 32)
    y = conv(y, 32)
    y = pool(y)
    y = conv(y, 64)
    y = conv(y, 30)
    y = keras.layers.GlobalAveragePooling2D()(y)

    return keras.Model(x, y)
Example #13
    def optimizer(self):
        """ Actor Optimization: Advantages + Entropy term to encourage exploration
        (Cf. https://arxiv.org/abs/1602.01783)
        """
        actor, critic = self.actor_critic(self.actor_critic.input)

        action = K.placeholder(shape=(None, self.out_dim))
        advantages = K.placeholder(shape=(None, ))
        weighted_actions = K.sum(action * actor, axis=1)
        eligibility = K.log(weighted_actions +
                            1e-10) * K.stop_gradient(advantages)
        entropy = K.sum(actor * K.log(actor + 1e-10), axis=1)
        entropy = K.mean(entropy)
        actor_loss = 1.0e-3 * entropy - K.mean(eligibility)
        # actor_loss = 1.0e-4 * entropy - K.cast(K.sum(eligibility), 'float32')

        discounted_reward = K.placeholder(shape=(None, 1))
        critic_loss = K.mean(K.square(discounted_reward - critic))
        # loss = actor_loss + 0.5 * critic_loss
        # updates = self.adam_optimizer.get_updates(loss=loss, params=self.actor_critic.trainable_weights)
        # return K.function(inputs=[self.actor_critic.input, action, advantages, discounted_reward], \
        #                     outputs=loss, updates=updates)
        updates = self.adam_optimizer.get_updates(
            loss=[actor_loss, critic_loss],
            params=self.actor_critic.trainable_weights)
        return K.function(inputs=[self.actor_critic.input, action, advantages, discounted_reward], \
                            outputs=[actor_loss, critic_loss], updates=updates)
Example #14
 def call(self, inputs, mask=None, **kwargs):
     inputs, embeddings = inputs
     if self.stop_gradient:
         embeddings = K.stop_gradient(embeddings)
     outputs = K.dot(inputs, K.transpose(embeddings))
     if self.use_bias:
         outputs = K.bias_add(outputs, self.bias)
     return keras.activations.softmax(outputs)
Example #15
 def __init__(self, G, base_loss=keras.losses.mse):
     if K.ndim(G) == 2:
         shape = K.int_shape(G)
         assert shape[1] == 1, f"bad shape: {shape}"
         G = K.squeeze(G, axis=1)
     assert K.ndim(G) == 1, "bad shape"
     self.G = K.stop_gradient(G)
     self.base_loss = base_loss
Example #16
    def call(self, inputs, training=None):
        x = inputs
        assert not isinstance(x, list)

        # Compute the minibatch statistics
        mean, var = self._moments(x)
        sigma = K.sqrt(var + self.epsilon)

        # When not in the training phase, rmax and dmax are set large so that
        # the unclipped corrections make the normalization use the moving
        # averages
        rmax = K.in_train_phase(self.rmax, K.constant(1e5), training)
        dmax = K.in_train_phase(self.dmax, K.constant(1e5), training)

        # Compute the corrections based on rmax, dmax
        r = K.stop_gradient(
            self._clip(sigma / self.moving_sigma, 1. / rmax, rmax))
        d = K.stop_gradient(
            self._clip((mean - self.moving_mean) / self.moving_sigma, -dmax,
                       dmax))

        # Actually do the normalization and the rescaling
        xnorm = ((x - mean) / sigma) * r + d
        y = self.gamma * xnorm + self.beta

        # Add the moving average updates
        self.add_update([
            K.moving_average_update(self.moving_mean, mean, self.momentum),
            K.moving_average_update(self.moving_sigma, sigma, self.momentum)
        ], x)

        # Add the r, d updates
        rmax_prog = K.minimum(1., self.steps / self.rmax_dur)
        dmax_prog = K.minimum(1., self.steps / self.dmax_dur)
        self.add_update([
            K.update_add(self.steps, 1),
            K.update(self.rmax,
                     self.rmax_0 + rmax_prog * (self.rmax_inf - self.rmax_0)),
            K.update(self.dmax,
                     self.dmax_0 + dmax_prog * (self.dmax_inf - self.dmax_0))
        ])

        # Fix the output's uses learning phase
        y._uses_learning_phase = rmax._uses_learning_phase

        return y
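A worked check of the r, d corrections above: when no clipping occurs,

    ((x - mean) / sigma) * r + d
        = ((x - mean) / sigma) * (sigma / moving_sigma)
          + (mean - moving_mean) / moving_sigma
        = (x - moving_mean) / moving_sigma,

so the layer normalizes with the moving statistics (Batch Renormalization, Ioffe 2017), while K.stop_gradient ensures gradients flow only through the minibatch mean and sigma.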
Example #17
def ternarize(W, H=1):
    '''The weights' ternarization function.

    # References:
    - [Recurrent Neural Networks with Limited Numerical Precision](http://arxiv.org/abs/1608.06902)
    - [Ternary Weight Networks](http://arxiv.org/abs/1605.04711)
    '''
    Wt = _ternarize(W, H)
    return W + K.stop_gradient(Wt - W)
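The _ternarize helper is defined elsewhere in that codebase. A plausible sketch following the Ternary Weight Networks paper (threshold at roughly 0.7 * mean(|W|)); the exact rule here is an assumption:

def _ternarize(W, H=1):
    '''Map weights to {-H, 0, +H} using a magnitude threshold.'''
    W = W / H
    threshold = 0.7 * K.mean(K.abs(W))
    ternary = (K.cast(K.greater(W, threshold), K.floatx())
               - K.cast(K.less(W, -threshold), K.floatx()))
    return ternary * H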
Example #18
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        t = math_ops.cast(self.iterations, K.floatx()) + 1
        
        lr_t = self.lr
        if self.initial_decay > 0:
            lr_t = lr_t * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))
            
        lr_t = lr_t * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr_t / self.base_lr
        lower_bound = final_lr * (1 - 1 / ((1-self.beta_2) * (t + 1)))
        upper_bound = final_lr * (1 + 1 / ((1-self.beta_2) * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay before the moment estimates so the decayed
            # gradient actually enters m_t and v_t
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
                
            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = K.sqrt(vhat_t) + self.epsilon
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                denom = K.sqrt(v_t) + self.epsilon

            eta_hat = tf.clip_by_value(lr_t/denom, lower_bound, upper_bound)
#             eta = eta_hat / K.sqrt(t)
            
            p_t = p - m_t * eta_hat
        
            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #19
 def _decode_layer(self, input, residual, layer_idx):
     layers = self.layers[layer_idx]
     upsample = layers[0](input)
     stop_grad = self.n_up - 1 - layer_idx in self.stop_grad_skip_connection_levels
     if self.n_up - 1 - layer_idx not in self.omit_skip_connection_levels:
         if isinstance(residual, list):
             if stop_grad:
                 residual = [stop_gradient(r) for r in residual]
             concat = layers[1](flatten_list(residual + [upsample]))
         else:
             if stop_grad:
                 residual = stop_gradient(residual)
             concat = layers[1]([residual, upsample])
     else:
         concat = upsample
     conv = concat
     for convL in layers[2]:
         conv = convL(conv)
     return conv
Example #20
def style_loss(style, combination, mask_path=None, nb_channels=None):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3

    if content_mask_path is not None:
        content_mask = K.variable(load_mask(content_mask_path, nb_channels))
        combination = combination * K.stop_gradient(content_mask)
        del content_mask

    if mask_path is not None:
        style_mask = K.variable(load_mask(mask_path, nb_channels))
        style = style * K.stop_gradient(style_mask)
        if content_mask_path is None:
            combination = combination * K.stop_gradient(style_mask)
        del style_mask

    channels = 3
    size = img_width * img_height
    return K.sum(K.square(style - combination)) / (4. * (channels ** 2) * (size ** 2))
Example #21
 def dice_loss(y_true, y_pred):
     ans_list = []
     for i in range(class_count):
         tmp_y_true = y_true[:, i]
         tmp_y_pred = y_pred[:, i]
         tmp_y_max_pred = K.max(y_pred, axis=-1, keepdims=False)
         tmp_pred_one_zero = K.cast(K.equal(tmp_y_pred, tmp_y_max_pred),
                                    dtype="float32")
         tmp_true_pred = tmp_y_true * tmp_pred_one_zero
         p = (K.sum(
             K.stop_gradient(tmp_true_pred - tmp_y_pred) + tmp_y_pred) +
              smooth) / (K.sum(
                  K.stop_gradient(tmp_pred_one_zero - tmp_y_pred) +
                  tmp_y_pred) + smooth)
         r = (K.sum(
             K.stop_gradient(tmp_true_pred - tmp_y_pred) + tmp_y_pred) +
              smooth) / (K.sum(tmp_y_true) + smooth)
         ans_list.append(2 * p * r / (p + r))
     return -K.mean(K.stack(ans_list))
Example #22
    def __init__(self, G, base_loss=keras.losses.Huber()):
        check_tensor(G)

        if K.ndim(G) == 2:
            check_tensor(G, axis_size=1, axis=1)
            G = K.squeeze(G, axis=1)

        check_tensor(G, ndim=1)
        self.G = K.stop_gradient(G)
        self.base_loss = base_loss
Example #23
 def actor_loss(y_true, y_pred):
      # Here we define a custom loss for proximal policy optimization
     out = K.clip(y_pred, DELTA, 1 - DELTA)
     log_lik = K.sum(y_true * K.log(out), axis=-1)
     old_log_lik = K.stop_gradient(
         K.sum(y_true * K.log(out), axis=-1))
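      # Note: old_log_lik is K.stop_gradient of the same expression, so the
      # ratio below is identically 1 in the forward pass; gradients still flow
      # through log_lik, giving a PPO-style clipped surrogate without storing
      # probabilities from a separate old-policy network.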
     ratio = K.exp(log_lik - old_log_lik)
     clipped_ratio = K.clip(ratio, 1 - self.epsilon,
                            1 + self.epsilon)
     return -K.mean(
         K.minimum(ratio * advantages, clipped_ratio * advantages))
Example #24
def loss_uncertainty_gaussian_likelihood_dir(y_true, y_pred):
    """
    Loss function that calculates something similar to a Gaussian
    Likelihood for predicted directions. Requires that y_pred contains
    three predicted values (labels): dir_x, dir_y, dir_z.
    y_true & y_pred are expected to contain the predicted/true label and
    the predicted std for the label.
    L = ln(std ** 2) + (y_label_pred - y_label_true) ** 2 / (std ** 2)

    Returns
    -------
    loss : Gaussian Likelihood loss for the directional error

    """
    # order in y_pred: 1) pred label 2) pred label error
    # prevent that the gradient flows back over the label network
    y_pred_dir_x = K.stop_gradient(y_pred[:, 0])
    y_pred_dir_y = K.stop_gradient(y_pred[:, 1])
    y_pred_dir_z = K.stop_gradient(y_pred[:, 2])
    y_pred_std_dir_x = y_pred[:, 3]
    y_pred_std_dir_y = y_pred[:, 4]
    y_pred_std_dir_z = y_pred[:, 5]
    y_true_dir_x = y_true[:, 0]
    y_true_dir_y = y_true[:, 1]
    y_true_dir_z = y_true[:, 2]

    # equal to a lower std limit of 1e-3
    eps = tf.constant(1e-6, dtype="float32")

    loss_dir_x = K.log(K.pow(y_pred_std_dir_x, 2) + eps) + K.pow(
        y_pred_dir_x - y_true_dir_x, 2) / (K.pow(y_pred_std_dir_x, 2) + eps)
    loss_dir_y = K.log(K.pow(y_pred_std_dir_y, 2) + eps) + K.pow(
        y_pred_dir_y - y_true_dir_y, 2) / (K.pow(y_pred_std_dir_y, 2) + eps)
    loss_dir_z = K.log(K.pow(y_pred_std_dir_z, 2) + eps) + K.pow(
        y_pred_dir_z - y_true_dir_z, 2) / (K.pow(y_pred_std_dir_z, 2) + eps)

    loss = loss_dir_x + loss_dir_y + loss_dir_z
    return loss
Example #25
 def clip_through(self, X, min_val=None, max_val=None):
     '''Element-wise clipping with gradient propagation
     Analogue to round_through
     '''
     if min_val is None:
         min_val=self.min_value
     if max_val is None:
         max_val=self.max_value
         
     clipped = K.clip(X, min_val, max_val)
     clipped_through= X + K.stop_gradient(clipped-X)
     return clipped_through 
Example #26
def Av_CNN_GCN_trans_model(patch_sz,
                           number_class,
                           number_neighbors=2,
                           droupout_rate=0.5,
                           kernel_reg=None):

    CNNmodel = Av_CNN3D_model(patch_sz,
                              number_class,
                              droupout_rate=0.5,
                              kernel_reg=None)
    modeldir = './path-to-trained-CNNmodel/models.h5'
    if os.path.isfile(modeldir):
        CNNmodel.load_weights(modeldir)
    else:
        sys.exit(
            "Error! Please provide a trained model in Av_CNN_GCN_trans_model!")
    # CNNmodel.summary()

    X_batch = Input(shape=(patch_sz[0], patch_sz[1], patch_sz[2], 1),
                    name="X_batch")
    NX_batch = Input(shape=(number_neighbors, patch_sz[0], patch_sz[1],
                            patch_sz[2]),
                     name="NX_batch")

    reshape = Reshape((patch_sz[0], patch_sz[1], patch_sz[2], 1))

    Phi_fun = phi_fun(patch_sz=patch_sz,
                      kernel_reg=kernel_reg,
                      droupout_rate=droupout_rate)
    Phi_fun.set_weights(CNNmodel.layers[1].get_weights())
    X = Phi_fun(X_batch)
    NX = []
    for i in range(NX_batch.shape[1].value):
        NX_batch_i = slicelayer(index=i)(NX_batch)
        tmp = reshape(NX_batch_i)
        tmp = Phi_fun(tmp)
        NX.append(tmp)  # the size of tmp [b, F] & NX is a list with n [b, F]
    NX = concatenate(NX, axis=1)
    NX = Lambda(lambda t: stop_gradient(t))(NX)

    x = gcn_layer(X,
                  NX,
                  Num_Gaussian=1,
                  n_hidden_feat=1,
                  OFeat_len=2,
                  lamda=1.0)
    xout = Activation('softmax')(x)

    model = keras.Model(inputs=[X_batch, NX_batch],
                        outputs=xout,
                        name='AV_GCN')
    return model
Example #27
    def call(self, x):
        s, x1 = x
        a = x1[:, :1]
        s_hat = x1[:, 1:2]

        # Rescale the weights, making sure we mostly scale down
        a_hat = a * K.clip(s_hat / s, self.min_decrease, self.max_increase)

        # Scale again so that the reported loss is comparable to the other ones
        t = 1
        #sT = K.transpose(s)
        #t = K.dot(sT, a) / K.dot(sT, a_hat)

        return K.stop_gradient([a_hat * t])[0]
Example #28
    def call(self, inputs):

        #***************************************************************************************************
        # Binary layer as in https://arxiv.org/abs/1802.08530
        # M. D. McDonnell, "Training wide residual networks for deployment
        # using a single bit for each weight", ICLR 2018.
        #
        # This code sets the full-precision weights to binary for forward and
        # backward propagation, but lets the gradients update the full-precision
        # weights that are used only during training.
        #
        binary_kernel = self.kernel + K.stop_gradient(
            K.sign(self.kernel) - self.kernel)
        binary_kernel = binary_kernel + K.stop_gradient(
            binary_kernel * self.multiplier - binary_kernel)
        #***************************************************************************************************

        outputs = K.conv2d(inputs,
                           binary_kernel,
                           strides=self.strides,
                           padding=self.padding,
                           data_format=self.data_format,
                           dilation_rate=self.dilation_rate)

        return outputs
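A small eager-mode sketch of the two stop_gradient steps above (TF2, hypothetical values): the forward value is sign(W) * multiplier, while the backward pass treats the whole chain as the identity in W.

W = tf.Variable([[-0.3, 0.8]])
multiplier = 2.0
with tf.GradientTape() as tape:
    b = W + tf.stop_gradient(tf.sign(W) - W)      # forward: sign(W)
    b = b + tf.stop_gradient(b * multiplier - b)  # forward: sign(W) * multiplier
print(b.numpy())                    # [[-2.  2.]]
print(tape.gradient(b, W).numpy())  # [[1.  1.]]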
Example #29
def Lossnet(inputs_lossnet, embedding_size):
    """LossNet network"""
    def get_embedding_nets(embedding_size):
        return Sequential([
            layers.GlobalAveragePooling2D(),
            layers.Dense(embedding_size),
            layers.Activation("relu")
        ])

    c_pred = inputs_lossnet[0]
    features_w = inputs_lossnet[1:]

    # split the forward pass of the features to be able to split the training;
    # stop the gradient back to the backbone (expressed as s for split and w for whole)
    features_s = []
    for i, out in enumerate(features_w):
        features_s.append(
            layers.Lambda(lambda x: backend.stop_gradient(x))(out))

    embeddings_fn_list = []
    # generate the embeddings layers
    for _ in features_w:
        embeddings_fn_list.append(get_embedding_nets(embedding_size))
    # define dense function
    dense_fn = layers.Dense(1, name="L_pred")
    concat_same = layers.Concatenate(name="Embedding")

    #
    embeddings_list_whole = []
    embeddings_list_split = []
    for i, out in enumerate(features_w):
        embeddings_list_split.append(embeddings_fn_list[i](features_s[i]))
        embeddings_list_whole.append(embeddings_fn_list[i](features_w[i]))

    embedding_whole = concat_same(embeddings_list_whole)
    embedding_split = concat_same(embeddings_list_split)

    #l_pred_w = tf.squeeze(dense_fn(embedding_whole))
    #l_pred_s = tf.squeeze(dense_fn(embedding_split))

    l_pred_w = dense_fn(embedding_whole)
    l_pred_s = dense_fn(embedding_split)

    # concatenate the prediction of the classes with the predicted loss in order to compute the loss
    concat_w = layers.Concatenate(axis=-1, name='l_pred_w')([c_pred, l_pred_w])
    concat_s = layers.Concatenate(axis=-1, name='l_pred_s')([c_pred, l_pred_s])

    return [concat_w, concat_s, embedding_whole, embedding_split]
Example #30
    def _train(self, screens_input, action_input, select_input, reward, action,
               screen_action, screen_used):
        _entropy = _policy_loss = _value_loss = 0.

        with tf.GradientTape() as tape:
            spatial_policy, ns_policy, value = self.model(
                [screens_input, action_input, select_input])
            value = K.squeeze(value, axis=1)

            ns_action_one_hot = K.one_hot(action, len(ACTION_OPTIONS))
            screen_action_one_hot = K.one_hot(screen_action,
                                              SCREEN_SIZE * SCREEN_SIZE)

            value_loss = .5 * K.square(reward - value)

            entropy = -K.sum(ns_policy * K.log(ns_policy + 1e-10), axis=1) - \
                       K.sum(spatial_policy * K.log(spatial_policy + 1e-10), axis=1)
            ns_log_prob = K.log(
                K.sum(ns_policy * ns_action_one_hot, axis=1) + 1e-10)
            spatial_log_prob = K.log(
                K.sum(spatial_policy * screen_action_one_hot, axis=1) + 1e-10)
            advantage = reward - K.stop_gradient(value)

            # Mask out spatial_log_prob when the action taken did not use the screen
            policy_loss = -(ns_log_prob + spatial_log_prob *
                            screen_used) * advantage - entropy * ENTROPY_RATE

            total_loss = policy_loss + value_loss

            _entropy = K.mean(entropy)
            _policy_loss = K.mean(K.abs(policy_loss))
            _value_loss = K.mean(value_loss)

        gradients = tape.gradient(total_loss, self.model.trainable_variables)
        global_norm = tf.linalg.global_norm(gradients)
        print(global_norm)
        gradients, _ = tf.clip_by_global_norm(
            gradients,
            GRADIENT_CLIP_MAX)  # Prevents exploding gradients...I think
        self.opt.apply_gradients(zip(gradients,
                                     self.model.trainable_variables))

        return [
            float(_value_loss),
            float(_policy_loss),
            float(_entropy), global_norm
        ]