Example #1
    def test_disconnected_paths(self):
        # Test that taking a gradient through a disconnected
        # path raises an exception
        T = theano.tensor
        a = np.asarray(self.rng.randn(5, 5),
                       dtype=config.floatX)

        x = T.matrix('x')

        # This MUST raise a DisconnectedInputError error.
        # This also raises an additional warning from gradients.py.
        self.assertRaises(gradient.DisconnectedInputError, gradient.grad,
                          gradient.disconnected_grad(x).sum(), x)

        # This MUST NOT raise a DisconnectedInputError error.
        y = gradient.grad((x + gradient.disconnected_grad(x)).sum(), x)

        a = T.matrix('a')
        b = T.matrix('b')
        y = a + gradient.disconnected_grad(b)
        # This MUST raise a DisconnectedInputError error.
        # This also raises an additional warning from gradients.py.
        self.assertRaises(gradient.DisconnectedInputError,
                          gradient.grad, y.sum(), b)

        # This MUST NOT raise a DisconnectedInputError error.
        gradient.grad(y.sum(), a)
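The behaviour exercised above can be checked in isolation. A minimal standalone sketch (assuming only Theano and NumPy are installed, not part of the original test): because the disconnected_grad branch contributes nothing to the gradient, d/dx sum(x + disconnected_grad(x)) is a matrix of ones rather than twos.

import numpy as np
import theano
import theano.tensor as T
from theano import gradient, config

x = T.matrix('x')
g = gradient.grad((x + gradient.disconnected_grad(x)).sum(), x)
f = theano.function([x], g)
print(f(np.zeros((2, 2), dtype=config.floatX)))  # all ones, not twos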
Example #2
    def test_disconnected_paths(self):
        # Test that taking a gradient through a disconnected
        # path raises an exception
        T = theano.tensor
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

        x = T.matrix("x")

        # This MUST raise a DisconnectedInputError error.
        # This also raises an additional warning from gradients.py.
        with pytest.raises(gradient.DisconnectedInputError):
            gradient.grad(gradient.disconnected_grad(x).sum(), x)

        # This MUST NOT raise a DisconnectedInputError error.
        y = gradient.grad((x + gradient.disconnected_grad(x)).sum(), x)

        a = T.matrix("a")
        b = T.matrix("b")
        y = a + gradient.disconnected_grad(b)
        # This MUST raise a DisconnectedInputError error.
        # This also raises an additional warning from gradients.py.
        with pytest.raises(gradient.DisconnectedInputError):
            gradient.grad(y.sum(), b)

        # This MUST NOT raise a DisconnectedInputError error.
        gradient.grad(y.sum(), a)
Example #3
    def compute_hessian(self, objective, argument):
        """
        Computes the directional derivative of the gradient (which is equal to
        the Hessian multiplied by direction).
        """
        g = T.grad(objective, argument)

        # Create a new tensor A, which has the same type (i.e. same
        # dimensionality) as argument.
        is_product_manifold = isinstance(argument, (list, tuple))
        if not is_product_manifold:
            A = argument.type()
        else:
            A = [arg.type() for arg in argument]

        # First attempt efficient 'R-op', this directly calculates the
        # directional derivative of the gradient.
        try:
            R = T.Rop(g, argument, A)
        except NotImplementedError:
            # Implementation based on
            # tensorflow.python.ops.gradients_impl._hessian_vector_product
            if not is_product_manifold:
                proj = T.sum(g * disconnected_grad(A))
                R = T.grad(proj, argument)
            else:
                proj = [
                    T.sum(g_elem * disconnected_grad(a_elem))
                    for g_elem, a_elem in zip(g, A)
                ]
                proj_grad = [
                    T.grad(proj_elem,
                           argument,
                           disconnected_inputs="ignore",
                           return_disconnected="None") for proj_elem in proj
                ]
                proj_grad_transpose = map(list, zip(*proj_grad))
                proj_grad_stack = [
                    T.stacklists([c for c in row if c is not None])
                    for row in proj_grad_transpose
                ]
                R = [T.sum(stack, axis=0) for stack in proj_grad_stack]

        if not is_product_manifold:
            hess = theano.function([argument, A], R, on_unused_input="warn")
        else:
            hess_prod = theano.function(argument + A,
                                        R,
                                        on_unused_input="warn")

            def hess(x, a):
                return hess_prod(*(x + a))

        return hess
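When T.Rop is not implemented for an op, the fallback above computes the Hessian-vector product as the gradient of the projection of the gradient onto a constant direction. A minimal standalone sketch of that trick on a toy quadratic objective (an illustration assuming Theano is available, not part of the original source):

import numpy as np
import theano
import theano.tensor as T
from theano.gradient import disconnected_grad

x = T.vector('x')
v = x.type('v')                         # direction vector with the same type as x
cost = 0.5 * T.sum(x ** 2) + T.prod(x)
g = T.grad(cost, x)
proj = T.sum(g * disconnected_grad(v))  # treat the direction v as a constant
hvp = T.grad(proj, x)                   # the Hessian of cost multiplied by v
f = theano.function([x, v], hvp)
print(f(np.ones(3, dtype=theano.config.floatX),
        np.ones(3, dtype=theano.config.floatX)))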
Example #4
def __step(img, prev_bbox, state, timestep):
    conv1 = conv2d(img, conv1_filters, subsample=(conv1_stride, conv1_stride), border_mode='half')
    act1 = NN.relu(conv1)
    flat1 = TT.reshape(act1, (-1, conv1_output_dim))
    gru_in = TT.concatenate([flat1, prev_bbox], axis=1)
    gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
    gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
    gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
    gru_h = (1 - gru_z) * state + gru_z * gru_h_
    bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)

    bbox_cx = ((bbox[:, 2] + bbox[:, 0]) / 2 + 1) / 2 * img_row
    bbox_cy = ((bbox[:, 3] + bbox[:, 1]) / 2 + 1) / 2 * img_col
    bbox_w = TT.abs_(bbox[:, 2] - bbox[:, 0]) / 2 * img_row
    bbox_h = TT.abs_(bbox[:, 3] - bbox[:, 1]) / 2 * img_col
    x = TT.arange(img_row, dtype=T.config.floatX)
    y = TT.arange(img_col, dtype=T.config.floatX)
    mx = TT.maximum(TT.minimum(-TT.abs_(x.dimshuffle('x', 0) - bbox_cx.dimshuffle(0, 'x')) + bbox_w.dimshuffle(0, 'x') / 2., 1), 1e-4)
    my = TT.maximum(TT.minimum(-TT.abs_(y.dimshuffle('x', 0) - bbox_cy.dimshuffle(0, 'x')) + bbox_h.dimshuffle(0, 'x') / 2., 1), 1e-4)
    bbox_mask = mx.dimshuffle(0, 1, 'x') * my.dimshuffle(0, 'x', 1)

    new_cls1_f = cls_f
    new_cls1_b = cls_b

    mask = act1 * bbox_mask.dimshuffle(0, 'x', 1, 2)

    new_featmaps = TG.disconnected_grad(TT.set_subtensor(featmaps[:, timestep], mask))
    new_featmaps.name = 'new_featmaps'
    new_probmaps = TG.disconnected_grad(TT.set_subtensor(probmaps[:, timestep], bbox_mask))
    new_probmaps.name = 'new_probmaps'

    train_featmaps = TG.disconnected_grad(new_featmaps[:, :timestep+1].reshape(((timestep + 1) * batch_size, conv1_nr_filters, img_row, img_col)))
    train_featmaps.name = 'train_featmaps'
    train_probmaps = TG.disconnected_grad(new_probmaps[:, :timestep+1])
    train_probmaps.name = 'train_probmaps'

    for _ in range(0, 5):
        train_convmaps = conv2d(train_featmaps, new_cls1_f, subsample=(cls1_stride, cls1_stride), border_mode='half').reshape((batch_size, timestep + 1, batch_size, img_row, img_col))
        train_convmaps.name = 'train_convmaps'
        train_convmaps_selected = train_convmaps[TT.arange(batch_size).repeat(timestep+1), TT.tile(TT.arange(timestep+1), batch_size), TT.arange(batch_size).repeat(timestep+1)].reshape((batch_size, timestep+1, img_row, img_col))
        train_convmaps_selected.name = 'train_convmaps_selected'
        train_predmaps = NN.sigmoid(train_convmaps_selected + new_cls1_b.dimshuffle(0, 'x', 'x', 'x'))
        train_loss = NN.binary_crossentropy(train_predmaps, train_probmaps).mean()
        train_grad_cls1_f, train_grad_cls1_b = T.grad(train_loss, [new_cls1_f, new_cls1_b])
        new_cls1_f -= train_grad_cls1_f * 0.1
        new_cls1_b -= train_grad_cls1_b * 0.1

    return (bbox, gru_h, timestep + 1, mask, bbox_mask), {cls_f: TG.disconnected_grad(new_cls1_f), cls_b: TG.disconnected_grad(new_cls1_b), featmaps: TG.disconnected_grad(new_featmaps), probmaps: TG.disconnected_grad(new_probmaps)}
Example #5
 def compute_activations(self, input_data, do_round=True):
     layer_input = input_data
     layer_signals = []
     for i, (w, b, k) in enumerate(zip(self.ws, self.bs,
                                       self.get_scales())):
         scaled_input = layer_input * k
         if not do_round:
             eta = None
             spikes = scaled_input
         else:
             eta = tt.round(scaled_input) - scaled_input
             spikes = scaled_input + disconnected_grad(eta)
         nonlinearity = get_named_activation_function(
             self.hidden_activations if i < len(self.ws) -
             1 else self.output_activation)
         output = nonlinearity((spikes / k).dot(w) + b)
         layer_signals.append({
             'input': layer_input,
             'scaled_input': scaled_input,
             'eta': eta,
             'spikes': spikes,
             'output': output
         })
         layer_input = output
     return layer_signals
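The spikes expression above is a straight-through rounding estimator. A minimal sketch of the same trick in isolation (assuming Theano; not part of the original source): the forward value is round(x), but because the rounding residual is wrapped in disconnected_grad, the gradient is that of the identity.

import theano
import theano.tensor as tt
from theano.gradient import disconnected_grad

x = tt.vector('x')
eta = tt.round(x) - x                 # rounding residual
spikes = x + disconnected_grad(eta)   # forward: round(x); backward: identity
g = theano.grad(spikes.sum(), x)      # a vector of ones
f = theano.function([x], [spikes, g])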
Example #6
    def __init__(self, rewards_getter, seq2seq):
        """
        Args:
            rewards_getter (BeLikeXRewards):
            seq2seq (seq2seq.Seq2Seq):
        """
        self.rewards_getter = rewards_getter
        self.s2s = seq2seq

        self.rewards = rewards_getter.get(self.s2s.gentrain.words_seq)
        self.baseline = rewards_getter.get(self.s2s.gentrain.words_seq_greedy)

        self.advantage = disconnected_grad(self.rewards - self.baseline)  # [batch_size,]
        assert self.advantage.ndim == 1, "WHAT IS WRONG WITH ADVANTAGE FUNCTION???"

        predicted_probas = self.s2s.gentrain.predicted_probas  # [batch_size*n_steps, n_tokens]
        self.action_probs = predicted_probas[T.arange(predicted_probas.shape[0]), self.s2s.gentrain.words_seq[:,:-1].ravel()]
        self.action_probs = self.action_probs.reshape((self.advantage.shape[0], -1))  # [batch_size, n_steps]

        self.weights = self.s2s.gentest.weights

        self.loss = (-self.advantage[:, None] * self.action_probs).mean() + self.s2s.gentrain.llh_loss * self.LLH_ALPHA

        self.pg_grads = lasagne.updates.total_norm_constraint(T.grad(self.loss, self.weights),
                                                              Config.TOTAL_NORM_GRAD_CLIP)

        self.pg_updates = lasagne.updates.adam(self.pg_grads, self.weights)

        self.train_step = theano.function(self.rewards_getter.input_vars + [self.s2s.enc.input_phrase, self.s2s.gentrain.reference_answers],
                                          [self.loss, self.rewards.mean()],
                                          updates=self.pg_updates + self.s2s.gentrain.recurrence.get_automatic_updates() + self.s2s.gentrain.recurrence_greedy_updates,
                                          on_unused_input='warn')
Example #7
    def _compute_nary_hessian_vector_product(self, gradients, arguments):
        """Returns a function accepting `2 * len(arguments)` arguments to
        compute a Hessian-vector product of a multivariate function.

        Notes
        -----
        The implementation is based on TensorFlow's '_hessian_vector_product'
        function in 'tensorflow.python.ops.gradients_impl'.
        """
        argument_types = [argument.type() for argument in arguments]
        try:
            Rop = T.Rop(gradients, arguments, argument_types)
        except NotImplementedError:
            proj = [
                T.sum(gradient * disconnected_grad(argument_type))
                for gradient, argument_type in zip(gradients, argument_types)
            ]
            proj_grad = [
                T.grad(proj_elem,
                       arguments,
                       disconnected_inputs="ignore",
                       return_disconnected="None") for proj_elem in proj
            ]
            proj_grad_transpose = map(list, zip(*proj_grad))
            proj_grad_stack = [
                T.stacklists([c for c in row if c is not None])
                for row in proj_grad_transpose
            ]
            Rop = [T.sum(stack, axis=0) for stack in proj_grad_stack]
        return self._compile_function_without_warnings(
            list(itertools.chain(arguments, argument_types)), Rop)
Example #8
def virtual_adversarial_training(predict_fn, inputs, logits, epsilon,
                                 num_iterations=1, xi=1e-6):
    vat_perturbation = virtual_adversarial_perturbation(
        predict_fn, inputs, logits, epsilon, num_iterations, xi)
    logits_vat = predict_fn(inputs + vat_perturbation)
    loss = kl_with_logits(gradient.disconnected_grad(logits), logits_vat)
    return loss
Example #9
    def build_model(self, p):
        S = Input(p['input_shape'], name='input_state')
        A = Input((1, ), name='input_action', dtype='int32')
        R = Input((1, ), name='input_reward')
        T = Input((1, ), name='input_terminate', dtype='int32')
        NS = Input(p['input_shape'], name='input_next_sate')

        self.Q_model = self.build_cnn_model(p)
        self.Q_old_model = self.build_cnn_model(p, False)  # Q hat in paper
        self.Q_old_model.set_weights(self.Q_model.get_weights())  # Q' = Q

        Q_S = self.Q_model(S)  # batch * actions
        Q_NS = disconnected_grad(
            self.Q_old_model(NS))  # disconnected gradient is not necessary

        y = R + p['discount'] * (1 - T) * K.max(Q_NS, axis=1,
                                                keepdims=True)  # batch * 1

        action_mask = K.equal(
            Tht.arange(p['num_actions']).reshape((1, -1)), A.reshape((-1, 1)))
        output = K.sum(Q_S * action_mask, axis=1).reshape((-1, 1))
        loss = K.sum((output - y)**2)  # sum could also be mean()

        optimizer = adam(p['learning_rate'])
        params = self.Q_model.trainable_weights
        update = optimizer.get_updates(params, [], loss)

        self.training_func = K.function([S, A, R, T, NS], loss, updates=update)
        self.Q_func = K.function([S], Q_S)
Example #10
    def build_model(self, p):
        S = Input(p['input_shape'], name='input_state')
        A = Input((1,), name='input_action', dtype='int32')
        R = Input((1,), name='input_reward')
        T = Input((1,), name='input_terminate', dtype='int32')
        NS = Input(p['input_shape'], name='input_next_sate')

        self.Q_model = self.build_cnn_model(p)
        self.Q_old_model = self.build_cnn_model(p, False)  # Q hat in paper
        self.Q_old_model.set_weights(self.Q_model.get_weights())  # Q' = Q

        Q_S = self.Q_model(S)  # batch * actions
        Q_NS = disconnected_grad(self.Q_old_model(NS))  # disconnected gradient is not necessary

        y = R + p['discount'] * (1-T) * K.max(Q_NS, axis=1, keepdims=True)  # batch * 1

        action_mask = K.equal(Tht.arange(p['num_actions']).reshape((1, -1)), A.reshape((-1, 1)))
        output = K.sum(Q_S * action_mask, axis=1).reshape((-1, 1))
        loss = K.sum((output - y) ** 2)  # sum could also be mean()

        optimizer = adam(p['learning_rate'])
        params = self.Q_model.trainable_weights
        update = optimizer.get_updates(params, [], loss)

        self.training_func = K.function([S, A, R, T, NS], loss, updates=update)
        self.Q_func = K.function([S], Q_S)
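A minimal sketch (assuming Theano; the variables here are illustrative placeholders, not from the original source) of what wrapping the target-network output in disconnected_grad does above: the TD target y is treated as a constant, so the squared error backpropagates only into Q(s, a) and not into the bootstrapped maximum over next-state values.

import theano
import theano.tensor as T
from theano.gradient import disconnected_grad

q_s = T.vector('q_s')     # Q(s, .) for one state
q_ns = T.vector('q_ns')   # Q_old(s', .) for the next state
r = T.scalar('r')
y = r + 0.99 * disconnected_grad(q_ns.max())
loss = (q_s[0] - y) ** 2              # TD error for action 0
g = theano.grad(loss, q_s)            # no gradient flows into q_ns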
Example #11
    def test_connection_pattern(self):
        T = theano.tensor
        x = T.matrix('x')
        y = gradient.disconnected_grad(x)

        connection_pattern = y.owner.op.connection_pattern(y.owner)
        assert connection_pattern == [[False]]
Example #12
 def test_op_removed(self):
     x = theano.tensor.matrix("x")
     y = x * gradient.disconnected_grad(x)
     f = theano.function([x], y)
     # need to refer to theano.gradient.disconnected_grad here,
     # theano.gradient.disconnected_grad is a wrapper function!
     assert gradient.disconnected_grad_ not in [node.op for node in f.maker.fgraph.toposort()]
Example #13
 def _encode(self,
             application_call,
             text,
             mask,
             def_embs=None,
             def_map=None,
             text_name=None):
     if not self._random_unk:
         text = (tensor.lt(text, self._num_input_words) * text +
                 tensor.ge(text, self._num_input_words) * self._vocab.unk)
     if text_name:
         application_call.add_auxiliary_variable(
             unk_ratio(text, mask, self._vocab.unk),
             name='{}_unk_ratio'.format(text_name))
     embs = self._lookup.apply(text)
     if self._random_unk:
         embs = (tensor.lt(text, self._num_input_words)[:, :, None] * embs +
                 tensor.ge(text, self._num_input_words)[:, :, None] *
                 disconnected_grad(embs))
     if def_embs:
         embs = self._combiner.apply(embs, mask, def_embs, def_map)
     add_role(embs, EMBEDDINGS)
     encoded = flip01(
         self._encoder_rnn.apply(self._encoder_fork.apply(flip01(embs)),
                                 mask=mask.T)[0])
     return encoded
Example #14
    def test_connection_pattern(self):
        T = theano.tensor
        x = T.matrix("x")
        y = gradient.disconnected_grad(x)

        connection_pattern = y.owner.op.connection_pattern(y.owner)
        assert connection_pattern == [[False]]
Example #15
    def __init__(self,
                 weights,
                 neurons_topology,
                 cr_adjusting_sigma=1.5,
                 cr_adjusting_sigma_decay=0.9,
                 cr_learning_rate=0.005,
                 cr_learning_rate_decay=0.9,
                 **kwargs):
        super(ClusterRefiningSOM, self).__init__(weights, neurons_topology,
                                                 **kwargs)

        self.cr_adjusting_sigma = theano.shared(cr_adjusting_sigma)
        self.cr_adjusting_sigma_decay = cr_adjusting_sigma_decay
        self.cr_learning_rate = theano.shared(cr_learning_rate)
        self.cr_learning_rate_decay = cr_learning_rate_decay

        self.affinities_to_data_point = T.exp(-self.distance_from_y_row /
                                              (self.cr_adjusting_sigma)**2)
        self.smoothed_distances_from_data_point = T.mul(
            self.distance_from_y_row,
            G.disconnected_grad(self.affinities_to_data_point))
        self.cr_affinity_cost_scal = self.smoothed_distances_from_data_point.sum(
        )
        self.cr_updates = sgd(self.cr_affinity_cost_scal, [self.W_shar_mat],
                              learning_rate=self.cr_learning_rate)
        self.cr_update_neurons = theano.function([self.x_row],
                                                 self.cr_affinity_cost_scal,
                                                 updates=self.cr_updates)
Example #16
    def value_recur(self, ngs_jv, ngt_jv, b_jm1v, b_jm1N):
        # padding dummy
        Wfbs = T.concatenate(
            [self.Wfbs, T.zeros_like(self.Wfbs[-1:, :])], axis=0)
        Wfbt = T.concatenate(
            [self.Wfbt, T.zeros_like(self.Wfbt[-1:, :])], axis=0)
        # get ngram embedding
        fembs_v = T.sum(Wfbs[ngs_jv, :], axis=0)
        fembt_v = T.sum(Wfbt[ngt_jv, :], axis=0)
        # calculate g value
        g_jv = T.dot(
            self.Whb,
            T.nnet.sigmoid(fembs_v + fembt_v +
                           G.disconnected_grad(b_jm1v) * self.Wrec +
                           G.disconnected_grad(b_jm1N) * self.Wnon + self.B0))

        return g_jv
Example #17
 def test_op_removed(self):
     x = theano.tensor.matrix('x')
     y = x * gradient.disconnected_grad(x)
     f = theano.function([x], y)
     # need to refer to theano.gradient.disconnected_grad here,
     # theano.gradient.disconnected_grad is a wrapper function!
     assert gradient.disconnected_grad_ not in \
         [node.op for node in f.maker.fgraph.toposort()]
Example #18
    def cost(self, given_x, application_call):
        """Computes the loss function.

        Parameters
        ----------
        given_x : tensor variable
                  Batch of given visible states from dataset.

        Notes
        -----
        The `application_call` argument is an effect of the `application`
        decorator and isn't visible to users. It's used internally to
        set an updates dictionary for `h` that's
        discoverable by `ComputationGraph`.

        """
        x = given_x
        h_prev = self.h + self.initial_noise * self.theano_rng.normal(size=self.h.shape, dtype=self.h.dtype)
        h = h_next = h_prev
        old_energy = self.pp(self.energy(x, h).sum(), "old_energy", 1)
        for iteration in range(self.n_inference_steps):
            h_prev = h
            h = h_next
            h_next = self.pp(
                disconnected_grad(self.langevin_update(self.pp(x, "x", 3), self.pp(h_next, "h", 2))), "h_next", 2
            )
            new_energy = self.pp(self.energy(x, h_next).sum(), "new_energy", 1)
            delta_energy = self.pp(old_energy - new_energy, "delta_energy", 1)
            old_energy = new_energy
            h_prediction_residual = (
                h_next - self.pp(h_prev, "h_prev", 3) + self.epsilon * tensor.grad(self.energy(x, h_prev).sum(), h_prev)
            )
            J_h = self.pp((h_prediction_residual * h_prediction_residual).sum(axis=1).mean(axis=0), "J_h", 1)
            x_prediction_residual = self.pp(tensor.grad(self.energy(given_x, h_prev).sum(), given_x), "x_residual", 2)
            J_x = self.pp((x_prediction_residual * x_prediction_residual).sum(axis=1).mean(axis=0), "J_x", 1)
            if self.debug > 1:
                application_call.add_auxiliary_variable(J_x, name="J_x" + str(iteration))
                application_call.add_auxiliary_variable(J_h, name="J_h" + str(iteration))
            if iteration == 0:
                total_cost = J_h + J_x
            else:
                total_cost = total_cost + J_h + J_x

        per_iteration_cost = total_cost / self.n_inference_steps

        updates = OrderedDict([(self.h, h_next)])
        application_call.updates = dict_union(application_call.updates, updates)

        if self.debug > 0:
            application_call.add_auxiliary_variable(per_iteration_cost, name="per_iteration_cost")
        if self.debug > 1:
            application_call.add_auxiliary_variable(self.Wxh * 1.0, name="Wxh")
            application_call.add_auxiliary_variable(self.Whh * 1.0, name="Whh")
            application_call.add_auxiliary_variable(self.Wxx * 1.0, name="Wxx")
            application_call.add_auxiliary_variable(self.b * 1, name="b")
            application_call.add_auxiliary_variable(self.c * 1, name="c")

        return self.pp(total_cost, "total_cost")
Example #19
 def value_recur(self, ngs_jv, ngt_jv, b_jm1v, b_jm1N):
     
     # padding dummy
     Wfbs = T.concatenate([self.Wfbs,T.zeros_like(self.Wfbs[-1:,:])],
             axis=0)
     Wfbt = T.concatenate([self.Wfbt,T.zeros_like(self.Wfbt[-1:,:])],
             axis=0)
     # get ngram embedding
     fembs_v= T.sum(Wfbs[ngs_jv,:],axis=0)
     fembt_v= T.sum(Wfbt[ngt_jv,:],axis=0)
     # calculate g value
     g_jv =  T.dot( self.Whb, T.nnet.sigmoid(
             fembs_v + fembt_v + 
             G.disconnected_grad(b_jm1v)*self.Wrec +
             G.disconnected_grad(b_jm1N)*self.Wnon +
             self.B0 ))
     
     return g_jv
Example #20
def mean_interp_pad(x, padding):
    padding = (padding, padding) if isinstance(padding, int) else tuple(padding)
    size = tuple(np.array(padding) * 2 + 1)
    resize = ((x.shape[2] + 2 * padding[0], x.shape[2] - 2 * padding[0]),
              (x.shape[3] + 2 * padding[1], x.shape[3] - 2 * padding[1]))
    y = pool(x, size, (1, 1), mode='average_exc_pad')
    z = G.disconnected_grad(nn.utils.frac_bilinear_upsampling(y, resize))
    _, _, h, w = z.shape
    return T.set_subtensor(z[:, :, padding[0]:h - padding[0], padding[1]:w - padding[1]], x)
Example #21
    def value_recur(self, vsrcpos_jsv, vtarpos_jsv, ssrcpos_jsv, starpos_jsv,
            b_jm1v, b_jm1N, ngms_j, ngmt_jm1, uttms_j, uttmt_jm1):

        # source features
        ssrcemb_jsv = T.sum(ngms_j[ssrcpos_jsv,:],axis=0)
        vsrcemb_jsv = T.sum(ngms_j[vsrcpos_jsv,:],axis=0)
        src_jsv = T.concatenate([ssrcemb_jsv,vsrcemb_jsv,uttms_j],axis=0)
        # target features
        staremb_jsv = T.sum(ngmt_jm1[starpos_jsv,:],axis=0)
        vtaremb_jsv = T.sum(ngmt_jm1[vtarpos_jsv,:],axis=0)
        tar_jsv = T.concatenate([staremb_jsv,vtaremb_jsv,uttmt_jm1],axis=0)
        # update g_jv 
        g_jv =  T.dot( self.Whb, T.nnet.sigmoid(
                T.dot(src_jsv,self.Wfbs) + T.dot(tar_jsv,self.Wfbt)+ 
                G.disconnected_grad(b_jm1v)*self.Wrec +
                G.disconnected_grad(b_jm1N)*self.Wnon + self.B0 ))
        
        return g_jv
Example #22
 def gather_end_points(inputs_var, *args, **kwargs):
     logits = lasagne.layers.get_output(net, inputs=inputs_var, **kwargs)
     predictions = gradient.disconnected_grad(T.argmax(logits, axis=1))
     prob = T.nnet.softmax(logits)
     end_points = {
         'logits': logits,
         'predictions': predictions,
         'prob': prob
     }
     return end_points
Example #23
    def build_functions(self):
        A = Input(shape=(1, ), dtype='int32')
        R = Input(shape=(1, ), dtype='float32')
        T = Input(shape=(1, ), dtype='int32')

        if self.is_building_mlp:
            CNN_State = Input(shape=self.cnn_input_size)
            NN_State = Input(shape=self.nn_input_size)
            State = [CNN_State, NN_State]

            CNN_NState = Input(shape=self.cnn_input_size)
            NN_NState = Input(shape=self.nn_input_size)
            NState = [CNN_NState, NN_NState]
        else:
            State = Input(shape=self.cnn_input_size)
            NState = Input(shape=self.cnn_input_size)
        self.log["debug"]("State : " + str(State))
        self.log["debug"]("NState : " + str(NState))
        self.build_cnn_model()
        if self.is_building_mlp:
            self.value_fn = K.function(State, self.model(State))
            VS = self.model(State)
            VNS = disconnected_grad(self.model(NState))
        else:
            self.value_fn = K.function([State], self.model(State))
            VS = self.model([State])
            VNS = disconnected_grad(self.model([NState]))
        future_value = (1 - T) * VNS.max(axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = ((VS[:, A] - target)**2).mean()
        opt = RMSprop(lr=self.lr)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        if self.is_building_mlp:
            self.train_fn = K.function(
                [CNN_State, NN_State, CNN_NState, NN_NState, A, R, T],
                cost,
                updates=updates)
        else:
            self.train_fn = K.function([State, NState, A, R, T],
                                       cost,
                                       updates=updates)
Example #24
    def test_grad(self):
        T = theano.tensor
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

        x = T.matrix("x")

        expressions_gradients = [
            (x * gradient.disconnected_grad(x), x),
            (x * gradient.disconnected_grad(T.exp(x)), T.exp(x)),
            (x ** 2 * gradient.disconnected_grad(x), 2 * x ** 2),
        ]

        for expr, expr_grad in expressions_gradients:
            g = gradient.grad(expr.sum(), x)
            # gradient according to theano
            f = theano.function([x], g, on_unused_input="ignore")
            # desired gradient
            f2 = theano.function([x], expr_grad, on_unused_input="ignore")

            assert np.allclose(f(a), f2(a))
Example #25
    def test_grad(self):
        T = theano.tensor
        a = np.asarray(self.rng.randn(5, 5), dtype=config.floatX)

        x = T.matrix("x")

        expressions_gradients = [
            (x * gradient.disconnected_grad(x), x),
            (x * gradient.disconnected_grad(T.exp(x)), T.exp(x)),
            (x**2 * gradient.disconnected_grad(x), 2 * x**2),
        ]

        for expr, expr_grad in expressions_gradients:
            g = gradient.grad(expr.sum(), x)
            # gradient according to theano
            f = theano.function([x], g, on_unused_input="ignore")
            # desired gradient
            f2 = theano.function([x], expr_grad, on_unused_input="ignore")

            assert np.allclose(f(a), f2(a))
Example #26
    def value_recur(self, vsrcpos_jsv, vtarpos_jsv, ssrcpos_jsv, starpos_jsv,
                    b_jm1v, b_jm1N, ngms_j, ngmt_jm1, uttms_j, uttmt_jm1):

        # source features
        ssrcemb_jsv = T.sum(ngms_j[ssrcpos_jsv, :], axis=0)
        vsrcemb_jsv = T.sum(ngms_j[vsrcpos_jsv, :], axis=0)
        src_jsv = T.concatenate([ssrcemb_jsv, vsrcemb_jsv, uttms_j], axis=0)
        # target features
        staremb_jsv = T.sum(ngmt_jm1[starpos_jsv, :], axis=0)
        vtaremb_jsv = T.sum(ngmt_jm1[vtarpos_jsv, :], axis=0)
        tar_jsv = T.concatenate([staremb_jsv, vtaremb_jsv, uttmt_jm1], axis=0)
        # update g_jv
        g_jv = T.dot(
            self.Whb,
            T.nnet.sigmoid(
                T.dot(src_jsv, self.Wfbs) + T.dot(tar_jsv, self.Wfbt) +
                G.disconnected_grad(b_jm1v) * self.Wrec +
                G.disconnected_grad(b_jm1N) * self.Wnon + self.B0))

        return g_jv
Example #27
def fgm(x,
        predictions,
        y=None,
        eps=0.3,
        ord=np.inf,
        clip_min=None,
        clip_max=None):
    """
    Theano implementation of the Fast Gradient
    Sign method.
    :param x: the input placeholder
    :param predictions: the model's output tensor
    :param y: the output placeholder. Use None (the default) to avoid the
            label leaking effect.
    :param eps: the epsilon (input variation parameter)
    :param ord: (optional) Order of the norm (mimics Numpy).
                Possible values: np.inf (other norms not implemented yet).
    :param clip_min: optional parameter that can be used to set a minimum
                    value for components of the example returned
    :param clip_max: optional parameter that can be used to set a maximum
                    value for components of the example returned
    :return: a tensor for the adversarial example
    """
    warnings.warn("cleverhans support for Theano is deprecated and "
                  "will be dropped on 2017-11-08.")
    assert ord == np.inf, "Theano implementation not available for this norm."
    eps = np.asarray(eps, dtype=floatX)

    if y is None:
        # Using model predictions as ground truth to avoid label leaking
        y = T.eq(predictions, T.max(predictions, axis=1, keepdims=True))
    y = T.cast(y, utils_th.floatX)
    y = y / T.sum(y, 1, keepdims=True)
    # Compute loss
    loss = utils_th.model_loss(y, predictions, mean=True)

    # Define gradient of loss wrt input
    grad = T.grad(loss, x)

    # Take sign of gradient
    signed_grad = T.sgn(grad)

    # Multiply by constant epsilon
    scaled_signed_grad = eps * signed_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = gradient.disconnected_grad(x + scaled_signed_grad)

    # If clipping is needed, reset all values outside of [clip_min, clip_max]
    if (clip_min is not None) and (clip_max is not None):
        adv_x = T.clip(adv_x, clip_min, clip_max)

    return adv_x
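A hedged usage sketch for fgm above (the tiny softmax model and its weights are illustrative assumptions; fgm and its helpers are assumed importable from the module shown): compile the graph once, then feed clean inputs to obtain FGSM adversarial examples.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
W = theano.shared(np.random.randn(784, 10).astype(theano.config.floatX))
predictions = T.nnet.softmax(T.dot(x, W))       # assumed stand-in classifier
adv_x = fgm(x, predictions, eps=0.3, clip_min=0., clip_max=1.)
make_adversarial = theano.function([x], adv_x)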
Example #28
 def _compute_unary_hessian_vector_product(self, gradient, argument):
     """Returns a function accepting two arguments to compute a
     Hessian-vector product of a scalar-valued unary function.
     """
     argument_type = argument.type()
     try:
         Rop = T.Rop(gradient, argument, argument_type)
     except NotImplementedError:
         proj = T.sum(gradient * disconnected_grad(argument_type))
         Rop = T.grad(proj, argument)
     return self._compile_function_without_warnings(
         [argument, argument_type], Rop)
Example #29
def vatm(model,
         x,
         predictions,
         eps,
         num_iterations=1,
         xi=1e-6,
         clip_min=None,
         clip_max=None,
         seed=12345):
    """
    Theano implementation of the perturbation method used for virtual
    adversarial training: https://arxiv.org/abs/1507.00677
    :param model: the model which returns the network unnormalized logits
    :param x: the input placeholder
    :param predictions: the model's unnormalized output tensor
    :param eps: the epsilon (input variation parameter)
    :param num_iterations: the number of iterations
    :param xi: the finite difference parameter
    :param clip_min: optional parameter that can be used to set a minimum
                    value for components of the example returned
    :param clip_max: optional parameter that can be used to set a maximum
                    value for components of the example returned
    :param seed: the seed for random generator
    :return: a tensor for the adversarial example
    """
    eps = np.asarray(eps, dtype=floatX)
    xi = np.asarray(xi, dtype=floatX)
    rng = RandomStreams(seed=seed)
    d = rng.normal(size=x.shape, dtype=x.dtype)
    for i in range(num_iterations):
        d = xi * utils_th.l2_batch_normalize(d)
        logits_d = model(x + d)
        kl = utils_th.kl_with_logits(predictions, logits_d)
        Hd = T.grad(kl.sum(), d)
        d = gradient.disconnected_grad(Hd)
    d = eps * utils_th.l2_batch_normalize(d)
    adv_x = gradient.disconnected_grad(x + d)
    if (clip_min is not None) and (clip_max is not None):
        adv_x = T.clip(adv_x, clip_min, clip_max)
    return adv_x
Example #30
def virtual_adversarial_perturbation(predict_fn, inputs, logits, epsilon,
                                     num_iterations=1, xi=1e-6, seed=12345):
    epsilon = floatX(epsilon)
    xi = floatX(xi)
    rng = RandomStreams(seed=seed)
    d = rng.normal(size=inputs.shape, dtype=inputs.dtype)
    for i in range(num_iterations):
        d = xi * normalize_perturbation(d)
        logits_d = predict_fn(inputs + d)
        kl = kl_with_logits(logits, logits_d)
        Hd = T.grad(kl.sum(), d)
        d = gradient.disconnected_grad(Hd)
    return epsilon * normalize_perturbation(d)
Example #31
def generate_adv_example(embedded, loss, perturb_scale):
    # embedded: [n_examples, input_length, feature_dim]

    grad = gradient.grad(loss, embedded)
    grad = gradient.disconnected_grad(grad)

    shifted = embedded + T.max(T.abs_(embedded)) + 1.0
    grad_dim = (shifted / shifted).sum(axis=(1, 2)).mean(
        axis=0)  # grad dim for each example
    sqrt_grad_dim = T.sqrt(grad_dim)  # sqrt(input_length * emb_dim)
    perturb = perturb_scale * sqrt_grad_dim * _scale_unit_l2(grad)

    return embedded + perturb
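The `_scale_unit_l2` helper used above is not shown in this example. A plausible implementation (an assumption, not the original) would L2-normalize the gradient per example before it is scaled into a perturbation:

import theano.tensor as T

def _scale_unit_l2(x, eps=1e-12):
    # assumed: x has shape [n_examples, input_length, feature_dim];
    # normalize each example to unit L2 norm
    norm = T.sqrt(T.sum(T.sqr(x), axis=(1, 2), keepdims=True) + eps)
    return x / norm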
Example #32
    def costs(self, application_call, prediction, prediction_mask, groundtruth,
              groundtruth_mask, **inputs):
        states = disconnected_grad(inputs['states'])

        merged = self.merge(**dict_subset(inputs, self.merge_names))
        # Compute log-probabilities for the predicted tokens
        log_probs = -self.all_scores(prediction, merged) * prediction_mask
        # Compute per-token rewards
        rewards = self.reward_brick.apply(prediction, prediction_mask,
                                          groundtruth,
                                          groundtruth_mask).sum(axis=-1)
        # Encourage entropy by adding negated log-probs to the rewards
        application_call.add_auxiliary_variable(log_probs, name='log_probs')
        if self.entropy_coof:
            rewards += self.entropy_coof * disconnected_grad(-log_probs)

        future_rewards = rewards[::-1].cumsum(axis=0)[::-1]

        baselines = self.value_prediction.apply(states)[:, :, 0]
        application_call.add_auxiliary_variable(baselines, name='baselines')
        # Compute baseline error
        centered_future_rewards = future_rewards - baselines
        baseline_errors = ((centered_future_rewards *
                            disconnected_grad(prediction_mask))**2).sum(axis=0)
        application_call.add_auxiliary_variable(baseline_errors,
                                                name='baseline_errors')

        # The gradient of this will be the REINFORCE 1-sample
        # gradient estimate
        costs = (disconnected_grad(centered_future_rewards) * log_probs *
                 prediction_mask).sum(axis=0)

        # Add auxiliary variables for intermediate steps of the computation
        application_call.add_auxiliary_variable(rewards, name='rewards')
        application_call.add_auxiliary_variable(log_probs.copy(),
                                                name='prediction_log_probs')

        return costs
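A minimal standalone sketch (assuming Theano; not part of the original source) of the REINFORCE surrogate built in `costs` above: multiplying log-probabilities by a gradient-blocked advantage gives an expression whose gradient is the one-sample score-function estimate, without differentiating through the rewards themselves.

import theano
import theano.tensor as T
from theano.gradient import disconnected_grad

log_probs = T.vector('log_probs')   # log pi(a_t | s_t) for the sampled actions
advantage = T.vector('advantage')   # centered future rewards
surrogate = (disconnected_grad(advantage) * log_probs).sum()
g = theano.grad(surrogate, log_probs)   # equals the (constant) advantage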
Example #33
    def build_functions(self):
        S = Input(shape=self.state_size)
        NS = Input(shape=self.state_size)
        A = Input(shape=(1, ), dtype='int32')
        R = Input(shape=(1, ), dtype='float32')
        T = Input(shape=(1, ), dtype='int32')
        self.build_model()
        self.value_fn = K.function([S], self.model(S))

        VS = self.model(S)
        VNS = disconnected_grad(self.model(NS))
        future_value = (1 - T) * VNS.max(axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = ((VS[:, A] - target)**2).mean()
        opt = RMSprop(0.0001)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        self.train_fn = K.function([S, NS, A, R, T], cost, updates=updates)
Example #34
    def build_functions(self):
        S = Input(shape=self.state_size)
        NS = Input(shape=self.state_size)
        A = Input(shape=(1,), dtype='int32')
        R = Input(shape=(1,), dtype='float32')
        T = Input(shape=(1,), dtype='int32')
        self.build_model()
        self.value_fn = K.function([S], self.model(S))

        VS = self.model(S)
        VNS = disconnected_grad(self.model(NS))
        future_value = (1-T) * VNS.max(axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = ((VS[:, A] - target)**2).mean()
        opt = RMSprop(0.0001)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        self.train_fn = K.function([S, NS, A, R, T], cost, updates=updates)
Example #35
    def __init__(self,
                 weights,
                 neurons_topology,
                 learning_rate=0.1,
                 learning_rate_decay=0.985,
                 collaboration_sigma=1.0,
                 collaboration_sigma_decay=0.95,
                 verbosity=2):

        self._verbosity = verbosity
        self._history = []
        self.neurons_number = weights.shape[0]
        self.W_shar_mat = theano.shared(weights)
        self.D_shar_mat = theano.shared(neurons_topology)

        self.collaboration_sigma = theano.shared(collaboration_sigma)
        self.collaboration_sigma_decay = collaboration_sigma_decay

        self.x_row = T.vector("exemplar")
        self.x_mat = T.matrix("batch")

        self.learning_rate = theano.shared(learning_rate)
        self.learning_rate_decay = learning_rate_decay

        self.distance_from_y_row = ((T.sub(self.W_shar_mat,
                                           self.x_row)**2).sum(axis=1))
        self.closest_neuron_idx = T.argmin(self.distance_from_y_row)
        self.distances_from_closest_neuron = self.D_shar_mat[
            self.closest_neuron_idx]
        self.affinities_to_closest_neuron = T.exp(
            -self.distances_from_closest_neuron /
            (self.collaboration_sigma)**2)

        self.smoothed_distances_from_closest_neuron = T.mul(
            self.distance_from_y_row,
            G.disconnected_grad(self.affinities_to_closest_neuron))
        self.cost_scal = self.smoothed_distances_from_closest_neuron.sum()

        self.updates = sgd(self.cost_scal, [self.W_shar_mat],
                           learning_rate=self.learning_rate)
        self.update_neurons = theano.function([self.x_row],
                                              self.cost_scal,
                                              updates=self.updates)
Example #36
 def __init__(self,
              weights,
              neurons_topology,
              relaxing_factor=-0.5,
              **kwargs):
     super(WinnerRelaxingSOM, self).__init__(weights, neurons_topology,
                                             **kwargs)
     self.wr_relaxing_factor = relaxing_factor
     self.wr_relaxing_member = (
         self.smoothed_distances_from_closest_neuron.sum() -
         self.smoothed_distances_from_closest_neuron[
             self.closest_neuron_idx])
     self.cost_scal += self.wr_relaxing_factor * self.learning_rate * T.mul(
         self.W_shar_mat[self.closest_neuron_idx],
         G.disconnected_grad(self.wr_relaxing_member)).sum()
     self.updates = sgd(self.cost_scal, [self.W_shar_mat],
                        learning_rate=self.learning_rate)
     self.update_neurons = theano.function([self.x_row],
                                           self.cost_scal,
                                           updates=self.updates)
Example #37
def margin_sensitivity(inputs, logits, labels, num_outputs, ord=2):
    """Compute margin sensitivity (proposed regularization).
    """
    assert ord in [2, np.inf]

    batch_size = inputs.shape[0]
    batch_indices = T.arange(batch_size)

    # shape: labels, batch, channels, height, width
    jac = jacobian(logits, inputs, num_outputs=num_outputs, pack_dim=0)

    # basically jac_labels = jac[labels, batch_indices]
    jac_flt = jac.reshape(
        (-1, inputs.shape[1], inputs.shape[2], inputs.shape[3]))
    jac_labels_flt = jac_flt[labels * batch_size + batch_indices]
    jac_labels = jac_labels_flt.reshape(inputs.shape)

    w = jac - T.shape_padaxis(jac_labels, axis=0)
    reduce_ind = range(2, inputs.ndim + 1)
    if ord == 2:
        dist = T.sum(w**2, axis=reduce_ind)
    elif ord == np.inf:
        dist = T.sum(T.abs_(w), axis=reduce_ind)
    else:
        raise ValueError

    l = T.argmax(dist, axis=0)
    l = gradient.disconnected_grad(l)

    corrects = logits[batch_indices, labels]
    others = logits[batch_indices, l]

    corrects_grad = T.grad(corrects.sum(), inputs)
    others_grad = T.grad(others.sum(), inputs)
    reduce_ind = range(1, inputs.ndim)
    if ord == 2:
        return T.sum((corrects_grad - others_grad)**2, axis=reduce_ind)
    elif ord == np.inf:
        return T.sum(T.abs_(corrects_grad - others_grad), axis=reduce_ind)
    else:
        raise ValueError
Example #38
    def cost(self, application_call, **kwargs):
        # pop inputs we know about
        inputs_mask = kwargs.pop('inputs_mask')
        labels = kwargs.pop('labels')
        labels_mask = kwargs.pop('labels_mask')

        # the rest is for bottom
        bottom_processed = self.bottom.apply(**kwargs)
        encoded, encoded_mask = self.encoder.apply(input_=bottom_processed,
                                                   mask=inputs_mask)
        encoded = self.top.apply(encoded)
        outs_forward = self.generators[0].evaluate(labels,
                                                   labels_mask,
                                                   attended=encoded,
                                                   attended_mask=encoded_mask)
        costs_forward, states_forward, _, _, _, _ = outs_forward
        outs_backward = self.generators[1].evaluate(
            labels[::-1],
            labels_mask[::-1] if labels_mask else None,
            attended=encoded[::-1],
            attended_mask=encoded_mask[::-1])
        costs_backward, states_backward, _, _, _, _ = outs_backward
        costs_backward = costs_backward[::-1]
        states_backward = states_backward[::-1]

        states_shape = states_forward.shape
        backward_predicted = self.forward_to_backward.apply(
            states_forward.reshape((states_shape[0] * states_shape[1], -1)))
        backward_predicted = backward_predicted.reshape(states_shape)
        backward_predicted = backward_predicted * labels_mask[:, :, None]

        states_backward = gradient.disconnected_grad(states_backward)
        states_backward = states_backward * labels_mask[:, :, None]
        l2_cost = ((backward_predicted - states_backward)**2).mean(axis=2)
        l2_cost.name = 'l2_cost_aux'
        application_call.add_auxiliary_variable(
            l2_cost.sum(axis=0).mean().copy(name='l2_cost_aux'))
        costs_forward_aux = (costs_forward.sum(axis=0).mean()).copy(
            name='costs_forward_aux')
        application_call.add_auxiliary_variable(costs_forward_aux)
        return costs_forward + costs_backward + 1.5 * l2_cost
Example #39
def fast_gradient_perturbation(inputs,
                               logits,
                               labels=None,
                               epsilon=0.3,
                               ord=np.inf):
    epsilon = floatX(epsilon)
    if labels is None:
        raise ValueError
    nll = categorical_crossentropy(logits, labels)
    grad = T.grad(nll.sum(), inputs, consider_constant=[labels])
    if ord == np.inf:
        perturbation = T.sgn(grad)
    elif ord == 1:
        sum_ind = list(range(1, inputs.ndim))
        perturbation = grad / T.sum(T.abs_(grad), axis=sum_ind, keepdims=True)
    elif ord == 2:
        sum_ind = list(range(1, inputs.ndim))
        perturbation = grad / T.sqrt(
            T.sum(grad**2, axis=sum_ind, keepdims=True))
    perturbation *= epsilon
    return gradient.disconnected_grad(perturbation)
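A hedged usage sketch for fast_gradient_perturbation above (the linear model and shared weights are illustrative assumptions; the function and its helpers are assumed importable): because the returned perturbation is wrapped in disconnected_grad, an adversarial-training loss built on top of it backpropagates into the model parameters but not through the gradient that produced the perturbation.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
labels = T.matrix('labels')
W = theano.shared(np.random.randn(784, 10).astype(theano.config.floatX))
logits = T.dot(x, W)                                 # assumed stand-in model
perturbation = fast_gradient_perturbation(x, logits, labels, epsilon=0.1)
adv_logits = T.dot(x + perturbation, W)
adv_loss = T.nnet.categorical_crossentropy(
    T.nnet.softmax(adv_logits), labels).mean()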
Example #40
def __step(img, prev_bbox, prev_att, state, prev_conf, prev_sugg, prev_W, prev_b, prev_pos, prev_neg, timestep):
	cx = (prev_bbox[:, 2] + prev_bbox[:, 0]) / 2.
	cy = (prev_bbox[:, 3] + prev_bbox[:, 1]) / 2.
	sigma = TT.exp(prev_att[:, 0]) * (max(img_col, img_row) / 2)
	fract = TT.exp(prev_att[:, 1])
	amplifier = TT.exp(prev_att[:, 2])

	eps = 1e-8

	abs_cx = (cx + 1) / 2. * (img_col - 1)
	abs_cy = (cy + 1) / 2. * (img_row - 1)
	abs_stride = (fract * (max(img_col, img_row) - 1)) * ((1. / (NUM_N - 1.)) if NUM_N > 1 else 0)

	FX, FY = __filterbank(abs_cx, abs_cy, abs_stride, sigma)
	unnormalized_mask = (FX.dimshuffle(0, 'x', 1, 'x', 2) * FY.dimshuffle(0, 1, 'x', 2, 'x')).sum(axis=2).sum(axis=1)
	mask = unnormalized_mask# / (unnormalized_mask.sum(axis=2).sum(axis=1) + eps).dimshuffle(0, 'x', 'x')
	masked_img = img

	conv1 = conv2d(masked_img, conv1_filters, subsample=(conv1_stride, conv1_stride))
	act1 = TT.tanh(conv1)
	flat1 = TT.reshape(act1, (-1, conv1_output_dim))
	gru_in = TT.concatenate([flat1, prev_bbox, prev_conf.reshape((batch_size, 1)), prev_sugg], axis=1)
	gru_z = NN.sigmoid(TT.dot(gru_in, Wz) + TT.dot(state, Uz) + bz)
	gru_r = NN.sigmoid(TT.dot(gru_in, Wr) + TT.dot(state, Ur) + br)
	gru_h_ = TT.tanh(TT.dot(gru_in, Wg) + TT.dot(gru_r * state, Ug) + bg)
	gru_h = (1 - gru_z) * state + gru_z * gru_h_
	bbox = TT.tanh(TT.dot(gru_h, W_fc2) + b_fc2)
	att = TT.dot(gru_h, W_fc3) + b_fc3

	def batch_dot(a, b):
		return (a.dimshuffle(0, 1, 2, 'x') * b.dimshuffle(0, 'x', 1, 2)).sum(axis=2)

	def bounding(bbox):
		return TT.stack([TT.maximum(bbox[:, 0], -1), TT.minimum(bbox[:, 1], 1), TT.maximum(bbox[:, 2], -1), TT.minimum(bbox[:, 3], 1)], axis=1)

	def sample_positives(bbox):
		x0 = bbox[:, 0]
		y0 = bbox[:, 1]
		x1 = bbox[:, 2]
		y1 = bbox[:, 3]
		return TT.stack([bounding(TT.as_tensor([x0, y0, x1, y1]).T),
				 bounding(TT.as_tensor([x0 * 0.75 + x1 * 0.25, y0, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0 * 0.75 + y1 * 0.25, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1 * 0.75 + x0 * 0.25, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1, y1 * 0.75 + y0 * 0.25]).T),
				 bounding(TT.as_tensor([x0 * 1.25 - x1 * 0.25, y0, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0 * 1.25 - y1 * 0.25, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1 * 1.25 - x0 * 0.25, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1, y1 * 1.25 - y0 * 0.25]).T),
				], axis=1)

	def sample_negatives(bbox):
		x0 = bbox[:, 0]
		y0 = bbox[:, 1]
		x1 = bbox[:, 2]
		y1 = bbox[:, 3]
		return TT.stack([bounding(TT.as_tensor([x0 * 0.5 + x1 * 0.5, y0, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0 * 0.5 + y1 * 0.5, x1, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1 * 0.5 + x0 * 0.5, y1]).T),
				 bounding(TT.as_tensor([x0, y0, x1, y1 * 0.5 + y0 * 0.5]).T),
				 bounding(TT.as_tensor([x0 * 1.5 - x1 * 0.5, y0, x1 * 0.5 + x0 * 0.5, y1]).T),
				 bounding(TT.as_tensor([x0, y0 * 1.5 - y1 * 0.5, x1, y1 * 0.5 + y0 * 0.5]).T),
				 bounding(TT.as_tensor([x0 * 0.5 + x1 * 0.5, y0, x1 * 1.5 - x0 * 0.5, y1]).T),
				 bounding(TT.as_tensor([x0, y0 * 0.5 + y1 * 0.5, x1, y1 * 1.5 - y0 * 0.5]).T),
				], axis=1)

	def sample_around(bbox):
		return TT.concatenate([sample_positives(bbox), sample_negatives(bbox)], axis=1)

	crop = batch_multicrop(bbox.dimshuffle(0, 'x', 1), img)
	feat = conv2d(crop.reshape((batch_size, 1, img_row, img_col)), conv1_filters, subsample=(conv1_stride, conv1_stride)).reshape((batch_size, 1, -1))
	conf = NN.sigmoid(batch_dot(feat, prev_W) + TT.addbroadcast(prev_b, 1))

	nr_samples = 17
	sugg_bbox = sample_around(bbox)		# (batch_size, nr_samples, 4)
	sugg_crop = batch_multicrop(sugg_bbox, img)
	sugg_feat = conv2d(sugg_crop.reshape((batch_size * nr_samples, 1, img_row, img_col)), conv1_filters, subsample=(conv1_stride, conv1_stride)).reshape((batch_size, nr_samples, -1))
	sugg_conf = batch_dot(sugg_feat, prev_W) + TT.addbroadcast(prev_b, 1)
	print(sugg_conf.dtype)
	sugg_pos = TT.cast(sugg_conf > 0, T.config.floatX)
	print(sugg_pos.dtype)
	sugg = TG.disconnected_grad((sugg_bbox * TT.patternbroadcast(sugg_pos, [False, False, True])).sum(axis=1) / TT.patternbroadcast(sugg_pos.sum(axis=1), [False, True]))

	def classify(x, W, b):
		# x: (batch_size, samples_per_batch, feature_per_sample)
		return NN.sigmoid(batch_dot(x, W) + TT.addbroadcast(b, 1))

	def update_step(W, b, x, y, alpha=1):
		y_hat = classify(x, W, b)
		loss = ((y_hat - y) ** 2).mean()
		g = T.grad(loss, [W, b])
		return (W - alpha * g[0], b - alpha * g[1], loss), T.scan_module.until(loss < 0.01)

	nr_samples = 9
	pos_bbox = sample_positives(bbox)
	pos_crop = batch_multicrop(pos_bbox, img)
	pos_feat = conv2d(pos_crop.reshape((batch_size * nr_samples, 1, img_row, img_col)), conv1_filters, subsample=(conv1_stride, conv1_stride)).reshape((batch_size, nr_samples, -1))
	pos = TG.disconnected_grad(TT.set_subtensor(prev_pos[:, (nr_samples*timestep):(nr_samples*(timestep+1))], pos_feat))
	nr_samples = 8
	neg_bbox = sample_negatives(bbox)
	neg_crop = batch_multicrop(neg_bbox, img)
	neg_feat = conv2d(neg_crop.reshape((batch_size * nr_samples, 1, img_row, img_col)), conv1_filters, subsample=(conv1_stride, conv1_stride)).reshape((batch_size, nr_samples, -1))
	neg = TG.disconnected_grad(TT.set_subtensor(prev_neg[:, (nr_samples*timestep):(nr_samples*(timestep+1))], neg_feat))
	update_scan, _ = T.scan(fn=update_step,
				outputs_info=[prev_W, prev_b, None],
                                non_sequences=[TT.concatenate([pos[:, :9*timestep], neg[:, :8*timestep]], axis=1), TT.concatenate([TT.ones((batch_size, 9*timestep, 1)), -TT.ones((batch_size, 8*timestep, 1))], axis=1)], n_steps=1000)
	new_W, new_b = TG.disconnected_grad(update_scan[0][-1]), TG.zero_grad(update_scan[1][-1])

	return bbox, att, gru_h, TT.unbroadcast(conf, 1), sugg, new_W, TT.unbroadcast(new_b, 1), pos, neg, timestep + 1
Example #41
def dg2(x):
    return disconnected_grad(disconnected_grad(x))
Example #42
    
    
    def approx_grad(self,Xvec,mcw):
        X = Xvec.reshape((-1,self.ndim))    
        means,covars,weights,_ = self.split_params(mcw)
        log_prob = calc_log_prob_gmm_componetwise(X,means,covars,weights)
        w = T.nnet.softmax(log_prob)
        s_w = T.sum(w,0)
        w_means = T.sum(w[:,:,None]*X[:,None,:],0)/(s_w[:,None]+0.0001)
        w_covars = T.sum(w[:,:,None]*((w_means[None,:,:]-X[:,None,:])**2),0)/(s_w[:,None]+0.0001)
        w_mcw = T.concatenate((w_means.flatten(),w_covars.flatten(),weights))
        return jacobian(w_mcw,[Xvec],consider_constant=[mcw,Xvec,w,s_w])[0]
    
    
    def grad(self, inputs, output_grads):
        (Yvec,) = inputs
        Yvec = gradient.disconnected_grad(Yvec)
        mcw_vec = GMMOp(self.gm_num,self.ndim,self.gmm)(Yvec)
        if(self.use_approx_grad):
            return  [output_grads[0].dot(self.approx_grad(Yvec,mcw_vec))]
        else:
            lam = Yvec.shape[0]//self.ndim
            mcwl_vec = T.concatenate((mcw_vec,lam.reshape((1,))))
            N,M = self.build_linear_system(Yvec,mcwl_vec)
            dX = self.solve_linear_system(N,M)
            return [output_grads[0].dot(gradient.disconnected_grad(dX[0:dX.shape[0]-1, :]))]

def get_gmm(X,gm_num,ndims,use_approx_grad=False,covariance_type='diag'):
    if(gm_num == 1):
        means = T.mean(X,0).reshape((1,-1))
        covars = (T.std(X,0)**2).reshape((1,-1))+1e-8
        weights = T.ones(1)
Example #43
    def encode(self, belief_t, degree_t, intent_t,
            masked_source_t, masked_source_len_t,
            masked_target_t, masked_target_len_t, 
            utt_group_t, sample_t=None):
        
        # prepare belief state vector
        belief_t = G.disconnected_grad(T.concatenate(belief_t,axis=0))
        ##########################
        # prior parameterisation #
        ##########################
        hidden_t = T.tanh( T.dot(belief_t,self.Ws1)+
                    T.dot(degree_t,self.Ws2)+
                    T.dot(intent_t,self.Ws3))
        prior_t  = T.nnet.softmax(
                    T.dot( T.tanh(
                        T.dot(hidden_t,self.Wp1)+self.bp1),
                        self.Wp2) )
       
        ##############################
        # posterior parameterisation #
        ##############################
        # response encoding
        target_intent_t = bidirectional_encode(
                self.tfEncoder, self.tbEncoder,
                masked_target_t, masked_target_len_t  )
        source_intent_t = bidirectional_encode(
                self.sfEncoder, self.sbEncoder,
                masked_source_t, masked_source_len_t  )
        # scores before softmax layer
        q_logit_t = T.dot(T.tanh( T.dot(belief_t,self.Wq1)+
                        T.dot(degree_t,self.Wq2)+
                        T.dot(source_intent_t,self.Wq3)+
                        T.dot(target_intent_t,self.Wq4)),
                    self.Wq5 )

        # sampling from a scaled posterior
        if self.sample_mode=='posterior':
            print('\t\tSampling from posterior ...')
            posterior_t= T.nnet.softmax(q_logit_t) 
            z_t = T.switch( T.lt(utt_group_t,self.dl-1),
                    utt_group_t,
                    G.disconnected_grad( T.argmax( 
                      self.srng.multinomial(
                      pvals=posterior_t,dtype='float32')[0])  )
                  )
        else:
            # choose to use the current sample or ground truth
            print('\t\tSampling from prior ...')
            z_t = T.switch( T.lt(utt_group_t,self.dl-1),
                    utt_group_t, sample_t)
        
        # put sample into decoder to decode 
        hidden_t = T.nnet.sigmoid(self.Wd2[z_t,:]+self.bd1)*hidden_t
        actEmb_t = T.tanh(T.dot(
                            T.concatenate( [T.tanh(self.Wd1[z_t,:]),hidden_t],axis=0 ),
                          self.Wd3)).dimshuffle('x',0)
        
        # return the true posterior
        posterior_t= T.nnet.softmax(q_logit_t)
        
        # compute baseline estimate
        b_t = self.baseline.encode(belief_t,degree_t,source_intent_t,target_intent_t)

        return actEmb_t, prior_t[0], posterior_t[0], z_t, b_t, posterior_t