Example #1
def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode, activ='lambda x: x', **kwargs):
    """ 
    compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)

    """      
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')]) + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),  
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)   
    return eval(activ)(X)
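Below is a minimal, runnable sketch of the same three-way switch, assuming plain Theano only; the dummy gate G and the variable names stand in for the project's tparams / p_name machinery and are illustrative, not part of the original code.

import numpy
import theano
import theano.tensor as tensor
from theano.ifelse import ifelse

X_word = tensor.matrix('X_word')                  # (batch size, dim)
X_char = tensor.matrix('X_char')                  # (batch size, dim)
pretrain_mode = theano.shared(numpy.float32(2.))  # 0. = word, 1. = char, 2. = gated mix
G = tensor.nnet.sigmoid(X_word.mean(axis=-1))     # dummy per-example gate in (0, 1)

X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
           ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
           G[:, None] * X_char + (1. - G)[:, None] * X_word)
f = theano.function([X_word, X_char], X)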
Example #2
    def get_aggregator(self):
        initialized = shared_like(0.)
        numerator_acc = shared_like(self.numerator)
        denominator_acc = shared_like(self.denominator)

        # Dummy default expression to use as the previously-aggregated
        # value, that has the same shape as the new result
        numerator_zeros = tensor.as_tensor(self.numerator).zeros_like()
        denominator_zeros = tensor.as_tensor(self.denominator).zeros_like()

        conditional_update_num = self.numerator + ifelse(initialized,
                                                         numerator_acc,
                                                         numerator_zeros)
        conditional_update_den = self.denominator + ifelse(initialized,
                                                           denominator_acc,
                                                           denominator_zeros)

        initialization_updates = [(numerator_acc,
                                   tensor.zeros_like(numerator_acc)),
                                  (denominator_acc,
                                   tensor.zeros_like(denominator_acc)),
                                  (initialized, 0.)]
        accumulation_updates = [(numerator_acc,
                                 conditional_update_num),
                                (denominator_acc,
                                 conditional_update_den),
                                (initialized, 1.)]
        aggregator = Aggregator(aggregation_scheme=self,
                                initialization_updates=initialization_updates,
                                accumulation_updates=accumulation_updates,
                                readout_variable=(numerator_acc /
                                                  denominator_acc))
        return aggregator
Example #3
File: rae.py Project: zomux/nlpy
    def _recursive_step(self, i, regs, tokens, seqs, back_routes, back_lens):
        seq = seqs[i]
        # Encoding
        left, right, target = seq[0], seq[1], seq[2]

        left_rep = ifelse(T.lt(left, 0), tokens[-left], regs[left])
        right_rep = ifelse(T.lt(right, 0), tokens[-right], regs[right])

        rep = self._encode_computation(left_rep, right_rep)

        if self.deep:
            inter_rep = rep
            rep = self._deep_encode(inter_rep)
        else:
            inter_rep = T.constant(0)


        new_regs = T.set_subtensor(regs[target], rep)

        back_len = back_lens[i]

        back_reps, lefts, rights = self._unfold(back_routes[i], new_regs, back_len)
        gf_W_d1, gf_W_d2, gf_B_d1, gf_B_d2, distance, rep_gradient = self._unfold_gradients(back_reps, lefts, rights, back_routes[i],
                                                                    tokens, back_len)

        return ([rep, inter_rep, left_rep, right_rep, new_regs, rep_gradient, distance],
                self.decode_optimizer.setup([self.W_d1, self.W_d2, self.B_d1, self.B_d2],
                                    [gf_W_d1, gf_W_d2, gf_B_d1, gf_B_d2], method=self.optimization, beta=self.beta))
Example #4
def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar


    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    div = ifelse(
      T.eq(beta, 2),
      T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
      ifelse(
        T.eq(beta, 0),
        T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
        ifelse(
          T.eq(beta, 1),
          T.sum(T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) - X),
          T.sum(1. / (beta * (beta - 1.)) * (T.power(X, beta) +
                (beta - 1.) * T.power(T.dot(H, W), beta) -
                beta * T.power(T.mul(X, T.dot(H, W)), (beta - 1)))))))
    return div
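For reference, the three special-cased branches above (with V = HW) are the usual named members of the beta-divergence family; only the final branch handles a general beta:

D_2(X \mid V) = \tfrac{1}{2} \sum (X - V)^2                               (squared Euclidean)
D_1(X \mid V) = \sum \bigl( X \log\tfrac{X}{V} + V - X \bigr)             (generalized Kullback-Leibler)
D_0(X \mid V) = \sum \bigl( \tfrac{X}{V} - \log\tfrac{X}{V} - 1 \bigr)    (Itakura-Saito)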
Example #5
def momentum_normscaled(loss, all_params, lr, mom, batch_size, max_norm=np.inf, weight_decay=0.0,verbose=False):
    updates = []
    #all_grads = [theano.grad(loss, param) for param in all_params]
    all_grads = theano.grad(gradient_clipper(loss),all_params)

    grad_lst = [ T.sum( (  grad / float(batch_size) )**2  ) for grad in all_grads ]
    grad_norm = T.sqrt( T.sum( grad_lst ))
    if verbose:
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM1:')(grad_norm)

    all_grads = ifelse(T.gt(grad_norm, max_norm),
                       [grads*(max_norm / grad_norm) for grads in all_grads],
                       all_grads)


    if verbose:
        grad_lst = [ T.sum( (  grad / float(batch_size) )**2  ) for grad in all_grads ]
        grad_norm = T.sqrt( T.sum( grad_lst ))
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM2:')(grad_norm)
        all_grads = ifelse(T.gt(grad_norm, np.inf),
                           [grads*(max_norm / grad_norm) for grads in all_grads],
                           all_grads)

    for param_i, grad_i in zip(all_params, all_grads):
        mparam_i = theano.shared(np.zeros(param_i.get_value().shape, dtype=theano.config.floatX))
        v = mom * mparam_i - lr*(weight_decay*param_i + grad_i)

        updates.append( (mparam_i, v) )
        updates.append( (param_i, param_i + v) )

    return updates
Example #6
    def _forward(self):
        eps = self.eps

        param_size = (1, 1, self.n_output, 1, 1)
        self.gamma = self.declare(param_size)
        self.beta = self.declare(param_size)

        mean = self.inpt.mean(axis=[0, 1, 3, 4], keepdims=False)
        std = self.inpt.std(axis=[0, 1, 3, 4], keepdims=False)

        self._setup_running_metrics(self.n_output)
        self.running_mean.default_update = ifelse(
            self.training,
            (1.0 - self.alpha) * self.running_mean + self.alpha * mean,
            self.running_mean
        )
        self.running_std.default_update = ifelse(
            self.training,
            (1.0 - self.alpha) * self.running_std + self.alpha * std,
            self.running_std
        )

        # This will be optimized away, but ensures the running mean and the running std get updated.
        # Reference: https://gist.github.com/f0k/f1a6bd3c8585c400c190#file-batch_norm-py-L86
        mean += 0 * self.running_mean
        std += 0 * self.running_std

        use_mean = ifelse(self.training, mean, self.running_mean)
        use_std = ifelse(self.training, std, self.running_std)

        use_mean = use_mean.dimshuffle('x', 'x', 0, 'x', 'x')
        use_std = use_std.dimshuffle('x', 'x', 0, 'x', 'x')
        norm_inpt = (self.inpt - use_mean) / (use_std + eps)
        self.output = self.gamma * norm_inpt + self.beta
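In equation form, the two default_update expressions above are the usual exponential moving averages of the batch statistics, applied only while self.training is true:

\mu_{\text{run}} \leftarrow (1 - \alpha)\,\mu_{\text{run}} + \alpha\,\mu_{\text{batch}}, \qquad \sigma_{\text{run}} \leftarrow (1 - \alpha)\,\sigma_{\text{run}} + \alpha\,\sigma_{\text{batch}}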
Example #7
def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2], t4, mode=Mode(linker='vm',
                                                 optimizer='fast_run'))
    if theano.config.vm.lazy is False:
        try:
            f(1, 0, numpy.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(f(1, 0, numpy.array(10, dtype=x1.dtype), 0))
        assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print('... passed')
Example #8
File: model.py Project: ivanhe/rnn
 def build_model(self):
   print '\n... building the model with unroll=%d, backroll=%d' \
     % (self.source.unroll, self.source.backroll)
   x = T.imatrix('x')
   y = T.imatrix('y')
   reset = T.scalar('reset')
   hiddens = [h['init'] for h in self.hiddens.values()]
   outputs_info = [None] * 3 + hiddens
   [losses, probs, errors, hids], updates = \
     theano.scan(self.step, sequences=[x, y], outputs_info=outputs_info)
   loss = losses.sum()
   error = errors.sum() / T.cast((T.neq(y, 255).sum()), floatX)
   hidden_updates_train = []
   hidden_updates_test = []
   for h in self.hiddens.values():
     h_train = ifelse(T.eq(reset, 0), \
       hids[-1-self.source.backroll, :], T.ones_like(h['init']))
     h_test = ifelse(T.eq(reset, 0), \
       hids[-1, :], T.ones_like(h['init']))
     hidden_updates_train.append((h['init'], h_train))
     hidden_updates_test.append((h['init'], h_test))
   updates = self.source.get_updates(loss, self.sgd_params)
   updates += hidden_updates_train
   rets = [loss, probs[-1, :], error]
   mode = theano.Mode(linker='cvm')
   train_model = theano.function([x, y, reset, self.lr], rets, \
     updates=updates, mode=mode)
   test_model = theano.function([x, y, reset], rets, \
     updates=hidden_updates_test, mode=mode)
   return train_model, test_model
Example #9
def build_model(shared_params, options, other_params):
    """
    Build the complete neural network model and return the symbolic variables
    """
    # symbolic variables
    x = tensor.matrix(name="x", dtype=floatX)
    y1 = tensor.iscalar(name="y1")
    y2 = tensor.iscalar(name="y2")

    # lstm cell
    (ht, ct) = lstm_cell(x, shared_params, options, other_params)  # gets the ht, ct
    # softmax 1 i.e. frame type prediction
    activation = tensor.dot(shared_params['softmax1_W'], ht).transpose() + shared_params['softmax1_b']
    frame_pred = tensor.nnet.softmax(activation) # .transpose()

    # softmax 2 i.e. gesture class prediction
    #

    # predicted probability for frame type
    f_pred_prob = theano.function([x], frame_pred, name="f_pred_prob")
    # predicted frame type
    f_pred = theano.function([x], frame_pred.argmax(), name="f_pred")

    # cost
    cost = ifelse(tensor.eq(y1, 1), -tensor.log(frame_pred[0, 0] + options['log_offset'])
                  * other_params['begin_cost_factor'],
                  ifelse(tensor.eq(y1, 2), -tensor.log(frame_pred[0, 1] + options['log_offset'])
                         * other_params['end_cost_factor'],
                         ifelse(tensor.eq(y1, 3), -tensor.log(frame_pred[0, 2] + options['log_offset']),
                                tensor.abs_(tensor.log(y1)))), name='ifelse_cost')

    # function for output of the current lstm cell and softmax prediction
    f_model_cell_output = theano.function([x], (ht, ct, frame_pred), name="f_model_cell_output")
    # return the model symbolic variables and theano functions
    return x, y1, y2, f_pred_prob, f_pred, cost, f_model_cell_output
Example #10
def norm_col(w, h):
    """normalize the column vector w (Theano function).
    Apply the inverse normalization to h such that w.h does not change

    Parameters
    ----------
    w: Theano vector
        vector to be normalised
    h: Theano vector
        vector to be normalised by the inverse normalisation

    Returns
    -------
    w : Theano vector with the same shape as w
        normalised vector (w/norm)
    h : Theano vector with the same shape as h
        h*norm
    """
    norm = w.norm(2, 0)
    eps = 1e-12
    size_norm = (T.ones_like(w)).norm(2, 0)
    w = ifelse(T.gt(norm, eps),
               w/norm,
               (w+eps)/(eps*size_norm).astype(theano.config.floatX))
    h = ifelse(T.gt(norm, eps),
               h*norm,
               (h*eps*size_norm).astype(theano.config.floatX))
    return w, h
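A small usage sketch, assuming norm_col as defined above; w0 and h0 are made-up test vectors, and the assertion only checks the documented invariant that the product w.h is unchanged:

import numpy as np
import theano
import theano.tensor as T

w_sym = T.vector('w')
h_sym = T.vector('h')
w_n, h_n = norm_col(w_sym, h_sym)
f = theano.function([w_sym, h_sym], [w_n, h_n])

w0 = np.random.rand(4).astype(theano.config.floatX)
h0 = np.random.rand(4).astype(theano.config.floatX)
w1, h1 = f(w0, h0)
assert np.allclose(w0 * h0, w1 * h1)   # w.h preserved by the renormalisation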
Example #11
File: optim.py Project: gburt/iaf
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum
    
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()
    
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it,n_accum), 0)
    update = T.eq(T.mod(it,n_accum), n_accum-1)
    
    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)
        
            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha *  new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]   
    return new, ws_avg
Example #12
    def get_aggregator(self):
        initialized = shared_like(0.)
        numerator_acc = shared_like(self.numerator)
        denominator_acc = shared_like(self.denominator)

        conditional_update_num = ifelse(initialized,
                                        self.numerator + numerator_acc,
                                        self.numerator)
        conditional_update_den = ifelse(initialized,
                                        self.denominator + denominator_acc,
                                        self.denominator)

        initialization_updates = [(numerator_acc,
                                   tensor.zeros_like(numerator_acc)),
                                  (denominator_acc,
                                   tensor.zeros_like(denominator_acc)),
                                  (initialized, 0.)]
        accumulation_updates = [(numerator_acc,
                                 conditional_update_num),
                                (denominator_acc,
                                 conditional_update_den),
                                (initialized, 1.)]
        aggregator = Aggregator(aggregation_scheme=self,
                                initialization_updates=initialization_updates,
                                accumulation_updates=accumulation_updates,
                                readout_variable=(numerator_acc /
                                                  denominator_acc))
        return aggregator
Example #13
 def build(self, output, tparams=None, BNparams=None):
     if self.BN_mode:
         self.BN_eps = npt(self.BN_eps)
         if not hasattr(self, 'BN_mean'):
             self.BN_mean = T.mean(output)
         if not hasattr(self, 'BN_std'):
             m2 = (1 + 1 / (T.prod(output.shape) - 1)).astype(floatX)
             self.BN_std = T.sqrt(m2 * T.var(output) + self.BN_eps)
         if self.BN_mode == 2:
             t_mean = T.mean(output, axis=[0, 2, 3], keepdims=True)
             t_var = T.var(output, axis=[0, 2, 3], keepdims=True)
             BN_mean = BNparams[p_(self.prefix, 'mean')].dimshuffle(
                 'x', 0, 'x', 'x')
             BN_std = BNparams[p_(self.prefix, 'std')].dimshuffle(
                 'x', 0, 'x', 'x')
             output = ifelse(
                 self.training,
                 (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                 (output - BN_mean) / BN_std)
             output *= tparams[p_(self.prefix, 'BN_scale')].dimshuffle(
                 'x', 0, 'x', 'x')
             output += tparams[p_(self.prefix, 'BN_shift')].dimshuffle(
                 'x', 0, 'x', 'x')
         elif self.BN_mode == 1:
             t_mean = T.mean(output)
             t_var = T.var(output)
             output = ifelse(
                 self.training,
                 (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                 ((output - BNparams[p_(self.prefix, 'mean')])
                  / BNparams[p_(self.prefix, 'std')]))
             output *= tparams[p_(self.prefix, 'BN_scale')]
             output += tparams[p_(self.prefix, 'BN_shift')]
     self.output = self.activation(output)
Example #14
 def call(self, vals, mask=None):
     
     block_out = vals[0]
     prev_out = vals[1]
     test_out = self.zi * block_out
     
     return ifelse(self.test, test_out, ifelse(self.zi,block_out,prev_out))
Example #15
def get_sensi_speci(y_hat, y):
    # y_hat = T.concatenate(T.sum(input=y_hat[:, 0:2], axis=1), T.sum(input=y_hat[:, 2:], axis=1))
    y_hat = T.stacklists([y_hat[:, 0] + y_hat[:, 1], y_hat[:, 2] + y_hat[:, 3] + y_hat[:, 4]]).T
    y_hat = T.argmax(y_hat)

    tag = 10 * y_hat + y
    tneg = T.cast((T.shape(tag[(T.eq(tag, 0.)).nonzero()]))[0], config.floatX)
    fneg = T.cast((T.shape(tag[(T.eq(tag, 1.)).nonzero()]))[0], config.floatX)
    fpos = T.cast((T.shape(tag[(T.eq(tag, 10.)).nonzero()]))[0], config.floatX)
    tpos = T.cast((T.shape(tag[(T.eq(tag, 11.)).nonzero()]))[0], config.floatX)



    # assert tneg + fneg + fpos + tpos == 1380
    # tneg.astype(config.floatX)
    # fneg.astype(config.floatX)
    # fpos.astype(config.floatX)
    # tpos.astype(config.floatX)

    speci = ifelse(T.eq((tneg + fpos), 0), np.float64(float('inf')), tneg / (tneg + fpos))
    sensi = ifelse(T.eq((tpos + fneg), 0), np.float64(float('inf')), tpos / (tpos + fneg))

    # what a trap!!!
    # if T.eq((tneg + fpos), 0):
    #     speci = float('inf')
    # else:
    #     speci = tneg // (tneg + fpos)
    # if T.eq((tpos + fneg), 0.):
    #     sensi = float('inf')
    # else:
    #     sensi = tpos // (tpos + fneg)

    # speci.astype(config.floatX)
    # sensi.astype(config.floatX)
    return [sensi, speci]
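The two ifelse guards above return infinity whenever a denominator is empty; otherwise they compute the standard definitions

\text{sensitivity} = \frac{TP}{TP + FN}, \qquad \text{specificity} = \frac{TN}{TN + FP}

with TP, TN, FP, FN taken from the tag counts computed just before.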
Example #16
    def test_merge_ifs_true_false(self):
        raise SkipTest("Optimization temporarily disabled")
        x1 = tensor.scalar('x1')
        x2 = tensor.scalar('x2')
        y1 = tensor.scalar('y1')
        y2 = tensor.scalar('y2')
        w1 = tensor.scalar('w1')
        w2 = tensor.scalar('w2')
        c = tensor.iscalar('c')

        out = ifelse(c,
            ifelse(c, x1, x2) + ifelse(c, y1, y2) + w1,
            ifelse(c, x1, x2) + ifelse(c, y1, y2) + w2)
        f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                            allow_input_downcast=True)
        assert len([x for x in f.maker.env.toposort()
                if isinstance(x.op, IfElse)]) == 1

        rng = numpy.random.RandomState(utt.fetch_seed())
        vx1 = rng.uniform()
        vx2 = rng.uniform()
        vy1 = rng.uniform()
        vy2 = rng.uniform()
        vw1 = rng.uniform()
        vw2 = rng.uniform()
        assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                              vx1 + vy1 + vw1)
        assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                              vx2 + vy2 + vw2)
Example #17
  def __init__(self, factor=numpy.sqrt(2), decay=1.0, min_factor=None, padding=False, **kwargs):
    super(ConvFMPLayer, self).__init__(**kwargs)
    if min_factor is None:
      min_factor = factor
    factor = T.maximum(factor * (decay ** self.network.epoch), numpy.float32(min_factor))
    sizes_raw = self.source.output_sizes

    # handle size problems
    if not padding:
      padding = T.min(self.source.output_sizes / factor) <= 0
      padding = theano.printing.Print(global_fn=maybe_print_pad_warning)(padding)

    fixed_sizes = T.maximum(sizes_raw, T.cast(T.as_tensor(
      [factor + self.filter_height - 1, factor + self.filter_width - 1]), 'float32'))
    sizes = ifelse(padding, fixed_sizes, sizes_raw)
    X_size = T.cast(T.max(sizes, axis=0), "int32")

    def pad_fn(x_t, s):
      x = T.alloc(numpy.cast["float32"](0), X_size[0], X_size[1], self.X.shape[3])
      x = T.set_subtensor(x[:s[0], :s[1]], x_t[:s[0], :s[1]])
      return x

    fixed_X, _ = theano.scan(pad_fn, [self.X.dimshuffle(2, 0, 1, 3), T.cast(sizes_raw, "int32")])
    fixed_X = fixed_X.dimshuffle(1, 2, 0, 3)
    self.X = ifelse(padding, T.unbroadcast(fixed_X, 3), self.X)

    conv_out = CuDNNConvHWBCOpValidInstance(self.X, self.W, self.b)
    conv_out_sizes = self.conv_output_size_from_input_size(sizes)
    self.output, self.output_sizes = fmp(conv_out, conv_out_sizes, T.cast(factor,'float32'))
Example #18
    def test_pushout1(self):
        raise SkipTest("Optimization temporarily disabled")
        x1 = tensor.scalar('x1')
        x2 = tensor.scalar('x2')
        y1 = tensor.scalar('y1')
        y2 = tensor.scalar('y2')
        w1 = tensor.scalar('w1')
        w2 = tensor.scalar('w2')
        c = tensor.iscalar('c')
        x, y = ifelse(c, (x1, y1), (x2, y2), name='f1')
        z = ifelse(c, w1, w2, name='f2')
        out = x * z * y

        f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                            allow_input_downcast=True)
        assert isinstance(f.maker.env.toposort()[-1].op, IfElse)
        rng = numpy.random.RandomState(utt.fetch_seed())
        vx1 = rng.uniform()
        vx2 = rng.uniform()
        vy1 = rng.uniform()
        vy2 = rng.uniform()
        vw1 = rng.uniform()
        vw2 = rng.uniform()

        assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                              vx1 * vy1 * vw1)
        assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                              vx2 * vy2 * vw2)
Example #19
 def decay(self):
     updates = []
     new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch+1)
     new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr*self.lr_decay_factor, self.lr)
     updates.append((self.batch, new_batch))
     updates.append((self.lr, new_lr))
     return updates
Example #20
def gradients(cost, parameters, lr=0.001):

    updates = []

    c = 0
    for param in parameters:

        update = param - lr * theano.grad(cost, param)

        if c == 1 or c == 3:

            # update = t.minimum(t.abs_(update), np.pi) * (update / abs(update))
            #
            # update = t.maximum(update, 0)
            # update = t.minimum(update, np.pi)

            update = ifelse(t.lt(update, 0), np.pi * 2 - 0.001, update)
            update = ifelse(t.gt(update, np.pi * 2), 0.001, update)

        if c == 2:

            update = ifelse(t.lt(update, 2), float(20), update)

        elif c == 5 or c == 6:

            update = t.maximum(update, -5)
            update = t.minimum(update, 5)

        updates.append((param, update))

        c += 1

    return updates
Example #21
def group_div(X, W, H, beta, params):
    """Compute beta divergence D(X|WH), intra-class distance
    and intra-session distance for a particular
    (class, session) couple [1]_.


    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
    params : Theano tensor
        Matrix of parameter related to class/session.
            :params[0][0]: index for the (class, session) couple
            :params[1][0]: number of vector basis related to class
            :params[1][1]: number of vector basis related to session
            :params[2]: weight on the class/session similarity constraints
            :params[3]: sessions in which class c appears
            :params[4]: classes present in session s



    Returns
    -------
    cost : Theano scalar
        total cost
    div : Theano scalar
        beta divergence D(X|WH)
    sum_cls : Theano scalar
        intra-class distance
    sum_ses : Theano scalar
        intra-session distance"""
    ind = params[0][0]
    k_cls = params[1][0]
    k_ses = params[1][1]
    lambdas = params[2]
    Sc = params[3]
    Cs = params[4]
    res_ses, up = theano.scan(
        fn=lambda Cs, prior_result: prior_result
        + eucl_dist(W[ind, :, k_cls : k_cls + k_ses], W[Cs, :, k_cls : k_cls + k_ses]),
        outputs_info=T.zeros_like(beta),
        sequences=Cs,
    )
    sum_ses = ifelse(T.gt(Cs[0], 0), res_ses[-1], T.zeros_like(beta))
    res_cls, up = theano.scan(
        fn=lambda Sc, prior_result: prior_result + eucl_dist(W[ind, :, 0:k_cls], W[Sc, :, 0:k_cls]),
        outputs_info=T.zeros_like(beta),
        sequences=Sc,
    )
    sum_cls = ifelse(T.gt(Sc[0], 0), res_cls[-1], T.zeros_like(beta))
    betaDiv = beta_div(X, W[ind].T, H, beta)

    cost = lambdas[0] * sum_cls + lambdas[1] * sum_ses + betaDiv
    return cost, betaDiv, sum_cls, sum_ses
Example #22
def one_run(my_x, my_y, my_z,
            my_u, my_v, my_w,
            my_weight,
            my_heat, my_albedo, my_microns_per_shell):

    # move
    random = rng.uniform(low=0.00003, high=1.)
    t = -T.log(random)

    x_moved = my_x + my_u*t
    y_moved = my_y + my_v*t
    z_moved = my_z + my_w*t

    # absorb
    shell = T.cast(T.sqrt(T.sqr(x_moved) + T.sqr(y_moved) + T.sqr(z_moved))
                   * my_microns_per_shell, 'int32')
    shell = T.clip(shell, 0, SHELL_MAX-1)

    new_weight = my_weight * my_albedo

    # new direction
    xi1 = rng.uniform(low=-1., high=1.)
    xi2 = rng.uniform(low=-1., high=1.)
    xi_norm = T.sqrt(T.sqr(xi1) + T.sqr(xi2))

    t_xi = rng.uniform(low=0.000000001, high=1.)

    # rescale xi12 to fit t_xi as norm
    xi1 = xi1/xi_norm * T.sqr(t_xi)
    xi2 = xi2/xi_norm * T.sqr(t_xi)

    u_new_direction = 2. * t_xi - 1.
    v_new_direction = xi1 * T.sqrt((1. - T.sqr(u_new_direction)) / t_xi)
    w_new_direction = xi2 * T.sqrt((1. - T.sqr(u_new_direction)) / t_xi)

    # roulette
    weight_for_starting_roulette = 0.001
    CHANCE = 0.1
    partakes_roulette = T.switch(T.lt(new_weight, weight_for_starting_roulette),
                                 1,
                                 0)
    roulette = rng.uniform(low=0., high=1.)
    loses_roulette = T.gt(roulette, CHANCE)
    # if roulette decides to terminate the photon: set weight to 0
    weight_after_roulette = ifelse(T.and_(partakes_roulette, loses_roulette),
                                     0.,
                                     new_weight)
    # if partakes in roulette but does not get terminated
    weight_after_roulette = ifelse(T.and_(partakes_roulette, T.invert(loses_roulette)),
                                     weight_after_roulette / CHANCE,
                                     weight_after_roulette)

    new_heat = (1.0 - my_albedo) * my_weight
    heat_i = my_heat[shell]

    return (x_moved, y_moved, z_moved,\
           u_new_direction, v_new_direction, w_new_direction,\
           weight_after_roulette),\
           OrderedDict({my_heat: T.inc_subtensor(heat_i, new_heat)})
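A brief note on the roulette step above: dividing the surviving weight by CHANCE keeps the photon-weight estimator unbiased, because

\mathbb{E}[w_{\text{after}}] = \text{CHANCE} \cdot \frac{w}{\text{CHANCE}} + (1 - \text{CHANCE}) \cdot 0 = w.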
Example #23
def scalar_armijo_search(phi, phi0, derphi0, c1=constant(1e-4),
                         n_iters=10, profile=0):
    """
    .. todo::

        WRITEME
    """
    alpha0 = one
    phi_a0 = phi(alpha0)
    alpha1 = -(derphi0) * alpha0 ** 2 / 2.0 /\
            (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    csol1 = phi_a0 <= phi0 + c1 * derphi0
    csol2 = phi_a1 <= phi0 + c1 * alpha1 * derphi0

    def armijo(alpha0, alpha1, phi_a0, phi_a1):
        factor = alpha0 ** 2 * alpha1 ** 2 * (alpha1 - alpha0)
        a = alpha0 ** 2 * (phi_a1 - phi0 - derphi0 * alpha1) - \
            alpha1 ** 2 * (phi_a0 - phi0 - derphi0 * alpha0)
        a = a / factor
        b = -alpha0 ** 3 * (phi_a1 - phi0 - derphi0 * alpha1) + \
            alpha1 ** 3 * (phi_a0 - phi0 - derphi0 * alpha0)
        b = b / factor

        alpha2 = (-b + TT.sqrt(abs(b ** 2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)

        end_condition = phi_a2 <= phi0 + c1 * alpha2 * derphi0
        end_condition = TT.bitwise_or(
            TT.isnan(alpha2), end_condition)
        end_condition = TT.bitwise_or(
            TT.isinf(alpha2), end_condition)
        alpha2 = TT.switch(
            TT.bitwise_or(alpha1 - alpha2 > alpha1 / constant(2.),
                  one - alpha2 / alpha1 < 0.96),
            alpha1 / constant(2.),
            alpha2)
        return [alpha1, alpha2, phi_a1, phi_a2], \
                theano.scan_module.until(end_condition)

    states = [alpha0, alpha1, phi_a0, phi_a1]
    # print 'armijo'
    rvals, _ = scan(
                armijo,
                outputs_info=states,
                n_steps=n_iters,
                name='armijo',
                mode=theano.Mode(linker='cvm'),
                profile=profile)

    sol_scan = rvals[1][-1]
    a_opt = ifelse(csol1, one,
                ifelse(csol2, alpha1,
                    sol_scan))
    score = ifelse(csol1, phi_a0,
                   ifelse(csol2, phi_a1,
                          rvals[2][-1]))
    return a_opt, score
Example #24
 def _FindB_best(lPLcl, lPprev, dVLcl):
     srtLcl = tensor.argsort(-lPLcl)
     srtLcl = srtLcl[:beam_size]
     deltaVec = tensor.fill( lPLcl[srtLcl], numpy_floatX(-10000.))
     deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
     lProbBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), lPLcl[srtLcl] + lPprev, deltaVec)
     xWIdxBest = ifelse(tensor.eq( dVLcl, tensor.zeros_like(dVLcl)), srtLcl, tensor.zeros_like(srtLcl)) 
     return lProbBest, xWIdxBest 
Example #25
    def __call__(self, input):
        mean = input.mean(self.axes, keepdims=True) 
        std = input.std(self.axes, keepdims=True) + self.epsilon 

        # Don't batch-normalise a single data point
        mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
        std  = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))

        return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)
Example #26
 def viterbi(self, tokScore, prevScore):
     transition = self.A[:-1]
     candidates = (prevScore + transition.T).T
     bestIndex = T.argmax(candidates,axis=0)
     scoreNew = T.max(candidates,axis=0) + tokScore
     scoreSum = T.sum(prevScore)
     scoreNew = ifelse(T.eq(scoreSum, 0), tokScore + self.A[-1], scoreNew)
     bestIndex = ifelse(T.eq(scoreSum, 0), T.arange(self.n_tags).astype('int64'), bestIndex.astype('int64'))
     return scoreNew, bestIndex
Example #27
 def compute_y(idx, p, q, S, D):
     yi = ifelse(T.eq(idx, 0),
                 T.dot(D[0], p[-1]),
                 ifelse(T.eq(idx, nT-1),
                        T.dot(D[-1],p[0]) + q[-1],
                        T.dot(D[idx], p[-idx-1]) + q[idx-1]
                    )
     )
     return yi
Example #28
def scaled_cost(x, t):
    sq_error = (x - t) ** 2
    above_thresh_sq_error = sq_error[(t > THRESHOLD).nonzero()]
    below_thresh_sq_error = sq_error[(t <= THRESHOLD).nonzero()]
    above_thresh_mean = above_thresh_sq_error.mean()
    below_thresh_mean = below_thresh_sq_error.mean()
    above_thresh_mean = ifelse(T.isnan(above_thresh_mean), 0.0, above_thresh_mean)
    below_thresh_mean = ifelse(T.isnan(below_thresh_mean), 0.0, below_thresh_mean)
    return (above_thresh_mean + below_thresh_mean) / 2.0
Example #29
 def test_merge(self):
     raise SkipTest("Optimization temporarily disabled")
     x = tensor.vector('x')
     y = tensor.vector('y')
     c = tensor.iscalar('c')
     z1 = ifelse(c, x + 1, y + 1)
     z2 = ifelse(c, x + 2, y + 2)
     z = z1 + z2
     f = theano.function([c, x, y], z)
     assert len([x for x in f.maker.env.toposort()
                 if isinstance(x.op, IfElse)]) == 1
Example #30
    def scan_y(cur_step):
        # Compute pairwise affinities
        sum_y = tensor.sum(tensor.square(y_arg), 1)
        num = 1 / (1 + tensor.add(tensor.add(-2 * tensor.dot(y_arg, y_arg.T), sum_y).T, sum_y))
        num = tensor.set_subtensor(num[range(n),range(n)], 0)

        Q = num / tensor.sum(num)
        Q = tensor.maximum(Q, 1e-12)

        PQ = p_arg - Q

        def inner(pq_i, num_i, y_arg_i):
            return tensor.sum(tensor.tile(pq_i * num_i, (no_dims, 1)).T * (y_arg_i - y_arg), 0)
        dy_arg, _ = theano.scan(inner,
                outputs_info = None,
                sequences = [PQ, num, y_arg])
        dy_arg = tensor.cast(dy_arg,FLOATX)
        # dy_arg = y_arg

        momentum = ifelse(tensor.lt(cur_step, 20), 
                initial_momentum_f, 
                final_momentum_f)

        indexsa = tensor.neq((dy_arg>0), (iy_arg>0)).nonzero()
        indexsb = tensor.eq((dy_arg>0), (iy_arg>0)).nonzero()
        resulta = tensor.set_subtensor(gains_arg[indexsa], gains_arg[indexsa]+0.2)
        resultb = tensor.set_subtensor(resulta[indexsb], resulta[indexsb]*0.8)

        indexs_min = (resultb<min_gain_f).nonzero()
        new_gains_arg = tensor.set_subtensor(resultb[indexs_min], min_gain_f)

        # last step in simple version of SNE
        new_iy_arg = momentum * iy_arg - eta * (new_gains_arg * dy_arg)
        new_y_arg = y_arg + new_iy_arg
        new_y_arg = new_y_arg - tensor.tile(tensor.mean(new_y_arg, 0), (n, 1))

        # # Compute current value of cost function
        # if (cur_step + 1) % 10 == 0:
        #     C = tensor.sum(p_arg * tensor.log(p_arg / Q))
        #     print "Iteration ", (cur_step + 1), ": error is ", C

        # Stop lying about P-values

        # new_p_arg = p_arg
        # if cur_step == 2:
        #     new_p_arg = p_arg / 4
            # p_arg = p_arg / 4
            # p_arg.set_value(p_arg.get_value / 4)
        new_p_arg = ifelse(tensor.eq(cur_step, 100), 
                p_arg / 4, 
                p_arg)
        return [(y_arg,new_y_arg),(iy_arg,new_iy_arg), (gains_arg,new_gains_arg),(p_arg,new_p_arg)]
Example #31
def sr1(inverse_hessian, weight_delta, gradient_delta, epsilon=1e-8):
    epsilon = asfloat(epsilon)
    param = weight_delta - inverse_hessian.dot(gradient_delta)
    denominator = T.dot(param, gradient_delta)
    return ifelse(
        T.lt(T.abs_(denominator),
             epsilon * param.norm(L=2) * gradient_delta.norm(L=2)),
        inverse_hessian, inverse_hessian + T.outer(param, param) / denominator)
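Written out, this is the standard symmetric-rank-one (SR1) update with s = weight_delta, y = gradient_delta and H the current inverse Hessian; the ifelse simply skips the update when the denominator is too small to be numerically safe:

H_{k+1} = H_k + \frac{(s - H_k y)(s - H_k y)^{\top}}{(s - H_k y)^{\top} y}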
Example #32
 def iftrain(self, then_branch, else_branch):
     """
     Execute `then_branch` when training.
     """
     return ifelse(self._training_flag,
                   then_branch,
                   else_branch,
                   name="iftrain")
Example #33
 def do_step(i, x_, h_, c_):
     """
     i: The step number (int)
     x_: An input vector
     h_: A hiddens state vector
     c_: A memory cell vector
     """
     y_prob, h, c = self.step(x_, h_, c_)
     y_candidate = ifelse(
         int(stochastic),
         rng.multinomial(n=1, pvals=y_prob[None, :])[0].astype(
             theano.config.floatX), y_prob)
     # y_candidate = ifelse(int(stochastic), rng.multinomial(n=1, pvals=y_prob.dimshuffle('x', 1))[0].astype(theano.config.floatX), y_prob)
     y = ifelse(
         i < n_primer_steps, primer[i], y_candidate
      )  # Note: If you get an error here, you just need to prime with something on the first call.
     return y, h, c
Example #34
 def value_single(self, x, y, f):
     ret = T.mean([
         T.min([1. - (1 - y) + f[2], 1.]),
         T.min([1. - f[2] + (1 - y), 1.])
     ])
     ret = T.cast(ret, dtype=theano.config.floatX)
     return T.cast(ifelse(T.eq(self.condition_single(x, f), 1.), ret, 1.),
                   dtype=theano.config.floatX)
Example #35
    def __init__(self, layers, cost_y, cost_z, alpha=0.5, updater='Adam', size_y=128, verbose=2,
                 interpolated=True, zero_shot=False):
        self.settings = locals()
        del self.settings['self']
        self.layers = layers

        self.cost_y = cost_y
        self.cost_z = cost_z

        if isinstance(updater, basestring):
            self.updater = case_insensitive_import(passage.updates, updater)()
        else:
            self.updater = updater
        self.iterator = SortedPaddedXYZ(size_y=size_y, shuffle=False)
        self.size_y = size_y
        self.verbose = verbose
        self.interpolated = interpolated
        self.zero_shot = zero_shot
        for i in range(1, len(self.layers)):
            self.layers[i].connect(self.layers[i-1])
        self.params = flatten([l.params for l in layers])
        self.alpha = alpha
        
        self.X = self.layers[0].input

        self.y_tr = self.layers[-1].output_left(dropout_active=True)
        self.y_te = self.layers[-1].output_left(dropout_active=False)
        self.Y = T.tensor3()

        self.z_tr = self.layers[-1].output_right(dropout_active=True)
        self.z_te = self.layers[-1].output_right(dropout_active=False)
        self.Z = T.matrix()
                                     
        cost_y = self.cost_y(self.Y, self.y_tr) 
        if self.zero_shot:  # In zero-shot setting, we disable z-loss for examples with zero z-targets
            cost_z = ifelse(T.gt(self.Z.norm(2), 0.0),  self.cost_z(self.Z, self.z_tr), 0.0) 
        else:
            cost_z = self.cost_z(self.Z, self.z_tr) 
        if self.interpolated:
            cost = self.alpha * cost_y + (1.0 - self.alpha) * cost_z
        else:
            cost = self.alpha * cost_y + cost_z
        cost_valid_y = self.cost_y(self.Y, self.y_te)
        cost_valid_z = self.cost_z(self.Z, self.z_te)
        cost_valid = self.alpha * cost_valid_y + (1.0 - self.alpha) * cost_valid_z
        
        self.updates = self.updater.get_updates(self.params, cost)
        #grads = theano.tensor.grad(cost, self.params)
        #norm = theano.tensor.sqrt(sum([theano.tensor.sum(g**2) for g in grads]))
        self._train = theano.function([self.X, self.Y, self.Z], cost, updates=self.updates)
        self._params = theano.function([], self.params[0])
        self._cost = theano.function([self.X, self.Y, self.Z], cost)
        self._cost_valid = theano.function([self.X, self.Y, self.Z], 
                                           [cost_valid_y, cost_valid_z, cost_valid])

        self._predict_y = theano.function([self.X], self.y_te)
        self._predict_z = theano.function([self.X], self.z_te)
        self._predict = theano.function([self.X], [self.y_te, self.z_te])
Example #36
    def init_train_updates(self):
        network_input = self.variables.network_input
        network_output = self.variables.network_output
        inv_hessian = self.variables.inv_hessian
        prev_params = self.variables.prev_params
        prev_full_gradient = self.variables.prev_full_gradient

        params = list(iter_parameters(self))
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=params)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        new_inv_hessian = ifelse(
            T.eq(self.variables.epoch, 1),
            inv_hessian,
            self.update_function(inv_hessian,
                                 param_vector - prev_params,
                                 full_gradient - prev_full_gradient)
        )
        param_delta = -new_inv_hessian.dot(full_gradient)

        def prediction(step):
            # TODO: I need to update this ugly solution later
            updated_params = param_vector + step * param_delta

            layer_input = network_input
            start_pos = 0
            for layer in self.layers:
                for param in layer.parameters:
                    end_pos = start_pos + param.size
                    parameter_name, parameter_id = param.name.split('_')
                    setattr(layer, parameter_name, T.reshape(
                        updated_params[start_pos:end_pos],
                        param.shape
                    ))
                    start_pos = end_pos
                layer_input = layer.output(layer_input)
            return layer_input

        def phi(step):
            return self.error(network_output, prediction(step))

        def derphi(step):
            error_func = self.error(network_output, prediction(step))
            return T.grad(error_func, wrt=step)

        step = asfloat(line_search(phi, derphi))
        updated_params = param_vector + step * param_delta
        updates = setup_parameter_updates(params, updated_params)

        updates.extend([
            (inv_hessian, new_inv_hessian),
            (prev_params, param_vector),
            (prev_full_gradient, full_gradient),
        ])

        return updates
Example #37
def AdaMaxAvg2(ws,
               objective,
               alpha=.01,
               beta1=.1,
               beta2=.001,
               beta3=0.01,
               n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'beta3:', beta3, 'n_accum:', n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum - 1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1 - beta1) * mom1 + beta1 * new[g_sum],
                               mom1)
            new[_max] = ifelse(
                update, T.maximum((1 - beta2) * _max,
                                  abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update,
                                   beta3 * new[_w] + (1. - beta3) * w_avg[i],
                                   w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
Example #38
    def in_transit(self, t, r=0.0, texp=None):
        """Get a list of timestamps that are in transit

        Args:
            t (vector): A vector of timestamps to be evaluated.
            r (Optional): The radii of the planets.
            texp (Optional[float]): The exposure time.

        Returns:
            The indices of the timestamps that are in transit.

        """

        z = tt.zeros_like(self.a)
        r = tt.as_tensor_variable(r) + z
        R = self.r_star + z

        # Wrap the times into time since transit
        hp = 0.5 * self.period
        dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

        if self.ecc is None:
            # Equation 14 from Winn (2010)
            k = r / R
            arg = tt.square(1 + k) - tt.square(self.b)
            factor = R / (self.a * self.sin_incl)
            hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
            t_start = -hdur
            t_end = hdur
            flag = z

        else:
            M_contact = self.contact_points_op(
                self.a, self.ecc, self.cos_omega, self.sin_omega,
                self.cos_incl + z, self.sin_incl + z, R + r)
            flag = M_contact[2]

            t_start = (M_contact[0] - self.M0) / self.n
            t_start = tt.mod(t_start + hp, self.period) - hp
            t_end = (M_contact[1] - self.M0) / self.n
            t_end = tt.mod(t_end + hp, self.period) - hp

            t_start = tt.switch(tt.gt(t_start, 0.0),
                                t_start - self.period, t_start)
            t_end = tt.switch(tt.lt(t_end, 0.0),
                              t_end + self.period, t_end)

        if texp is not None:
            t_start -= 0.5*texp
            t_end += 0.5*texp

        mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
        result = ifelse(tt.all(tt.eq(flag, 0)),
                        tt.arange(t.size)[mask],
                        tt.arange(t.size))

        return result
Example #39
File: rae.py Project: Satssuki/nlpy
    def _unfold_gradients_func(self, rep, dec, g_dec, target_tok, tok, w, b, unfold_idx=0):
        distance = T.sum((target_tok - dec)**2)
        g_cost_dec = T.grad(distance, dec)

        tok_is_token = T.lt(tok, 0)
        g_dec_switcher = ifelse(tok_is_token, g_cost_dec, g_dec)

        output_distance = ifelse(tok_is_token, distance, T.constant(0.0, dtype=FLOATX))

        _rep, = make_float_vectors("_rep")
        _dec = self._decode_computation(_rep)[unfold_idx]
        node_map = {_rep: rep, _dec: dec}

        g_dec_rep = SRG(T.grad(T.sum(_dec), _rep), node_map) * g_dec_switcher
        g_dec_w = SRG(T.grad(T.sum(_dec), w), node_map) * g_dec_switcher
        g_dec_b = SRG(T.grad(T.sum(_dec), b), node_map) * g_dec_switcher

        return g_dec_rep, g_dec_w, g_dec_b, output_distance
Example #40
def bfgs(inverse_hessian, weight_delta, gradient_delta, maxrho=1e4):
    ident_matrix = cast_float(T.eye(inverse_hessian.shape[0]))
    maxrho = cast_float(maxrho)
    rho = cast_float(1.) / gradient_delta.dot(weight_delta)
    rho = ifelse(T.isinf(rho), maxrho * T.sgn(rho), rho)
    param1 = ident_matrix - T.outer(weight_delta, gradient_delta) * rho
    param2 = ident_matrix - T.outer(gradient_delta, weight_delta) * rho
    param3 = rho * T.outer(weight_delta, weight_delta)
    return param1.dot(inverse_hessian).dot(param2) + param3
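For reference, this is the textbook BFGS inverse-Hessian update with s = weight_delta, y = gradient_delta and \rho = 1 / (y^{\top} s), where the ifelse clamps \rho to maxrho when y^{\top} s is numerically zero:

H_{k+1} = (I - \rho\, s y^{\top})\, H_k\, (I - \rho\, y s^{\top}) + \rho\, s s^{\top}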
Example #41
def dropout(tensor, apply_dropout, keep_prob):
    mask = RND_STREAM.binomial(n=1,
                               p=keep_prob,
                               size=tensor.shape,
                               dtype='float32')
    keep_prob = T.cast(keep_prob,
                       'float32')  # todo: weirdity around shared.set_value
    tensor_dropped = tensor * (1.0 / keep_prob) * mask
    return ifelse(apply_dropout, tensor_dropped, tensor)
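A hedged usage sketch: RND_STREAM is assumed to be a module-level RandomStreams instance (the function above implies one exists), and the names below are placeholders; the integer flag toggles between the dropped and untouched branches of the ifelse:

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

RND_STREAM = RandomStreams(seed=1234)        # stand-in for the module-level stream
x = T.matrix('x')
apply_dropout = T.iscalar('apply_dropout')   # 1 while training, 0 at test time
y = dropout(x, apply_dropout, keep_prob=0.5)
f = theano.function([x, apply_dropout], y)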
Example #42
 def apply_me(args):
     if len(args) == 1:
         return args[0]
     else:
         rval = ifelse(args[0],
                       true,
                       apply_me(args[1:]),
                       name=name + str(len(args)))
         return rval
Example #43
 def apply_me(args):
     if len(args) == 1:
         return args[0]
     else:
         rval = ifelse(TT.eq(args[0], zero),
                       false,
                       apply_me(args[1:]),
                       name=name + str(len(args)))
         return rval
Example #44
    def learning_updates(self):
        batch_counter = theano.shared(np.array(0, dtype="int32"),
                                      "batch_counter")
        batch_size = self.batch_size
        to_update = batch_counter >= batch_size

        for param in self.network.parameters:
            # delta = self.learning_rate * T.grad(self.J, param)
            gsum = theano.shared(
                np.zeros(param.get_value().shape, dtype=FLOATX),
                "batch_gsum_%s" % param.name)
            yield gsum, ifelse(to_update, T.zeros_like(gsum),
                               gsum + T.grad(self.cost, param))
            delta = self.learning_rate * gsum / batch_size
            yield param, ifelse(to_update, param - delta, param)

        yield batch_counter, ifelse(to_update, T.constant(0, dtype="int32"),
                                    batch_counter + 1)
Example #45
 def compute_S(idx, Sp1, zAA, zBB):
     Sm = ifelse(
         T.eq(idx, nT - 2),
         T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]])),
         T.dot(
             zBB[iib[idx]],
             Tla.matrix_inverse(zAA[iia[T.min([idx + 1, nT - 2])]] - T.dot(
                 Sp1, T.transpose(zBB[iib[T.min([idx + 1, nT - 2])]])))))
     return Sm
Example #46
 def __init__(self, input, p, drop_switch):
     self.input = input
     self.srng = RandomStreams(seed=234)
     self.rv_n = self.srng.normal(self.input.shape)
     self.mask = T.cast(
         self.rv_n < p, dtype=theano.config.floatX
     ) / p  # first  dropout mask, scaled with /p so we do not have to perform test time scaling (source: cs231n)
     self.output = ifelse(drop_switch > 0.5, self.input * self.mask,
                          self.input)  # only drop if drop == 1.0
Example #47
    def find_right_bound(prev_func_output, step, maxstep):
        func_output = f(step)
        is_output_decrease = T.gt(prev_func_output, func_output)
        step = ifelse(is_output_decrease, T.minimum(2. * step, maxstep), step)

        is_output_increse = T.lt(prev_func_output, func_output)
        stoprule = theano.scan_module.until(
            T.or_(is_output_increse, step > maxstep))
        return [func_output, step], stoprule
Example #48
    def test_callback_with_ifelse(self):
        a, b, c = tensor.scalars('abc')
        f = function([a, b, c],
                     ifelse(a, 2 * b, 2 * c),
                     mode=Mode(optimizer=None,
                               linker=vm.VM_Linker(callback=self.callback)))

        f(1, 2, 3)
        assert self.n_callbacks['IfElse'] == 2
Example #49
def norm_clip(dW, max_l2_norm=10.0):
    """
    Clip theano symbolic var dW to have some max l2 norm.
    """
    dW_l2_norm = T.sqrt(T.sum(dW**2.0))
    norm_ratio = (max_l2_norm / dW_l2_norm)
    clip_factor = ifelse(T.lt(norm_ratio, 1.0), norm_ratio, 1.0)
    dW_clipped = dW * clip_factor
    return dW_clipped
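A quick usage sketch, assuming norm_clip as defined above; the toy cost, shared W and learning rate are illustrative only:

import numpy as np
import theano
import theano.tensor as T

W = theano.shared(np.random.randn(3, 3), name='W')   # float64 shared parameter
x = T.dmatrix('x')
cost = T.sum(T.dot(x, W) ** 2)
gW = T.grad(cost, W)
train_step = theano.function(
    [x], cost,
    updates=[(W, W - 0.01 * norm_clip(gW, max_l2_norm=10.0))])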
Example #50
 def __init__(self,
              rng,
              input,
              input_sh,
              n_out,
              W_0=None,
              b_0=None,
              activation=T.nnet.sigmoid,
              name='FullyConnectedLayer',
              p=.5,
              fit_intercept=True):
     super().__init__(rng, input, input_sh, name)
     #        print('FullyConnected input shape: ' + repr(input.size))
     self.output_sh = (input_sh[0], n_out)
     self.W_sh = (input_sh[1], n_out)
     self.n_out = n_out
     self.activation = activation
     self.fit_intercept = int(fit_intercept)
     # dropout
     self.default_p = p
     self.p = theano.shared(p)
     #        self.p.set_value(p
     self.drop_input = theano.shared(self._dropout(),
                                     name='drop_input' + name,
                                     borrow=True)
     # weights
     if W_0 is None:
         W_0 = self._default_W()
     self.W = theano.shared(W_0, name='W' + name, borrow=True)
     # bias
     if b_0 is None:
         b_0 = self._default_b()
     self.b = theano.shared(b_0, name='b' + name, borrow=True)
     # param list
     self.params = [self.W] + self.fit_intercept * [self.b]
     self.speeds = [np.sqrt(input_sh[1])] * 2
     # output
     input_ = ifelse(T.gt(self.p.eval(), 0),
                     input * self.drop_input / (1 - self.p), input)
     lin_out = ifelse(
         self.fit_intercept,
         T.dot(input_, self.W) + self.b.repeat(repeats=input_sh[0], axis=0),
         T.dot(input_, self.W))
     self.output = lin_out if (activation is None) else activation(lin_out)
Example #51
def _per_roi_pooling(coord, x):
    #x = tt.tensor3() # 512x7x7 float tensor
    #coord = tt.fvector() # [ xmin, ymin, xmax, ymax ] in [0,1] x-width,y-height
    # step 1: float coord to int
    nb_rows = x.shape[1]  # height,y
    nb_cols = x.shape[2]  # width,x
    icoords = tt.iround(
        coord * [nb_cols, nb_rows, nb_cols, nb_rows
                 ])  # xmin,xmax multiply nb_cols, ymin,ymax multiply nb_rows
    # 0 <= xmin < nb_cols
    xmin = tt.clip(icoords[0], 0, nb_cols - 1)
    # 0 <= ymin < nb_rows
    ymin = tt.clip(icoords[1], 0, nb_rows - 1)

    xmax = tt.clip(icoords[2], 1 + xmin,
                   nb_cols)  # min(xmax) = 1+xmin, max(xmax) = nb_cols
    ymax = tt.clip(icoords[3], 1 + ymin,
                   nb_rows)  # min (ymax) = 1+ymin, max(ymax) = nb_rows

    # if xmin == xmax == nb_cols
    xmin = ifelse(tt.eq(xmax, xmin), xmax - 1, xmin)
    # if ymin == ymax == nb_rows
    ymin = ifelse(tt.eq(ymax, ymin), ymax - 1, ymin)

    # step 2: extract raw sub-stensor
    roi = x[:, ymin:ymax, xmin:xmax]
    # step 3: resize raw to target_hx target_w
    '''
    # method1 (slow): upsampling -> downsampling 
    subtensor_h = ymax - ymin
    subtensor_w = xmax - xmin
    # upsample by ( target_h, target_w ) -> ( subtensor_h * target_h, subtensor_w * target_w )
    kernel = tt.ones((target_h, target_w)) # create ones filter
    roi_up,_ =scan(fn=lambda r2d, kernel: kron(r2d,kernel),sequences = roi,non_sequences = kernel)
    # downsample to (target_h, target_w)
    #target = roi_up[:,::subtensor_h,::subtensor_w]
    target = max_pooling(roi_up, subtensor_h, subtensor_w)
    '''
    # method 2
    if cfg.NET.POOL_METHOD == 'slicepool':
        target = slice_pooling(roi, target_h, target_w)
    else:
        target = float_max_pooling(roi, target_h, target_w)
    return K.flatten(target)
Example #52
  def __init__(self, collapse='mean', maxout=False, transpose=False, **kwargs):
    super(TwoDToOneDLayer, self).__init__(1, **kwargs)
    self.set_attr('collapse', collapse)
    self.set_attr('transpose', transpose)
    Y = self.sources[0].output
    if transpose:
      Y = Y.dimshuffle(1, 0, 2, 3)

    #index handling
    def index_fn(index, size):
      return T.set_subtensor(index[:size], numpy.cast['int8'](1))
    index_init = T.zeros((Y.shape[2],Y.shape[1]), dtype='int8')
    self.index, _ = theano.scan(index_fn, [index_init, T.cast(self.sources[0].output_sizes[:,1],"int32")])
    self.index = self.index.dimshuffle(1, 0)
    n_out = self.sources[0].attrs['n_out']

    if maxout:
      Y = Y.max(axis=3).dimshuffle(0,1,2,'x')

    if collapse == 'sum' or collapse == True:
      Y = Y.sum(axis=0)
    elif collapse == 'mean':
      Y = Y.mean(axis=0)
    elif collapse == 'conv':
      from returnn.theano.util import circular_convolution
      Y, _ = theano.scan(lambda x_i,x_p:circular_convolution(x_i,x_p),Y,Y[0])
      Y = Y[-1]
    elif collapse == 'flatten':
      self.index = T.ones((Y.shape[0] * Y.shape[1], Y.shape[2]), dtype='int8')
      Y = Y.reshape((Y.shape[0]*Y.shape[1],Y.shape[2],Y.shape[3]))
    elif str(collapse).startswith('pad_'):
      pad = numpy.int32(collapse.split('_')[-1])
      Y = ifelse(T.lt(Y.shape[0],pad),T.concatenate([Y,T.zeros((pad-Y.shape[0],Y.shape[1],Y.shape[2],Y.shape[3]),'float32')],axis=0),
                 ifelse(T.gt(Y.shape[0],pad),Y[:pad],Y))
      Y = Y.dimshuffle(1,2,3,0).reshape((Y.shape[1],Y.shape[2],Y.shape[3]*Y.shape[0]))
      n_out *= pad
    elif collapse != False:
      assert False, "invalid collapse mode"

    if self.attrs['batch_norm']:
      Y = self.batch_norm(Y, n_out, force_sample=False)
    self.output = Y
    self.act = [Y, Y]
    self.set_attr('n_out', n_out)
Example #53
    def search_iteration_step(x_previous, x_current, y_previous, y_current,
                              y_deriv_previous, is_first_iteration, x_star):

        y_deriv_current = f_deriv(x_current)

        x_new = x_current * asfloat(2)
        y_new = f(x_new)

        condition1 = T.or_(
            y_current > (y0 + c1 * x_current * y_deriv_0),
            T.and_(y_current >= y_previous, T.bitwise_not(is_first_iteration)))
        condition2 = T.abs_(y_deriv_current) <= -c2 * y_deriv_0
        condition3 = y_deriv_current >= zero

        x_star = ifelse(
            condition1,
            zoom(x_previous, x_current, y_previous, y_current,
                 y_deriv_previous, f, f_deriv, y0, y_deriv_0, c1, c2),
            ifelse(
                condition2,
                x_current,
                ifelse(
                    condition3,
                    zoom(x_current, x_previous, y_current, y_previous,
                         y_deriv_current, f, f_deriv, y0, y_deriv_0, c1, c2),
                    x_new,
                ),
            ),
        )
        y_deriv_previous_new = ifelse(condition1, y_deriv_previous,
                                      y_deriv_current)

        is_any_condition_satisfied = sequential_or(condition1, condition2,
                                                   condition3)
        y_current_new = ifelse(is_any_condition_satisfied, y_current, y_new)
        return ([
            x_current, x_new, y_current, y_current_new, y_deriv_previous_new,
            theano_false, x_star
        ],
                theano.scan_module.scan_utils.until(
                    sequential_or(
                        T.eq(x_new, zero),
                        is_any_condition_satisfied,
                    )))
Example #54
def apply_mean_stress_theory(m_s_th, sm, rng, sn_0, r_m, r_y):
    rng = ifelse(
        tt.eq(1, m_s_th),
        ifelse(tt.lt(0, sm), rng / (1 - (sm / r_m)),
               ifelse(tt.le(r_m, tt.abs_(sm)), 1.01 * sn_0, rng)),
        ifelse(
            tt.eq(2, m_s_th),
            ifelse(tt.lt(tt.abs_(sm), r_m), rng / (1 - (sm / r_m)**2),
                   ifelse(tt.le(r_m, sm), 1.01 * sn_0, rng)),
            ifelse(
                tt.eq(3, m_s_th),
                ifelse(
                    tt.lt(0, sm) & tt.lt(sm, r_y), rng / 1 - (sm / r_y),
                    ifelse(tt.le(r_y, tt.abs_(sm)), 1.01 * sn_0, rng)), rng)))
    return rng
Example #55
    def interval_reduction(a, b, c, d, tol):
        fc = f(c)
        fd = f(d)

        a, b, c, d = ifelse(T.lt(fc,
                                 fd), [a, d, d - golden_ratio * (d - a), c],
                            [c, b, d, c + golden_ratio * (b - c)])

        stoprule = theano.scan_module.until(T.lt(T.abs_(c - d), tol))
        return [a, b, c, d], stoprule
Example #56
    def init_train_updates(self):
        updates = super(ErrDiffStepUpdate, self).init_train_updates()

        step = self.variables.step
        last_error = self.variables.last_error
        previous_error = self.variables.previous_error

        step_update_condition = ifelse(
            last_error < previous_error,
            self.update_for_smaller_error * step,
            ifelse(
                last_error > self.update_for_bigger_error * previous_error,
                self.update_for_bigger_error * step,
                step
            )

        )
        updates.append((step, step_update_condition))
        return updates
Example #57
 def compute_D(idx, Dm1, zS, zAA, zBB):
     D = ifelse(
         T.eq(idx, nT - 1),
         T.dot(
             Tla.matrix_inverse(zAA[iia[-1]]),
             III + T.dot(T.transpose(zBB[iib[idx - 1]]), T.dot(Dm1, S[0]))),
         ifelse(
             T.eq(idx, 0),
             Tla.matrix_inverse(zAA[iia[0]] -
                                T.dot(zBB[iib[0]], T.transpose(S[-1]))),
             T.dot(
                 Tla.matrix_inverse(
                     zAA[iia[idx]] -
                     T.dot(zBB[iib[T.min([idx, nT - 2])]],
                           T.transpose(S[T.max([-idx - 1, -nT + 1])]))),
                 III +
                 T.dot(T.transpose(zBB[iib[T.min([idx - 1, nT - 2])]]),
                       T.dot(Dm1, S[-idx])))))
     return D
Example #58
    def test_callback_with_ifelse(self):
        a, b, c = tensor.scalars("abc")
        f = function(
            [a, b, c],
            ifelse(a, 2 * b, 2 * c),
            mode=Mode(optimizer=None, linker=VMLinker(callback=self.callback)),
        )

        f(1, 2, 3)
        assert self.n_callbacks["IfElse"] == 2
Example #59
 def distribution_helper(self, w, X, F, conds):
     nx = X.shape[0]
     distr = T.alloc(1.0, nx, self.K)
     distr,_ = theano.scan( 
         lambda c,x,f,d: ifelse(T.eq(c,1.), self.distribution_helper_helper(x,f), d),
         sequences=[conds, X, F, distr])
     distr,_ = theano.scan(
         lambda d: -w*(T.min(d,keepdims=True)-d), # relative value w.r.t the minimum
         sequences=distr)
     return distr
Example #60
 def output_index(self):
   from theano.ifelse import ifelse
   index = self.index
   if self.sources:
     # In some cases, e.g. forwarding, the target index (for "classes") might have shape[0]==0.
     # Or shape[0]==1 with index[0]==0. See Dataset.shapes_for_batches().
     # Use source index in that case.
     have_zero = T.le(index.shape[0], 1) * T.eq(T.sum(index[0]), 0)
     index = ifelse(have_zero, self.sources[0].index, index)
   return index