Example #1
	def output(self, X):
		# TODO: activation for ReLu.

		if self.activation == 'sigmoid':
			return  1 / (1 + T.exp(-T.dot(X, self.w) - self.b))
		elif self.activation == 'tanh':
			return T.tanh(T.dot(X, self.w) + self.b)
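The TODO above asks for a ReLU branch; a minimal sketch of what it could look like, reusing the snippet's own T, self.w and self.b (a hypothetical addition, not taken from the original project):

		elif self.activation == 'relu':
			# hypothetical ReLU branch for the TODO above
			return T.maximum(T.dot(X, self.w) + self.b, 0.)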
Example #2
  def forwardPass(self, x):
    # Sample from the visible layer
    # Get the mask that is used for the visible units
    if self.visibleDropout in [1.0, 1]:
      currentLayerValues = x
    else:
      dropoutMask = self.theanoRng.binomial(n=1, p=self.visibleDropout,
                                            size=x.shape,
                                            dtype=theanoFloat)
      currentLayerValues = x * dropoutMask

    for stage in xrange(self.nrWeights - 1):
      w = self.weights[stage]
      b = self.biases[stage]
      linearSum = T.dot(currentLayerValues, w) + b
      # dropout: give the next layer only some of the units from this layer
      if self.hiddenDropout in [1.0, 1]:
        currentLayerValues = self.activationFunction.deterministic(linearSum)
      else:
        dropoutMaskHidden = self.theanoRng.binomial(n=1, p=self.hiddenDropout,
                                            size=linearSum.shape,
                                            dtype=theanoFloat)
        currentLayerValues = dropoutMaskHidden * self.activationFunction.deterministic(linearSum)

    # Last layer operations, no dropout in the output
    w = self.weights[self.nrWeights - 1]
    b = self.biases[self.nrWeights - 1]
    linearSum = T.dot(currentLayerValues, w) + b
    currentLayerValues = self.classificationActivationFunction.deterministic(linearSum)

    return currentLayerValues
Example #3
        def _step(x_, h_, c_, pred_, prob_):
            h_a = []
            c_a = []
            for it in range(self.n_levels):
                preact = T.dot(h_[it], self.U[it])
                preact += T.dot(x_, self.W[it]) + self.b[it]

                i = T.nnet.sigmoid(_slice(preact, 0, self.n_dim))
                f = T.nnet.sigmoid(_slice(preact, 1, self.n_dim))
                o = T.nnet.sigmoid(_slice(preact, 2, self.n_dim))
                c = T.tanh(_slice(preact, 3, self.n_dim))

                c = f * c_[it] + i * c
                h = o * T.tanh(c)

                h_a.append(h)
                c_a.append(c)

                x_ = h

            q = T.dot(h, self.L) + self.b0
            prob = T.nnet.softmax(q)
            pred = T.argmax(prob, axis=1)

            return T.stack(h_a).squeeze(), T.stack(c_a).squeeze(), pred, prob
Example #4
    def __init__(self,
                 input=tensor.dvector('input'),
                 target=tensor.dvector('target'),
                 n_input=1, n_hidden=1, n_output=1, lr=1e-3, **kw):
        super(NNet, self).__init__(**kw)

        self.input = input
        self.target = target
        self.lr = shared(lr, 'learning_rate')
        self.w1 = shared(numpy.zeros((n_hidden, n_input)), 'w1')
        self.w2 = shared(numpy.zeros((n_output, n_hidden)), 'w2')
        # print self.lr.type

        self.hidden = sigmoid(tensor.dot(self.w1, self.input))
        self.output = tensor.dot(self.w2, self.hidden)
        self.cost = tensor.sum((self.output - self.target)**2)

        self.sgd_updates = {
            self.w1: self.w1 - self.lr * tensor.grad(self.cost, self.w1),
            self.w2: self.w2 - self.lr * tensor.grad(self.cost, self.w2)}

        self.sgd_step = pfunc(
            params=[self.input, self.target],
            outputs=[self.output, self.cost],
            updates=self.sgd_updates)

        self.compute_output = pfunc([self.input], self.output)

        self.output_from_hidden = pfunc([self.hidden], self.output)
Example #5
  def __init__(self, input, nrLayers, weights, biases,
               visibleDropout, hiddenDropout,
               activationFunction, classificationActivationFunction):

    self.input = input

    self.classificationWeights = classificationWeightsFromTestWeights(weights,
                                            visibleDropout=visibleDropout,
                                            hiddenDropout=hiddenDropout)

    nrWeights = nrLayers - 1

    currentLayerValues = input

    for stage in xrange(nrWeights - 1):
      w = self.classificationWeights[stage]
      b = biases[stage]
      linearSum = T.dot(currentLayerValues, w) + b
      currentLayerValues = activationFunction.deterministic(linearSum)

    self.lastHiddenActivations = currentLayerValues

    w = self.classificationWeights[nrWeights - 1]
    b = biases[nrWeights - 1]
    linearSum = T.dot(currentLayerValues, w) + b
    currentLayerValues = classificationActivationFunction.deterministic(linearSum)

    self.output = currentLayerValues
Example #6
 def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
     self.inpt = inpt.reshape((mini_batch_size, self.n_in))
     self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
     self.y_out = T.argmax(self.output, axis=1)
     self.inpt_dropout = dropout_layer(
         inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
     self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
Example #7
    def generate(self, h_, c_, x_):
        h_a = []
        c_a = []
        for it in range(self.n_levels):
            preact = T.dot(x_, self.W[it])
            preact += T.dot(h_[it], self.U[it]) + self.b[it]

            i = T.nnet.sigmoid(self.slice(preact, 0, self.n_dim))
            f = T.nnet.sigmoid(self.slice(preact, 1, self.n_dim))
            o = T.nnet.sigmoid(self.slice(preact, 2, self.n_dim))
            c = T.tanh(self.slice(preact, 3, self.n_dim))

            c = f * c_[it] + i * c
            h = o * T.tanh(c)

            h_a.append(h)
            c_a.append(c)

            x_ = h

        q = T.dot(h, self.L) + self.b0
        # mask = T.concatenate([T.alloc(np_floatX(1.), q.shape[0] - 1), T.alloc(np_floatX(0.), 1)])
        prob = T.nnet.softmax(q / 1)

        return prob, T.stack(h_a).squeeze(), T.stack(c_a)[0].squeeze()
Example #8
    def _step(self, x, mask, M_tm1, wr_tm1, ww_tm1, *args):
        # read
        if self.inner_rnn == 'lstm':
            h_tm1 = args[0:2][::-1]  # (cell_tm1, h_tm1)
        else:
            h_tm1 = args[0:1]  # (h_tm1, )
        k_read, beta_read, g_read, gamma_read, s_read = self._get_controller_output(
            h_tm1[-1], self.W_k_read, self.b_k_read, self.W_c_read, self.b_c_read,
            self.W_s_read, self.b_s_read)
        wc_read = self._get_content_w(beta_read, k_read, M_tm1)
        wr_t = self._get_location_w(g_read, s_read, self.C, gamma_read,
                                    wc_read, wr_tm1, mask)
        M_read = self._read(wr_t, M_tm1)

        # update controller
        h_t = _update_controller(self, x, h_tm1, M_read, mask)

        # write
        k_write, beta_write, g_write, gamma_write, s_write = self._get_controller_output(
            h_t[-1], self.W_k_write, self.b_k_write, self.W_c_write,
            self.b_c_write, self.W_s_write, self.b_s_write)
        wc_write = self._get_content_w(beta_write, k_write, M_tm1)
        ww_t = self._get_location_w(g_write, s_write, self.C, gamma_write,
                                    wc_write, ww_tm1, mask)
        e = T.nnet.sigmoid(T.dot(h_t[-1], self.W_e) + self.b_e)
        a = T.tanh(T.dot(h_t[-1], self.W_a) + self.b_a)
        M_t = self._write(ww_t, e, a, M_tm1, mask)

        return (M_t, wr_t, ww_t) + h_t
Example #9
def free_energy_at_beta(model, samples, beta, pa_bias=None,
                        marginalize_odd=True):
    """
    Computes the free-energy of the sample `h1_sample`, for model p_k(h1).

    Inputs
    ------
    h1_sample: theano.shared
        Shared variable representing a sample of layer h1.
    beta: T.scalar
        Inverse temperature beta_k of model p_k(h1) at which to measure the free-energy.

    Returns
    -------
    Symbolic variable, free-energy of sample `h1_sample`, at inv. temp beta.
    """
    keep_idx = numpy.arange(not marginalize_odd, model.depth, 2)
    marg_idx = numpy.arange(marginalize_odd, model.depth, 2)

    # contribution of biases
    fe = 0.
    for i in keep_idx:
        fe -= T.dot(samples[i], model.bias[i]) * beta
    # contribution of the marginalized (summed-out) layers
    for i in marg_idx:
        from_im1 = T.dot(samples[i-1], model.W[i]) if i >= 1 else 0.
        from_ip1 = T.dot(samples[i+1], model.W[i+1].T) if i < model.depth-1 else 0
        net_input = (from_im1 + from_ip1 + model.bias[i]) * beta
        fe -= T.sum(T.nnet.softplus(net_input), axis=1)

    fe -= T.dot(samples[not marginalize_odd], pa_bias) * (1. - beta)

    return fe
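Reading the two loops above off in symbols (an informal restatement, not taken from the original docstring): with $s_i$ the layer samples, the returned free-energy is

$F_\beta(s) = -\beta \sum_{i \in \mathrm{keep}} s_i^\top b_i \;-\; \sum_{i \in \mathrm{marg}} \sum_j \operatorname{softplus}\!\big(\beta\,(s_{i-1} W_i + s_{i+1} W_{i+1}^\top + b_i)\big)_j \;-\; (1-\beta)\, s_k^\top p_a,$

where keep/marg are the even/odd layer indices selected by marginalize_odd, the $s_{i-1}$/$s_{i+1}$ terms are dropped at the boundaries, $p_a$ is pa_bias, and $k$ is the first kept layer.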
Example #10
 def recurrent_step(self, x_c_t, x_i_t, x_f_t, x_o_t, h_tm1, c_tm1, U_h_c, U_h_i, U_h_f, U_h_o):
     """
     Performs one computation step over time.
     """
     # new memory content c_tilde
     c_tilde = self.hidden_activation_func(
         x_c_t + T.dot(h_tm1, U_h_c)
     )
     # input gate
     i_t = self.inner_hidden_activation_func(
         x_i_t + T.dot(h_tm1, U_h_i)
     )
     # forget gate
     f_t = self.inner_hidden_activation_func(
         x_f_t + T.dot(h_tm1, U_h_f)
     )
     # new memory content
     c_t = f_t*c_tm1 + i_t*c_tilde
     # output gate
     o_t = self.inner_hidden_activation_func(
         x_o_t + T.dot(h_tm1, U_h_o)
     )
     # new hiddens
     h_t = o_t*self.hidden_activation_func(c_t)
     # return the hiddens and memory content
     return h_t, c_t
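In equation form, writing $\sigma$ for inner_hidden_activation_func and $\phi$ for hidden_activation_func (typically the logistic sigmoid and tanh), the step above computes

$\tilde{c}_t = \phi(x_{c,t} + h_{t-1} U_{hc}),\quad i_t = \sigma(x_{i,t} + h_{t-1} U_{hi}),\quad f_t = \sigma(x_{f,t} + h_{t-1} U_{hf}),\quad o_t = \sigma(x_{o,t} + h_{t-1} U_{ho}),$
$c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t,\qquad h_t = o_t \odot \phi(c_t),$

i.e. the standard LSTM recurrence with the input-to-gate projections $x_{\cdot,t}$ precomputed outside the step.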
Example #11
    def __init__(self, rng, input, n_in, n_out, n_component):
        self.input = input

        W_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_out*n_component))
        self.W_mu = theano.shared(value=numpy.asarray(W_value, dtype=theano.config.floatX), name='W_mu', borrow=True)

        self.W_sigma = theano.shared(value=numpy.asarray(W_value.copy(), dtype=theano.config.floatX), name='W_sigma', borrow=True)

        W_mix_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_component))
        self.W_mix = theano.shared(value=numpy.asarray(W_mix_value, dtype=theano.config.floatX), name='W_mix', borrow=True)

        self.mu = T.dot(self.input, self.W_mu)    #assume linear output for mean vectors
        self.sigma = T.nnet.softplus(T.dot(self.input, self.W_sigma)) # + 0.0001
        #self.sigma = T.exp(T.dot(self.input, self.W_sigma)) # + 0.0001

        self.mix = T.nnet.softmax(T.dot(self.input, self.W_mix))

        self.delta_W_mu    = theano.shared(value = numpy.zeros((n_in, n_out*n_component),
                                           dtype=theano.config.floatX), name='delta_W_mu')
        self.delta_W_sigma = theano.shared(value = numpy.zeros((n_in, n_out*n_component),
                                           dtype=theano.config.floatX), name='delta_W_sigma')
        self.delta_W_mix   = theano.shared(value = numpy.zeros((n_in, n_component),
                                           dtype=theano.config.floatX), name='delta_W_mix')


        self.params = [self.W_mu, self.W_sigma, self.W_mix]
        self.delta_params = [self.delta_W_mu, self.delta_W_sigma, self.delta_W_mix]
Example #12
 def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total):
     sigma2 = tt.square(sigma)
     Kuu = cov_total(Xu)
     Kuf = cov_total(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = cov_total(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
     else:  # VFE or DTC
         Lamd = tt.ones_like(Qffd) * sigma2
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - mean_total(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     Kus = self.cov_func(Xu, Xnew)
     As = solve_lower(Luu, Kus)
     mu = self.mean_func(Xnew) + tt.dot(tt.transpose(As), solve_upper(tt.transpose(L_B), c))
     C = solve_lower(L_B, As)
     if diag:
         Kss = self.cov_func(Xnew, diag=True)
         var = Kss - tt.sum(tt.square(As), 0) + tt.sum(tt.square(C), 0)
         if pred_noise:
             var += sigma2
         return mu, var
     else:
         cov = (self.cov_func(Xnew) - tt.dot(tt.transpose(As), As) +
                tt.dot(tt.transpose(C), C))
         if pred_noise:
             cov += sigma2 * tt.identity_like(cov)
         return mu, stabilize(cov)
Example #13
File: AE.py Project: felidadae/dnn
    def compileFunctions(self, x_image_global, examples, ib, B, K, corrupt):
        if x_image_global is None:
            x_image_global = self.x

        if corrupt == 0.0:
            self.x_c = self.x
        else:
            self.x_c = self.theano_rng.binomial(
                size=self.x.shape, n=1, p=1-corrupt,
                dtype=theano.config.floatX) * self.x

        self.h = self.g(T.dot(self.x_c, self.W_hl) + self.b_hl)
        self.x_r = self.o(T.dot(self.h, self.W_ol) + self.b_ol)
        self.params = [self.W_hl, self.b_hl, self.b_ol]
        self.cost = \
            (- T.sum(
                self.x * T.log(self.x_r) + (1 - self.x) * T.log(1 - self.x_r),
                axis=(0,1)))

        gparams = T.grad(self.cost, self.params)
        updates = [
            (param, param - K * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        fun_train = theano.function(
            inputs=[ib],
            outputs=(self.cost, self.x_r, self.x_c),
            updates=updates,
            givens={
                x_image_global: examples[ib*B: (ib+1)*B]
            }
        )

        return fun_train
Example #14
def mlp(insize, hiddensize, outsize, transferfunc='tanh', outfunc='id'):
    P = util.ParameterSet(
        inweights=(insize, hiddensize),
        hiddenbias=hiddensize,
        outweights=(hiddensize, outsize),
        outbias=outsize)

    P.randomize(1e-4)

    inpt = T.matrix('inpt')
    hidden_in = T.dot(inpt, P.inweights)
    hidden_in += P.hiddenbias

    nonlinear = transfermap[transferfunc]
    hidden = nonlinear(hidden_in)
    output_in = T.dot(hidden, P.outweights)
    output_in += P.outbias
    output = transfermap[outfunc](output_in)

    exprs = {'inpt': inpt,
             'hidden-in': hidden_in,
             'hidden': hidden,
             'output-in': output_in,
             'output': output}
    return exprs, P
Example #15
 def _build_marginal_likelihood_logp(self, y, X, Xu, sigma):
     sigma2 = tt.square(sigma)
     Kuu = self.cov_func(Xu)
     Kuf = self.cov_func(Xu, X)
     Luu = cholesky(stabilize(Kuu))
     A = solve_lower(Luu, Kuf)
     Qffd = tt.sum(A * A, 0)
     if self.approx == "FITC":
         Kffd = self.cov_func(X, diag=True)
         Lamd = tt.clip(Kffd - Qffd, 0.0, np.inf) + sigma2
         trace = 0.0
     elif self.approx == "VFE":
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = ((1.0 / (2.0 * sigma2)) *
                  (tt.sum(self.cov_func(X, diag=True)) -
                   tt.sum(tt.sum(A * A, 0))))
     else:  # DTC
         Lamd = tt.ones_like(Qffd) * sigma2
         trace = 0.0
     A_l = A / Lamd
     L_B = cholesky(tt.eye(Xu.shape[0]) + tt.dot(A_l, tt.transpose(A)))
     r = y - self.mean_func(X)
     r_l = r / Lamd
     c = solve_lower(L_B, tt.dot(A, r_l))
     constant = 0.5 * X.shape[0] * tt.log(2.0 * np.pi)
     logdet = 0.5 * tt.sum(tt.log(Lamd)) + tt.sum(tt.log(tt.diag(L_B)))
     quadratic = 0.5 * (tt.dot(r, r_l) - tt.dot(c, c))
     return -1.0 * (constant + logdet + quadratic + trace)
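Both this marginal-likelihood term and _build_conditional in Example #12 follow the standard FITC/VFE/DTC sparse-GP construction; as a sketch of the algebra being evaluated (my summary, in the code's notation): with $Q_{ff} = K_{fu} K_{uu}^{-1} K_{uf}$, the approximate marginal likelihood is $\mathcal{N}\big(y \mid m(X),\, Q_{ff} + \Lambda\big)$, where $\Lambda = \max(\operatorname{diag}(K_{ff} - Q_{ff}), 0) + \sigma^2 I$ for FITC and $\Lambda = \sigma^2 I$ for VFE/DTC, and VFE further subtracts the trace penalty $\tfrac{1}{2\sigma^2}\operatorname{tr}(K_{ff} - Q_{ff})$ from the log-likelihood. The Cholesky factors Luu and L_B evaluate this Gaussian via the matrix inversion lemma instead of forming $Q_{ff}$ explicitly.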
Example #16
    def __init__(self, rng, input, n_in, n_out, activation, W=None, b=None,
                 use_bias=False):

        self.input = input
        self.activation = activation

        if W is None:            
            if activation.func_name == "ReLU":
                W_values = numpy.asarray(0.01 * rng.standard_normal(size=(n_in, n_out)), dtype=theano.config.floatX)
            else:                
                W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_in + n_out)), high=numpy.sqrt(6. / (n_in + n_out)),
                                                     size=(n_in, n_out)), dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W')        
        if b is None:
            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b')

        self.W = W
        self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = (lin_output if activation is None else activation(lin_output))
    
        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]
Example #17
 def _construct_mom_stuff(self):
     """
     Construct the cost function for the moment-matching "regularizer".
     """
     a = self.mom_mix_rate
     dist_mean = self.GN.dist_mean
     dist_cov = self.GN.dist_cov
     # Get the generated sample observations for this batch, transformed
     # linearly into the desired space for moment matching...
     X_b = T.dot(self.GN.output, self.mom_match_proj)
     # Get their mean
     batch_mean = T.mean(X_b, axis=0)
     # Get the updated generator distribution mean
     new_mean = ((1.0 - a[0]) * self.GN.dist_mean) + (a[0] * batch_mean)
     # Use the mean to get the updated generator distribution covariance
     X_b_minus_mean = X_b - new_mean
     # Whelp, I guess this line needs the cast... for some reason...
     batch_cov = T.dot(X_b_minus_mean.T, X_b_minus_mean) / T.cast(X_b.shape[0], 'floatX')
     new_cov = ((1.0 - a[0]) * self.GN.dist_cov) + (a[0] * batch_cov)
     # Get the cost for deviation from the target distribution's moments
     mean_err = new_mean - self.target_mean
     cov_err = (new_cov - self.target_cov)
     mm_cost = self.mom_match_weight[0] * \
             (T.sum(mean_err**2.0) + T.sum(cov_err**2.0))
     # Construct the updates for the running estimates of the generator
     # distribution's first and second-order moments.
     mom_updates = OrderedDict()
     mom_updates[self.GN.dist_mean] = new_mean
     mom_updates[self.GN.dist_cov] = new_cov
     return [mm_cost, mom_updates]
Example #18
    def infer_H_hat_two_sided(self, H_hat_below, W_below, H_hat_above, W_above, b):

        bottom_up = T.dot(H_hat_below, W_below)
        top_down =  T.dot(H_hat_above, W_above.T)
        total = bottom_up + top_down + b

        H_hat = T.nnet.sigmoid(total)

        return H_hat
Example #19
def __init():
    dataset = T.matrix("dataset", dtype=config.globalFloatType())
    trans_dataset = T.transpose(dataset)
    dot_mul = T.dot(dataset, trans_dataset)
    l2 = T.sqrt(T.sum(T.square(dataset), axis=1))
    
#     p =printing.Print("l2")
#     l2 = p(l2)
    
    l2_inv2 = T.inv(l2).dimshuffle(['x', 0])
#     p =printing.Print("l2_inv2")
#     l2_inv2 = p(l2_inv2)
    
    l2_inv1 = T.transpose(l2_inv2)
#     p =printing.Print("l2_inv1")
#     l2_inv1 = p(l2_inv1)
    
    l2_inv = T.dot(l2_inv1, l2_inv2)
    
#     p =printing.Print("l2_inv")
#     l2_inv = p(l2_inv)
    
    affinty = (T.mul(dot_mul, l2_inv) + 1) / 2
    globals()['__affinty_fun'] = theano.function(
             [dataset],
             [affinty],
             allow_input_downcast=True
             )
Example #20
	def gibbs_vhv(self,v_sample):
		h_activation_score = T.dot(v_sample,self.W)   + self.h_bias
		h_activation_probs, h_sample, h_updates = self.h.sample(h_activation_score)
		v_activation_score = T.dot(h_sample,self.W.T) + self.v_bias
		v_activation_probs, v_sample, v_updates  = self.v.sample(v_activation_score)
		return h_activation_score,h_activation_probs,h_sample,\
			   v_activation_score,v_activation_probs,v_sample
Example #21
    def eq_log_pstar_vgh(self, g_hat, h_hat, s1_hat, s0_hat, v):
        """
        Computes the expectation (under the variational distribution q(g,h)=q(g)q(h)) of the
        log un-normalized probability, i.e. log p^*(g,h,s,v)
        :param g_hat: T.matrix of shape (batch_size, n_g)
        :param h_hat: T.matrix of shape (batch_size, n_h)
        :param v    : T.matrix of shape (batch_size, n_v)
        """
        from_v = self.from_v(v)
        from_h = self.from_h(h_hat)
        from_g = self.from_g(g_hat)

        # center variables
        cg_hat = g_hat - self.cg if self.flags['center_g'] else g_hat
        ch_hat = h_hat - self.ch if self.flags['center_h'] else h_hat
        # compute expectation of various s-quantities
        s_hat  = self.s_hat(ch_hat, s1_hat, s0_hat)
        ss_hat = self.s_hat(ch_hat, s1_hat**2 + 1./self.alpha_prec,
                                    s0_hat**2 + 1./self.alpha_prec)

        lq  = 0.
        lq += T.sum(from_v * self._mu * from_h, axis=1)
        lq += T.sum(from_v * s1_hat * from_h, axis=1)
        lq -= 0.5 * T.sum(self.alpha_prec * ss_hat, axis=1)
        lq -= T.sum(0.5 * self.lambd_prec * v**2, axis=1)
        lq += T.sum(self.alpha_prec * from_g  * s_hat, axis=1)
        lq += T.dot(cg_hat, self.gbias)
        lq += T.dot(ch_hat, self.hbias)
        return T.mean(lq), [g_hat, h_hat, s_hat, ss_hat, s1_hat, s0_hat, v]
Example #22
def _compile_func():
    beta = T.vector('beta')
    b = T.scalar('b')
    X = T.matrix('X')
    y = T.vector('y')
    C = T.scalar('C')
    params = [beta, b, X, y, C]
    cost = 0.5 * (T.dot(beta, beta) + b * b) + C * T.sum(
        T.nnet.softplus(
            -T.dot(T.diag(y), T.dot(X, beta) + b)
        )
    )
    # Function computing in one go the cost, its gradient
    # with regard to beta and with regard to the bias.
    cost_grad = theano.function(params,[
        cost,
        T.grad(cost, beta),
        T.grad(cost, b)
    ])

    # Function for computing element-wise sigmoid, used for
    # prediction.
    log_predict = theano.function(
        [beta, b, X],
        T.nnet.sigmoid(b + T.dot(X, beta)),
        on_unused_input='warn'
    )

    return (cost_grad, log_predict)
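A minimal usage sketch for the two compiled functions above, with illustrative NumPy data (the variable names and shapes below are my own, and theano.config.floatX is assumed to match the arrays' dtype):

import numpy as np

cost_grad, log_predict = _compile_func()

X = np.random.randn(100, 5)                         # 100 samples, 5 features
y = np.where(np.random.rand(100) > 0.5, 1.0, -1.0)  # labels in {-1, +1}, as the softplus margin loss expects
beta0, b0, C = np.zeros(5), 0.0, 1.0

cost, grad_beta, grad_b = cost_grad(beta0, b0, X, y, C)  # objective value and its gradients
probs = log_predict(beta0, b0, X)                        # element-wise sigmoid predictions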
Example #23
	def factors(self, w, x, z, A):
		
		if self.data == 'binary':
			def f_xi(zi, xi):
				pi = T.nnet.sigmoid(T.dot(w['wx'], zi) + T.dot(w['bx'], A)) # pi = p(X_i=1)
				logpxi = - T.nnet.binary_crossentropy(pi, xi).sum(axis=0, keepdims=True)# logpxi = log p(X_i=x_i)
				#logpxi = T.log(pi*xi+(1-pi)*(1-xi)).sum(axis=0, keepdims=True)
				return logpxi
		elif self.data == 'gaussian':
			def f_xi(zi, xi):
				x_mean = T.dot(w['wx'], zi) + T.dot(w['bx'], A)
				x_logvar = T.dot(2*w['logsdx'], A)
				return ap.logpdfs.normal2(xi, x_mean, x_logvar).sum(axis=0, keepdims=True)
		else: raise Exception()
		
		# Factors of X and Z
		logpx = 0
		logpz = 0
		sd = T.dot(T.exp(w['logsd']), A)
		for i in range(self.n_steps):
			if i == 0:
				logpz += logpdfs.standard_normal(z['z'+str(i)]).sum(axis=0, keepdims=True)
			if i > 0:
				mean = T.tanh(T.dot(w['wz'], z['z'+str(i-1)]) + T.dot(w['bz'], A))
				logpz += logpdfs.normal(z['z'+str(i)], mean, sd).sum(axis=0, keepdims=True)
			logpxi = f_xi(z['z'+str(i)], x['x'+str(i)])
			logpx += logpxi
		
		# joint() = logp(x,z,w) = logp(x|z) + logp(z) + logp(w) + C
		# This is a proper scalar function
		logpw = 0
		for i in w:
			logpw += logpdfs.normal(w[i], 0, self.prior_sd).sum() # logp(w)
		
		return logpw, logpx, logpz, {}
Example #24
    def forward_prop(self,F,S):
        # We assume F is a m x n matrix (m rows, n columns)
        # and S is a 1 x o where o is our output size.
        # Our weight matrix (self.w) will be n x o.

        # Resize our bias to be appropriate size (batch_size x o)
        resized_bias = T.extra_ops.repeat(self.bh, F.shape[0], axis=0)
        # Combine our input data (F) with our weight matrix and bias.
        recurrent_gate = T.dot(F,self.wx) #T.nnet.sigmoid(T.dot(F,self.wx))

        # Resize the state value to have batch_size x output_size shape
        weighted_state = T.dot(S,self.wh)
        hidden_state = T.extra_ops.repeat(weighted_state, F.shape[0], axis=0)

        # Combine the recurrent_gate with our resized hidden state
        # Should I use T.tanh on the hidden_state?
        output = T.nnet.sigmoid(recurrent_gate + hidden_state + resized_bias)

        # This will average the values across the batch_size and
        # return a vector of size 1 x o (output_size)
        new_state = T.mean(hidden_state, axis=0)
        new_state = new_state.reshape((1,self.y))
        # Cast the output
        output_cast = T.cast(output,theano.config.floatX)
        return new_state,output_cast
Example #25
def model(X, w1, w2, w3, Max_Pooling_Shape, p_drop_conv, p_drop_hidden):
    l1 = T.flatten(
        dropout(max_pool_2d(rectify(conv2d(X, w1, border_mode="valid")), Max_Pooling_Shape), p_drop_conv), outdim=2
    )
    l2 = dropout(rectify(T.dot(l1, w2)), p_drop_hidden)
    pyx = softmax(T.dot(l2, w3))
    return pyx
Example #26
    def __init__(self, rng, train_input, test_input, n_in, n_out):

        # self.input = input.flatten(2)

        self.W = theano.shared(
            value=numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )

        self.b = theano.shared(
            value=numpy.zeros((n_out,), dtype=theano.config.floatX),
            name='b',
            borrow=True
        )

        p = 0.5

        tmp_output = T.nnet.relu(T.dot(train_input.flatten(2), self.W) + self.b)
        srng = RandomStreams(rng.randint(1234))
        mask = (srng.uniform(size=tmp_output.shape) < p)/p

        self.train_output = tmp_output * mask
        self.test_output = T.nnet.relu(T.dot(test_input.flatten(2), self.W) + self.b)
        self.params = [self.W, self.b]
Example #27
    def __init__(self, rng, input1, input2, n_in, n_out):

        self.input1 = input1.flatten(2)
        self.input2 = input2.flatten(2)

        self.W = theano.shared(
            value=numpy.asarray(
                rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)
                ),
                dtype='float32'
            ),
            name='W',
            borrow=True
        )

        self.b = theano.shared(
            value=numpy.zeros((n_out,), dtype='float32'),
            name='b',
            borrow=True
        )

        lin_output1 = T.dot(self.input1, self.W) + self.b
        lin_output2 = T.dot(self.input2, self.W) + self.b

        self.output1 = T.nnet.relu(lin_output1)
        self.output2 = T.nnet.relu(lin_output2)
        self.similarity = self.similarity_func(self.output1, self.output2)
        self.params = [self.W, self.b]
Example #28
 def rbm_fe(rbm_params, v, b):
     (weights, visbias, hidbias) = rbm_params
     vis_term = b * tensor.dot(v, visbias)
     hid_act = b * (tensor.dot(v, weights) + hidbias)
     fe = -vis_term - tensor.sum(tensor.log(1 + tensor.exp(hid_act)),
                                 axis=1)
     return fe
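Reading rbm_fe off in symbols: at inverse temperature $b$ the free energy of a visible configuration $v$ is

$F_b(v) = -\,b\, v^\top b_{\mathrm{vis}} \;-\; \sum_j \log\!\big(1 + \exp\!\big(b\,(v W + b_{\mathrm{hid}})_j\big)\big),$

i.e. the usual RBM free energy with the energy terms scaled by $b$.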
Example #29
		def pred_t(input_voc_t, weight_tm1, memory_tm1):
			rawinput_t = self.embedding[input_voc_t]
			input_t = T.dot(rawinput_t,self.input_w)
			read_m = T.dot(weight_tm1, memory_tm1)
			read_t = T.dot(read_m,self.read_w)
			controller_input = activation(input_t+read_t+self.input_b)
			hid = self.controller.getY(controller_input)
			output = T.nnet.softmax(T.dot(hid, self.output_w)+self.output_b)
			result = T.switch(T.eq(input_voc_t, 0),T.argmax(output,axis=1), theano.shared(0))
			#test = controller_input
			
			memory_inter = memory_tm1
			weight_inter = weight_tm1
			for head in self.heads:
				weight_inter, erase, add= head.emit_new_weight(hid, weight_inter, memory_inter)
				#write to memory
				weight_tdim = weight_inter.dimshuffle((0, 'x'))
				erase_dim = erase.dimshuffle(('x', 0))
				add_dim = add.dimshuffle(('x', 0))
				M_erased = memory_inter*(1-(weight_tdim*erase_dim))
				memory_inter = M_erased+(weight_tdim*add_dim)

			#testing = weight_tm1
			#testing2 = rawinput_t
			memory_t = memory_inter
			weight_t = weight_inter
			

			return weight_t, memory_t, output,result
Example #30
    def get_pred_prob(self):
        z1 = T.dot(self.input, self.W1) + self.b1
        a1 = T.tanh(z1)
        z2 = T.dot(a1, self.W2) + self.b2
        y_hat = T.nnet.softmax(z2) # output probabilties

        return y_hat
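In equation form, get_pred_prob computes the two-layer MLP prediction $\hat{y} = \operatorname{softmax}\!\big(\tanh(x W_1 + b_1)\, W_2 + b_2\big)$.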
Example #31
    def factors(self, x, z, A):

        v = self.v
        w = self.w
        '''
        z is unused
        x['x'] is the data
        
        The names of dict z[...] may be confusing here: the latent variable z is not included in the dict z[...],
        but implicitly computed from epsilon and parameters in w.

        z is computed with g(.) from eps and variational parameters
        let logpx be the generative model density: log p(x|z) where z=g(.)
        let logpz be the prior of Z plus the entropy of q(z|x): logp(z) + H_q(z|x)
        So the lower bound L(x) = logpx + logpz
        
        let logpv and logpw be the (prior) density of the parameters
        '''
        def f_softplus(x):
            return T.log(T.exp(x) + 1)  # - np.log(2)

        def f_rectlin(x):
            return x * (x > 0)

        def f_rectlin2(x):
            return x * (x > 0) + 0.01 * x

        nonlinear = {
            'tanh': T.tanh,
            'sigmoid': T.nnet.sigmoid,
            'softplus': f_softplus,
            'rectlin': f_rectlin,
            'rectlin2': f_rectlin2
        }
        nonlinear_q = nonlinear[self.nonlinear_q]
        nonlinear_p = nonlinear[self.nonlinear_p]

        #rng = rng_curand.CURAND_RandomStreams(0)
        import theano.tensor.shared_randomstreams
        rng = theano.tensor.shared_randomstreams.RandomStreams(0)

        # Compute q(z|x,y)
        #
        # it seems that z = f(v['w0x'] * x + v['w0y'] * y + b)
        #
        hidden_q = [
            nonlinear_q(
                T.dot(v['w0x'], x['x']) + T.dot(v['w0y'], x['y']) +
                T.dot(v['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_q)):
            hidden_q.append(
                nonlinear_q(
                    T.dot(v['w' + str(i)], hidden_q[-1]) +
                    T.dot(v['b' + str(i)], A)))

        q_mean = T.dot(v['mean_w'], hidden_q[-1]) + T.dot(v['mean_b'], A)
        if self.type_qz == 'gaussian' or self.type_qz == 'gaussianmarg':
            q_logvar = T.dot(v['logvar_w'], hidden_q[-1]) + T.dot(
                v['logvar_b'], A)
        else:
            raise Exception()

        # function for distribution q(z|x)
        theanofunc = lazytheanofunc('warn', mode='FAST_RUN')
        self.dist_qz['z'] = theanofunc([x['x'], x['mean_prior'], x['y']] + [A],
                                       [q_mean, q_logvar])

        # Compute virtual sample
        eps = rng.normal(size=q_mean.shape, dtype='float32')
        _z = q_mean + T.exp(0.5 * q_logvar) * eps

        # Compute log p(x|z)
        #
        # log p(x | z, y)
        # It seems that x = f((w0y * y + w0z * z) + b0)
        #
        hidden_p = [
            nonlinear_p(
                T.dot(w['w0y'], x['y']) + T.dot(w['w0z'], _z) +
                T.dot(w['b0'], A))
        ]
        for i in range(1, len(self.n_hidden_p)):
            hidden_p.append(
                nonlinear_p(
                    T.dot(w['w' + str(i)], hidden_p[-1]) +
                    T.dot(w['b' + str(i)], A)))
            if self.dropout:
                hidden_p[-1] *= 2. * (rng.uniform(size=hidden_p[-1].shape,
                                                  dtype='float32') > .5)

        if self.type_px == 'bernoulli':
            p = T.nnet.sigmoid(
                T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A))
            _logpx = -T.nnet.binary_crossentropy(p, x['x'])
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A], p)
        elif self.type_px == 'gaussian':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.normal2(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])
        elif self.type_px == 'laplace':
            x_mean = T.dot(w['out_w'], hidden_p[-1]) + T.dot(w['out_b'], A)
            x_logvar = T.dot(w['out_logvar_w'], hidden_p[-1]) + T.dot(
                w['out_logvar_b'], A)
            _logpx = ap.logpdfs.laplace(x['x'], x_mean, x_logvar)
            self.dist_px['x'] = theanofunc([x['y'], _z] + [A],
                                           [x_mean, x_logvar])

        else:
            raise Exception("")

        # Note: logpx is a row vector (one element per sample)
        logpx = T.dot(shared32(np.ones((1, self.n_x))),
                      _logpx)  # logpx = log p(x|z,w)

        # log p(y) (prior of y)
        #_logpy = w['logpy']
        #if self.uniform_y: _logpy *= 0
        #py_model = T.nnet.softmax(T.dot(_logpy, A).T).T
        #logpy = (- T.nnet.categorical_crossentropy(py_model.T, x['y'].T).T).reshape((1,-1))
        #logpx += logpy
        #self.dist_px['y'] = theanofunc([A], py_model)

        # log p(z) (prior of z)
        #
        # E_q[log(p(z))]
        #
        if self.type_pz == 'gaussianmarg':
            logpz = -0.5 * (np.log(2 * np.pi) + (
                (q_mean - x['mean_prior'])**2 + T.exp(q_logvar))).sum(
                    axis=0, keepdims=True)
        elif self.type_pz == 'gaussian':
            logpz = ap.logpdfs.standard_normal(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'mog':
            pz = 0
            for i in range(self.n_mixture):
                pz += T.exp(
                    ap.logpdfs.normal2(_z, T.dot(w['mog_mean' + str(i)], A),
                                       T.dot(w['mog_logvar' + str(i)], A)))
            logpz = T.log(pz).sum(axis=0, keepdims=True) - self.n_z * np.log(
                float(self.n_mixture))
        elif self.type_pz == 'laplace':
            logpz = ap.logpdfs.standard_laplace(_z).sum(axis=0, keepdims=True)
        elif self.type_pz == 'studentt':
            logpz = ap.logpdfs.studentt(_z, T.dot(T.exp(w['logv']),
                                                  A)).sum(axis=0,
                                                          keepdims=True)
        else:
            raise Exception("Unknown type_pz")

        # log q(z|x) (entropy of z)
        #
        # E_q[-log(q)]
        #
        if self.type_qz == 'gaussianmarg':
            logqz = -0.5 * (np.log(2 * np.pi) + 1 + q_logvar).sum(
                axis=0, keepdims=True)
        elif self.type_qz == 'gaussian':
            logqz = ap.logpdfs.normal2(_z, q_mean, q_logvar).sum(axis=0,
                                                                 keepdims=True)
        else:
            raise Exception()

        # Note: logpv and logpw are scalars
        def f_prior(_w, prior_sd=self.prior_sd):
            return ap.logpdfs.normal(_w, 0, prior_sd).sum()

        logpv = 0
        logpv += f_prior(v['w0x'])
        logpv += f_prior(v['w0y'])
        for i in range(1, len(self.n_hidden_q)):
            logpv += f_prior(v['w' + str(i)])
        logpv += f_prior(v['mean_w'])
        if self.type_qz in ['gaussian', 'gaussianmarg']:
            logpv += f_prior(v['logvar_w'])

        logpw = 0
        logpw += f_prior(w['w0y'])
        logpw += f_prior(w['w0z'])
        for i in range(1, len(self.n_hidden_p)):
            logpw += f_prior(w['w' + str(i)])
        logpw += f_prior(w['out_w'])
        if self.type_px in ['sigmoidgaussian', 'gaussian', 'laplace']:
            logpw += f_prior(w['out_logvar_w'])
        if self.type_pz == 'studentt':
            logpw += f_prior(w['logv'])

        #return logpv, logpw, logpx, logpz, logqz
        return logpx, logpz, logqz
Example #32
 def _get_fertility(self, c):
     fertility = T.nnet.sigmoid(T.dot(c, self.W_cov_fertility) + self.b_cov_fertility) * self.max_fertility
     fertility = fertility.reshape((c.shape[0], c.shape[1]))
     return fertility
Example #33
net = build_model()
# loading pretrained weights
model = pickle.load(open('./blvc_googlenet.pkl', 'rb'))
lasagne.layers.set_all_param_values(net['prob'], model['param values'])

googlenet_features = lasagne.layers.get_output(net['pool5/7x7_s1'], X)

# add a mlp on top of this
W = theano.shared(
    numpy.random.uniform(low=-0.1, high=0.1,
                         size=(1024, 10)).astype(numpy.float32),
    'linear_weights')
b = theano.shared(numpy.zeros(10).astype(numpy.float32))
all_parameters = [W, b]

output = tensor.dot(googlenet_features, W) + b
pred = tensor.nnet.softmax(output)

loss = categorical_crossentropy(pred, targets).mean()
loss.name = 'loss'

loss_test = categorical_crossentropy(pred, targets).mean()
loss_test.name = 'loss_test'

error = tensor.neq(tensor.argmax(pred, axis=1), tensor.argmax(targets,
                                                              axis=1)).mean()
error.name = 'error'

error_test = tensor.neq(tensor.argmax(pred, axis=1),
                        tensor.argmax(targets, axis=1)).mean()
error_test.name = 'error_test'
Example #34
def dot(x, y):
    return T.dot(x, y)
Example #35
	def build_read(M_curr,weight_curr):
		return T.dot(weight_curr, M_curr)
Example #36
 def rnn_step(_x_tm1, _h_tm1, _W_x, W_h):
     return T.nnet.sigmoid(T.dot(_x_tm1, _W_x.T) + T.dot(_h_tm1, W_h.T))
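In symbols this is the vanilla RNN update $h_t = \sigma\big(x_{t-1} W_x^\top + h_{t-1} W_h^\top\big)$, with the logistic sigmoid as the recurrent nonlinearity.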
Example #37
# -*- coding: utf-8 -*-
"""
Created on Sat Sep 30 11:11:12 2017
Defines example functions for element-wise and matrix multiplication
@author: DELL
"""
import theano 
import theano.tensor as T

a=T.matrix()
b=T.matrix()

c=a*b
d=T.dot(a,b)

F1= theano.function([a,b],c)
F2=theano.function([a,b],d)

A=[[1,2],[3,4]]
B=[[2,4],[6,8]]
C=[[1,2],[3,4],[5,6]]

print (F1(A,B))
print(F2(C,B))
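For reference, hand-evaluating the two calls above (my arithmetic, not output captured from the original script): the element-wise product gives F1(A, B) = [[2, 8], [18, 32]], and the matrix product gives F2(C, B) = [[14, 20], [30, 44], [46, 68]].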
Example #38
# declare the x,y
x = T.dmatrix("x")
y = T.dvector("y")
learning_rate = T.dscalar("lr")

# declare the weight w and b
w = theano.shared(value=numpy.random.rand(feat), name="w")
b = theano.shared(value=0., name="b")

print("initialized weights \n")
print(w.get_value())
print(b.get_value())

# build the graph
output = 1/(1+T.exp(-T.dot(x, w)-b))
prediction = output > 0.5
cross_entropy = -y * T.log(output) - (1-y)*T.log(1-output)
loss = cross_entropy.mean() + 0.01*(w**2).sum()
gradW, gradb = T.grad(loss, [w, b])

# train function
train = theano.function(inputs=[x,y,learning_rate], outputs=[prediction, cross_entropy,loss, learning_rate], \
                        updates=((w,w-learning_rate*gradW), (b,b-learning_rate*gradb)))
# predict function
predict = theano.function(inputs=[x], outputs=prediction)

for i in range(training_step):
    if (i < 1000):
        learning_rate = 0.1
    else:
Example #39
 def mlp_pred(non_linearity):
     Z = [T.dot(X, W) for W in model.W1]
     H = map(non_linearity, Z)
     Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
     pred = sum(Z)
     return pred
Example #40
 def setup_outputs(self, input):
     lin_output = T.dot(input, self.W) + self.b
     self.output = (lin_output if self.activation is None else
                    self.activation(lin_output))
Example #41
    def __init__(self,
                 input,
                 n_in,
                 n_out,
                 W=None,
                 b=None,
                 prob_constraint_on=None):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie
    
        :type prob_constraint_on: str or None
        :param prob_constraint_on: whether and how to apply the probability
                                   constraints (None, "top" or "down")

        """

        # initialize weight matrix W
        if W is None:
            self.W = theano.shared(value=np.zeros((n_in, n_out),
                                                  dtype=theano.config.floatX),
                                   name='W')
        else:
            self.W = W

        # initialize bias b
        if b is None:
            self.b = theano.shared(value=np.zeros((n_out, ),
                                                  dtype=theano.config.floatX),
                                   name='b')
        else:
            self.b = b

        # compute prediction
        # the linear output
        lin_output = T.dot(input, self.W) + self.b

        if prob_constraint_on is None:
            #### we do not use those probability constraints
            self.y_pred = Sigmoid(lin_output)

        elif prob_constraint_on == "top":
            #### We first predict the probability of each class using softmax.
            # We then weight those probabilities by multiplying them by the
            # probability of their parent in the Galaxy Zoo Decision Tree.

            # class 1
            prob_Class1 = SoftMax(lin_output[:, 0:3])

            # class 2
            prob_Class2 = SoftMax(lin_output[:, 3:5])
            # weight these probabilities using the probability of class 1.2
            prob_Class2 *= T.shape_padright(prob_Class1[:, 1])

            # class 3
            prob_Class3 = SoftMax(lin_output[:, 5:7])
            # weight these probabilities using the probability of class 2.2
            prob_Class3 *= T.shape_padright(prob_Class2[:, 1])

            # class 4
            prob_Class4 = SoftMax(lin_output[:, 7:9])
            # weight these probabilities using the probability of class 2.2
            prob_Class4 *= T.shape_padright(prob_Class2[:, 1])

            # class 5
            prob_Class5 = SoftMax(lin_output[:, 9:13])
            # weight these probabilities using the probability of class 2.2
            prob_Class5 *= T.shape_padright(prob_Class2[:, 1])

            # class 6
            prob_Class6 = SoftMax(lin_output[:, 13:15])

            # class 7
            prob_Class7 = SoftMax(lin_output[:, 15:18])
            # weight these probabilities using the probability of class 1.1
            prob_Class7 *= T.shape_padright(prob_Class1[:, 0])

            # class 8
            prob_Class8 = SoftMax(lin_output[:, 18:25])
            # weight these probabilities using the probability of class 6.1
            prob_Class8 *= T.shape_padright(prob_Class6[:, 0])

            # class 9
            prob_Class9 = SoftMax(lin_output[:, 25:28])
            # weight these probabilities using the probability of class 2.1
            prob_Class9 *= T.shape_padright(prob_Class2[:, 0])

            # class 10
            prob_Class10 = SoftMax(lin_output[:, 28:31])
            # weight these probabilities using the probability of class 4.1
            prob_Class10 *= T.shape_padright(prob_Class4[:, 0])

            # class 11
            prob_Class11 = SoftMax(lin_output[:, 31:37])
            # weight these probabilities using the probability of class 4.1
            prob_Class11 *= T.shape_padright(prob_Class4[:, 0])

            # concatenate all the probabilities into a single tensor variable
            self.y_pred = T.concatenate([
                prob_Class1, prob_Class2, prob_Class3, prob_Class4,
                prob_Class5, prob_Class6, prob_Class7, prob_Class8,
                prob_Class9, prob_Class10, prob_Class11
            ],
                                        axis=1)
        elif prob_constraint_on == "down":
            #### we use those probability constraints

            # the following probabilities should sum up to 1, so we use SoftMax
            # to predict all of them
            ind1 = [2, 8, 15, 16, 17, 25, 26, 27, 31, 32, 33, 34, 35, 36]
            p1 = SoftMax(lin_output[:, ind1])
            prob_Class1_3 = p1[:, 0]
            prob_Class4_2 = p1[:, 1]
            prob_Class7 = p1[:, 2:5]
            prob_Class9 = p1[:, 5:8]
            prob_Class11 = p1[:, 8:14]

            prob_Class4_1 = T.sum(prob_Class11, axis=1)
            prob_Class2_1 = T.sum(prob_Class9, axis=1)
            prob_Class2_2 = prob_Class4_1 + prob_Class4_2
            prob_Class1_1 = T.sum(prob_Class7, axis=1)
            prob_Class1_2 = prob_Class2_1 + prob_Class2_2
            prob_Class1 = T.concatenate([
                T.shape_padright(prob_Class1_1),
                T.shape_padright(prob_Class1_2),
                T.shape_padright(prob_Class1_3)
            ],
                                        axis=1)
            prob_Class2 = T.concatenate([
                T.shape_padright(prob_Class2_1),
                T.shape_padright(prob_Class2_2)
            ],
                                        axis=1)
            prob_Class4 = T.concatenate([
                T.shape_padright(prob_Class4_1),
                T.shape_padright(prob_Class4_2)
            ],
                                        axis=1)

            # the following probabilities should sum up to 1, so we use SoftMax
            # to predict all of them
            ind2 = [14, 18, 19, 20, 21, 22, 23, 24]  # columns 18-24 are the class 8 outputs
            p2 = SoftMax(lin_output[:, ind2])
            prob_Class6_2 = p2[:, 0]
            prob_Class8 = p2[:, 1:8]
            prob_Class6_1 = T.sum(prob_Class8, axis=1)
            prob_Class6 = T.concatenate([
                T.shape_padright(prob_Class6_1),
                T.shape_padright(prob_Class6_2)
            ],
                                        axis=1)

            # for the following probabilities, we resort to the same strategy in
            # the "top" option
            # class 3
            prob_Class3 = SoftMax(lin_output[:, 5:7])
            # weight these probabilities using the probability of class 2.2
            prob_Class3 *= T.shape_padright(prob_Class2[:, 1])

            # class 5
            prob_Class5 = SoftMax(lin_output[:, 9:13])
            # weight these probabilities using the probability of class 2.2
            prob_Class5 *= T.shape_padright(prob_Class2[:, 1])

            # class 10
            prob_Class10 = SoftMax(lin_output[:, 28:31])
            # weight these probabilities using the probability of class 4.1
            prob_Class10 *= T.shape_padright(prob_Class4[:, 0])

            # concatenate all the probabilities into a single tensor variable
            self.y_pred = T.concatenate([
                prob_Class1, prob_Class2, prob_Class3, prob_Class4,
                prob_Class5, prob_Class6, prob_Class7, prob_Class8,
                prob_Class9, prob_Class10, prob_Class11
            ],
                                        axis=1)

        # parameters of the model
        self.params = [self.W, self.b]
Example #42
 def recurrence(_x, i_m1, i_m2):
     ati = T.dot(_x, Ws[0])
     _m1 = T.maximum(i_m1, ati)
     ati = i_m1 + T.dot(_x, Ws[1])
     _m2 = T.maximum(i_m2, ati)
     return [_m1, _m2]
Example #43
def general_unitary_RNN(n_input,
                        n_hidden,
                        n_output,
                        input_type='real',
                        out_every_t=False,
                        loss_function='CE'):
    # STEPH: hey, it's mine! copying proclivity towards boilerplate from rest
    #   of code: this is derived from complex_RNN!
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    # TODO: all from here (requires some engineering thoughts)
    # TODO TODO TODO
    # Initialize parameters: theta, V_re, V_im, hidden_bias, U, out_bias, h_0
    V = initialize_matrix(n_input, 2 * n_hidden, 'V', rng)
    U = initialize_matrix(2 * n_hidden, n_output, 'U', rng)
    # STEPH: U was previously known as out_mat
    hidden_bias = theano.shared(np.asarray(rng.uniform(low=-0.01,
                                                       high=0.01,
                                                       size=(n_hidden, )),
                                           dtype=theano.config.floatX),
                                name='hidden_bias')
    # STEPH: hidden bias is simply initialised differently in this case
    reflection = initialize_matrix(2, 2 * n_hidden, 'reflection', rng)
    # STEPH: part of recurrence (~W)
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX),
                             name='out_bias')
    theta = theano.shared(np.asarray(rng.uniform(low=-np.pi,
                                                 high=np.pi,
                                                 size=(3, n_hidden)),
                                     dtype=theano.config.floatX),
                          name='theta')
    # STEPH: theta is used in recurrence several times (~W)
    bucket = np.sqrt(3. / 2 / n_hidden)
    h_0 = theano.shared(np.asarray(rng.uniform(low=-bucket,
                                               high=bucket,
                                               size=(1, 2 * n_hidden)),
                                   dtype=theano.config.floatX),
                        name='h_0')
    # STEPH: special way of initialising hidden state
    parameters = [V, U, hidden_bias, reflection, out_bias, theta, h_0]

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)

    index_permute = np.random.permutation(n_hidden)
    # STEPH: permutation used in recurrence (~W)

    index_permute_long = np.concatenate(
        (index_permute, index_permute + n_hidden))
    # STEPH: do the same permutation to both real and imaginary parts
    swap_re_im = np.concatenate((np.arange(n_hidden,
                                           2 * n_hidden), np.arange(n_hidden)))

    # STEPH: this is a permutation which swaps imaginary and real indices

    # define the recurrence used by theano.scan
    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, theta, V,
                   hidden_bias, out_bias, U):

        # Compute hidden linear transform
        # STEPH: specific set of transformations, sliiightly not that important
        step1 = times_diag(h_prev, n_hidden, theta[0, :], swap_re_im)
        step2 = do_fft(step1, n_hidden)
        step3 = times_reflection(step2, n_hidden, reflection[0, :])
        step4 = vec_permutation(step3, index_permute_long)
        step5 = times_diag(step4, n_hidden, theta[1, :], swap_re_im)
        step6 = do_ifft(step5, n_hidden)
        step7 = times_reflection(step6, n_hidden, reflection[1, :])
        step8 = times_diag(step7, n_hidden, theta[2, :], swap_re_im)

        hidden_lin_output = step8
        # STEPH: hidden_lin_output isn't complex enough to have its own name
        #   in the other models

        # Compute data linear transform
        if loss_function == 'CE':
            data_lin_output = V[T.cast(x_t, 'int32')]
        else:
            data_lin_output = T.dot(x_t, V)

        # Total linear output
        lin_output = hidden_lin_output + data_lin_output

        # Apply non-linearity ----------------------------

        # scale RELU nonlinearity
        modulus = T.sqrt(lin_output**2 + lin_output[:, swap_re_im]**2)
        # STEPH: I think this comes to twice the modulus...
        #   TODO: check that
        rescale = T.maximum(
            modulus + T.tile(hidden_bias, [2]).dimshuffle('x', 0),
            0.) / (modulus + 1e-5)
        h_t = lin_output * rescale

        if out_every_t:
            lin_output = T.dot(h_t, U) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    # compute hidden states
    # STEPH: the same as in tanhRNN, here (except U ~ out_mat)
    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    non_sequences = [theta, V, hidden_bias, out_bias, U]
    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :], U) + out_bias.dimshuffle(
            'x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return [x, y], parameters, costs
Example #44
    def __init__(self, rng, input1, input2, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input1: theano.tensor.dmatrix
        :param input1: a symbolic tensor of shape ( n_in)
        
        :type input2: theano.tensor.dmatrix
        :param input2: a symbolic tensor of shape ( n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input1 = input1
        self.input2 = input2

        # `W` is initialized with `W_values` which is uniformly sampled
        # from sqrt(-6./(n_in+n_hidden)) and sqrt(6./(n_in+n_hidden))
        # for tanh activation function
        # the output of uniform is converted using asarray to dtype
        # theano.config.floatX so that the code is runnable on GPU
        # Note : optimal initialization of weights is dependent on the
        #        activation function used (among other things).
        #        For example, results presented in [Xavier10] suggest that you
        #        should use 4 times larger initial weights for sigmoid
        #        compared to tanh
        #        We have no info for other functions, so we use the same as
        #        tanh.
        if W is None:
            W_values = np.asarray(rng.uniform(
                    low=-np.sqrt(6. / (n_in + n_out)),
                    high=np.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)), dtype=theano.config.floatX)
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            b = theano.shared(value=b_values, name='b', borrow=True)

        self.W = W
        self.b = b

        lin_output1 = T.dot(input1, self.W) + self.b
        self.output1 = (lin_output1 if activation is None
                       else activation(lin_output1))
        lin_output2 = T.dot(input2, self.W) + self.b
        self.output2 = (lin_output2 if activation is None
                       else activation(lin_output2))
        # parameters of the model
        self.params = [self.W, self.b]
Example #45
def times_reflection(input, n_hidden, reflection):
    # comments here are Steph working through the maths
    # OK so the equation they give is:
    #   (I - 2 outer(v, v*)/|v|**2) h
    # (v is the reflection, h is the input)
    # this gets us to: (using Einstein notation)
    #   h_i - (2/|v|**2) v_i v*_j h_j
    # Looking at the final few lines of this function, what we would like to
    # show is: (since :n_hidden is the imaginary part of the output tensor)
    #       re(v_i v*_j h_j) = d - c
    #       im(v_i v*_j h_j) = a + b
    #
    # v = mu + i nu
    # h = alpha + i beta
    # v_i v*_j h_j = (mu_i + i nu_i) (mu_j - i nu_j) (alpha_j + i beta_j)
    #       = (mu_i mu_j - i mu_i nu_j + i nu_i mu_j + nu_i nu_j) (alpha_j + i beta_j)
    #       = (mu_i mu_j alpha_j + i mu_i mu_j beta_j +
    #          -i mu_i nu_j alpha_j + mu_i nu_j beta_j +
    #           i nu_i mu_j alpha_j - nu_i mu_j beta_j +
    #             nu_i nu_j alpha_j + i nu_i nu_j beta_j) = K
    #
    # What an expression!
    # Let's split it up:
    # re(K) = (mu_i mu_j alpha_j + mu_i nu_j beta_j +
    #          -nu_i mu_j beta_j + nu_i nu_j alpha_j)
    # im(K) = (mu_i mu_j beta_j - mu_i nu_j alpha_j +
    #          + nu_i mu_j alpha_j + nu_i nu_j beta_j)
    #
    # Now let's replace the scalar parts (the repeated js...)
    # αμ = alpha_j mu_j
    # αν = alpha_j nu_j
    # βμ = beta_j mu_j
    # βν = beta_j nu_j
    #
    # re(K) = (mu_i αμ + mu_i βν - nu_i βμ + nu_i αν )
    # im(K) = (mu_i βμ - mu_i αν + nu_i αμ + nu_i βν )
    #
    # Simplifying further...
    #
    # re(K) = mu_i ( αμ + βν ) - nu_i ( βμ - αν ) = nope - nope
    # im(K) = mu_i ( βμ - αν ) + nu_i ( αμ + βν ) = nope + nope
    #
    # Jumping ahead (see below) to the definitions of a, b, c, d...
    #
    # a = mu_i ( αμ - βν )
    # b = nu_i ( αν + βμ )
    # c = nu_i ( αμ - βν )
    # d = mu_i ( αν + βμ )
    #
    # And so:
    # d - c = mu_i ( αν + βμ ) - nu_i ( αμ - βν )
    # a + b = mu_i ( αμ - βν ) + nu_i ( αν + βμ )
    #
    # ... huh, what is going on?
    # ... double-checking my maths!
    # ... double-checking their maths!
    # ... looks OK?
    # ... will need to TRIPLE-check my maths when it's not 1am.
    #
    # Possibility: when they used a * in the paper, they meant *transpose*
    # and not *conjugate transpose*...
    #
    # This would result in...
    #
    # v_i v_j h_j = (mu_i + i nu_i) (mu_j + i nu_j) (alpha_j + i beta_j)
    #       = (mu_i mu_j + i mu_i nu_j + i nu_i mu_j - nu_i nu_j) (alpha_j + i beta_j)
    #       = (mu_i mu_j alpha_j + i mu_i mu_j beta_j +
    #          + i mu_i nu_j alpha_j - mu_i nu_j beta_j +
    #           i nu_i mu_j alpha_j - nu_i mu_j beta_j +
    #           - nu_i nu_j alpha_j - i nu_i nu_j beta_j) = J
    #
    # re(J) = (mu_i mu_j alpha_j - mu_i nu_j beta_j +
    #          - nu_i mu_j beta_j - nu_i nu_j alpha_j)
    # im(J) = (mu_i mu_j beta_j + mu_i nu_j alpha_j +
    #            nu_i mu_j alpha_j - nu_i nu_j beta_j)
    #
    # Replacing scalar parts...
    # re(J) = mu_i αμ - mu_i βν - nu_i βμ - nu_i αν
    # im(J) = mu_i βμ + mu_i αν + nu_i αμ - nu_i βν
    #
    # Further simplifying...
    #
    # re(J) = mu_i ( αμ - βν ) - nu_i ( βμ + αν ) = a - b
    # im(J) = mu_i ( βμ + αν ) + nu_i ( αμ - βν ) = d + c
    #
    # ... closer but NOT THE SAME
    # WHAT IS GOING ON HERE?

    input_re = input[:, :n_hidden]
    # alpha
    input_im = input[:, n_hidden:]
    # beta
    reflect_re = reflection[:n_hidden]
    # mu
    reflect_im = reflection[n_hidden:]
    # nu

    vstarv = (reflection**2).sum()

    # (the following quantities are scalar per example:
    #  they are vectors of length batch_size, i.e. input.shape[0])
    input_re_reflect_re = T.dot(input_re, reflect_re)
    # αμ
    input_re_reflect_im = T.dot(input_re, reflect_im)
    # αν
    input_im_reflect_re = T.dot(input_im, reflect_re)
    # βμ
    input_im_reflect_im = T.dot(input_im, reflect_im)
    # βν

    #
    a = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_re)
    # outer(αμ - βν, mu)
    b = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_im)
    # outer(αν + βμ, nu)
    c = T.outer(input_re_reflect_re - input_im_reflect_im, reflect_im)
    # outer(αμ - βν, nu)
    d = T.outer(input_re_reflect_im + input_im_reflect_re, reflect_re)
    # outer(αν + βμ, mu)

    output = input
    output = T.inc_subtensor(output[:, :n_hidden], -2. / vstarv * (a + b))
    output = T.inc_subtensor(output[:, n_hidden:], -2. / vstarv * (d - c))

    return output
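
# Added sketch (not part of the example above): a self-contained numpy check of
# what the split-real arithmetic in times_reflection computes. It appears to
# match h - (2/|v|**2) * (v^T h) * conj(v), which differs from the textbook
# Householder map h - (2/|v|**2) * (v^* h) * v in where the conjugate sits --
# consistent with the transpose-vs-conjugate-transpose puzzle in the comments.
import numpy as np

def times_reflection_np(x, n_hidden, reflection):
    # same split-real arithmetic as the Theano code above, in plain numpy
    alpha, beta = x[:, :n_hidden], x[:, n_hidden:]
    mu, nu = reflection[:n_hidden], reflection[n_hidden:]
    vstarv = (reflection ** 2).sum()
    am, an = alpha.dot(mu), alpha.dot(nu)    # alpha.mu, alpha.nu
    bm, bn = beta.dot(mu), beta.dot(nu)      # beta.mu, beta.nu
    a = np.outer(am - bn, mu)
    b = np.outer(an + bm, nu)
    c = np.outer(am - bn, nu)
    d = np.outer(an + bm, mu)
    out = x.copy()
    out[:, :n_hidden] -= 2. / vstarv * (a + b)
    out[:, n_hidden:] -= 2. / vstarv * (d - c)
    return out

rng = np.random.RandomState(0)
n_hidden, batch = 4, 3
reflection = rng.randn(2 * n_hidden)
x = rng.randn(batch, 2 * n_hidden)

h = x[:, :n_hidden] + 1j * x[:, n_hidden:]               # h = alpha + i beta
v = reflection[:n_hidden] + 1j * reflection[n_hidden:]   # v = mu + i nu
norm2 = np.sum(np.abs(v) ** 2)

split = times_reflection_np(x, n_hidden, reflection)

candidate = h - 2. / norm2 * np.outer(h.dot(v), np.conj(v))
assert np.allclose(split[:, :n_hidden], candidate.real)
assert np.allclose(split[:, n_hidden:], candidate.imag)

householder = h - 2. / norm2 * np.outer(h.dot(np.conj(v)), v)
print(np.allclose(candidate, householder))   # typically False for random v, h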
Example #46
0
    def __init__(self,
                 rng,
                 input,
                 n_in,
                 n_out,
                 activation=Tanh,
                 use_bias=True,
                 W=None,
                 b=None):
        """
        Typical hidden layer of a MLP: units are fully-connected and have
        sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
        and the bias vector b is of shape (n_out,).

        NOTE : The nonlinearity used here is tanh

        Hidden unit activation is given by: tanh(dot(input,W) + b)

        :type rng: np.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input = input
        self.activation = activation

        if W is None:
            W_values = np.asarray(0.01 *
                                  rng.standard_normal(size=(n_in, n_out)),
                                  dtype=theano.config.floatX)
            self.W = theano.shared(value=W_values, name='W', borrow=True)
        else:
            self.W = W

        if b is None:
            if activation == ReLU:
                # for ReLU, we initialize bias as constant 1 as suggested in
                # the dropout and ImageNet paper
                b_values = np.ones((n_out, ), dtype=theano.config.floatX)
            else:
                b_values = np.zeros((n_out, ), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name='b', borrow=True)
        else:
            self.b = b

        if use_bias:
            lin_output = T.dot(input, self.W) + self.b
        else:
            lin_output = T.dot(input, self.W)

        self.output = (lin_output
                       if activation is None else activation(lin_output))

        # parameters of the model
        if use_bias:
            self.params = [self.W, self.b]
        else:
            self.params = [self.W]
Example #47
0
def tanhRNN(n_input,
            n_hidden,
            n_output,
            input_type='real',
            out_every_t=False,
            loss_function='CE'):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)
    # STEPH: initialising np's generic RNG and a specific rng identically
    #   uncertain why but maybe we'll find out soon

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)
    inputs = [x, y]

    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    V = initialize_matrix(n_input, n_hidden, 'V', rng)
    W = initialize_matrix(n_hidden, n_hidden, 'W', rng)
    # STEPH: W is the weights of the recurrence (can tell cause of its shape!)
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    hidden_bias = theano.shared(
        np.zeros((n_hidden, ), dtype=theano.config.floatX))
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX))
    parameters = [h_0, V, W, out_mat, hidden_bias, out_bias]

    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, W, hidden_bias,
                   out_mat, out_bias):
        # all of this is to get the hidden state, and possibly cost/accuracy
        if loss_function == 'CE':
            data_lin_output = V[x_t]
            # STEPH: uncertain why this is named thusly
            # STEPH: in CE case, the data is just an index, I guess...
            #   basically, an indicator vector
            #   I think this may be confounded with the experimental setup
            #   CE appears in ?
        else:
            data_lin_output = T.dot(x_t, V)
            # STEPH: 'as normal', folding the data from the sequence in

        h_t = T.tanh(
            T.dot(h_prev, W) + data_lin_output +
            hidden_bias.dimshuffle('x', 0))
        # STEPH: dimshuffle (theano) here, makes row out of 1d vector, N -> 1xN
        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            # STEPH: no cost/accuracy until the end!
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    non_sequences = [V, W, hidden_bias, out_mat, out_bias]
    # STEPH: naming due to scan (theano); these are 'fixed' values in scan

    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    # STEPH: tile (theano) repeats input x according to pattern
    #   pattern is number of times to tile in each direction

    if out_every_t:
        sequences = [x, y]
    else:
        # STEPH: the 'y' here is just... a bunch of weirdly-shaped zeros?
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]
    # STEPH: sequences here are the input we loop over...

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]
    # STEPH: naming due to scan, these are initialisation values... see return
    # value of recurrence: h_t, cost_t, acc_t...

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)
    # STEPH: remembering how to do scan!
    #   outputs_info: contains initialisation, naming is bizarre, whatever
    #   non_sequences: unchanging variables
    #   sequences: tensors to be looped over
    #   so fn receives (sequences, previous output, non_sequences):
    #       this seems to square with the order of arguments in 'recurrence'
    #       TODO: read scan more carefully to confirm this

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :],
                           out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
        # STEPH: cost is computed off the final hidden state
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return inputs, parameters, costs
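
# Added sketch (not from the source above): a minimal theano.scan example to
# confirm the argument order the comments are working out -- the step function
# receives (sequence slices, previous outputs, non_sequences), in that order.
import numpy as np
import theano
import theano.tensor as T

xs = T.vector('xs')        # the sequence we loop over
scale = T.scalar('scale')  # a non-sequence: fixed across all steps

def step(x_t, acc_prev, scale):
    return acc_prev + scale * x_t

acc, _ = theano.scan(fn=step,
                     sequences=[xs],
                     outputs_info=[theano.shared(
                         np.asarray(0.0, dtype=theano.config.floatX))],
                     non_sequences=[scale])

running_sum = theano.function([xs, scale], acc, allow_input_downcast=True)
print(running_sum(np.arange(4.0), 2.0))   # [0. 2. 6. 12.]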
Example #48
0
def orthogonal_RNN(n_input,
                   n_hidden,
                   n_output,
                   input_type='real',
                   out_every_t=False,
                   loss_function='CE',
                   basis=None):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)
    inputs = [x, y]

    # ---- encoder ---- #
    V = initialize_matrix(n_input, n_hidden, 'V', rng)
    # ---- decoder ---- #
    U = initialize_matrix(n_hidden, n_output, 'U', rng)
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX),
                             name='out_bias')
    # ---- hidden part ---- #
    dim_of_lie_algebra = n_hidden * (n_hidden - 1) // 2
    lambdas = theano.shared(np.asarray(rng.uniform(
        low=-1, high=1, size=(dim_of_lie_algebra, )),
                                       dtype=theano.config.floatX),
                            name='lambdas')
    # warning: symbolic_basis is expensive, memory-wise!
    if basis is None:
        symbolic_basis = theano.shared(np.asarray(
            rng.normal(size=(dim_of_lie_algebra, n_hidden, n_hidden)),
            dtype=theano.config.floatX),
                                       name='symbolic_basis')
    else:
        symbolic_basis = theano.shared(basis, name='symbolic_basis')
    # here it is!
    #O = T.expm(T.dot(lambdas, symbolic_basis))
    # YOLO
    #O = T.tensordot(lambdas, symbolic_basis, axes=[0, 0])
    #O = lambdas[0]*symbolic_basis[0] + lambdas[10]*symbolic_basis[10]
    O = lambdas[dim_of_lie_algebra - 1] * symbolic_basis[0]
    #lambdas[n_hidden*(n_hidden-1)/2 -1]*symbolic_basis[n_hidden*(n_hidden-1)/2 -1]
    # RIDICULOUS HACK THEANO IS WEIRD
    #for k in xrange(1, n_hidden*(n_hidden-1)/2):
    #        O += lambdas[k]*symbolic_basis[k]
    #    pdb.set_trace()
    #O = T.eye(n_hidden, n_hidden)
    # END YOLO
    # TODO: check maths on bucket
    bucket = np.sqrt(3. / 2 / n_hidden)
    h_0 = theano.shared(np.asarray(rng.uniform(low=-bucket,
                                               high=bucket,
                                               size=(1, n_hidden)),
                                   dtype=theano.config.floatX),
                        name='h_0')
    hidden_bias = theano.shared(np.asarray(rng.uniform(low=-0.01,
                                                       high=0.01,
                                                       size=(n_hidden, )),
                                           dtype=theano.config.floatX),
                                name='hidden_bias')

    # ---- all the parameters! ---- #
    parameters = [V, U, out_bias, lambdas, h_0, hidden_bias]

    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, O, hidden_bias,
                   out_bias, U):
        if loss_function == 'CE':
            # STEPH: why is this cast here???
            data_lin_output = V[T.cast(x_t, 'int32')]
        else:
            data_lin_output = T.dot(x_t, V)
        h_t = T.nnet.relu(
            T.dot(h_prev, O) + data_lin_output +
            hidden_bias.dimshuffle('x', 0))

        if out_every_t:
            lin_output = T.dot(h_t, U) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    # compute hidden states
    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    non_sequences = [V, O, hidden_bias, out_bias, U]
    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :], U) + out_bias.dimshuffle(
            'x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return inputs, parameters, costs
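
# Added sketch (not from the source above): a plain numpy/scipy illustration of
# what the commented-out tensordot/expm lines seem to be aiming for -- combine
# a basis of the Lie algebra so(n) with the lambdas to get a skew-symmetric
# matrix, then exponentiate to obtain an orthogonal recurrence matrix. The
# explicit elementary basis below is an assumption standing in for the random
# `symbolic_basis` used above.
import numpy as np
from scipy.linalg import expm

rng = np.random.RandomState(1234)
n_hidden = 4
dim_of_lie_algebra = n_hidden * (n_hidden - 1) // 2

# one elementary skew-symmetric generator per (i, j) pair with i < j
basis = np.zeros((dim_of_lie_algebra, n_hidden, n_hidden))
k = 0
for i in range(n_hidden):
    for j in range(i + 1, n_hidden):
        basis[k, i, j] = 1.0
        basis[k, j, i] = -1.0
        k += 1

lambdas = rng.uniform(low=-1, high=1, size=(dim_of_lie_algebra,))

A = np.tensordot(lambdas, basis, axes=[0, 0])   # skew-symmetric combination
O = expm(A)                                     # exponential map into SO(n)

assert np.allclose(A.T, -A)
assert np.allclose(O.T.dot(O), np.eye(n_hidden))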
Example #49
0
    def __init__(self, numpy_rng, theano_rng=None, input=None,
                 n_visible=784, n_hidden=500,
                 W=None, bhid=None, bvis=None, learning_rate=0.1, corruption_level=0.3):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input ), the number of hidden units ( the dimension
        d' of the latent or hidden space ) and the corruption level. The
        constructor also receives symbolic variables for the input, weights and
        bias. Such symbolic variables are useful when, for example, the input
        is the result of some computations, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens:
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if dA should
                  be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between dA and another
                     architecture; if dA should be standalone set this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between dA and another
                     architecture; if dA should be standalone set this to None

        :type corruption_level: float
        :param corruption_level: The amount of input corruption to use. Should be between 0 and 1.
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        self.learning_rate = learning_rate
        self.corruption_level = corruption_level
        
        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W` which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype
            # theano.config.floatX so that the code is runnable on GPU
            initial_W = numpy.asarray(numpy_rng.uniform(
                      low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                      high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                      size=(n_visible, n_hidden)), dtype=theano.config.floatX)
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if not bvis:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                         dtype=theano.config.floatX),
                                 borrow=True)

        if not bhid:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.matrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
        
        
        self.hidden = T.nnet.sigmoid(T.dot(self.x, self.W) + self.b)

        self.reconstructed = T.nnet.sigmoid(T.dot(self.hidden, self.W_prime) + self.b_prime)
        #self.reconstructed_L = - T.sum(self.x * T.log(self.reconstructed) + (1 - self.x) * T.log(1 - self.reconstructed), axis=1)
        self.reconstructed_L = T.sum((self.x - self.reconstructed)**2,axis=1)
        
        dummy = self.x - self.b_prime
        self.F = T.sum(T.nnet.softplus(self.hidden), axis=1) - 0.5*T.sum(dummy*dummy, axis=1)
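
# Added sketch (not part of the original class): a possible training function
# built only from the attributes defined in __init__ above, assuming the
# enclosing class is the `dA` named in its docstring; the shapes, seed and
# batch size are assumptions.
import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
da = dA(numpy_rng=numpy.random.RandomState(123), input=x,
        n_visible=784, n_hidden=500, learning_rate=0.1)

cost = T.mean(da.reconstructed_L)      # mean squared reconstruction error
grads = T.grad(cost, da.params)
updates = [(p, p - da.learning_rate * g) for p, g in zip(da.params, grads)]
train_step = theano.function([x], cost, updates=updates)

batch = numpy.random.rand(20, 784).astype(theano.config.floatX)
print(train_step(batch))               # one SGD step on a random minibatch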
Example #50
0
def LSTM(n_input,
         n_hidden,
         n_output,
         input_type='real',
         out_every_t=False,
         loss_function='CE'):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    # STEPH: i for input, f for forget, c for candidate, o for output
    W_i = initialize_matrix(n_input, n_hidden, 'W_i', rng)
    W_f = initialize_matrix(n_input, n_hidden, 'W_f', rng)
    W_c = initialize_matrix(n_input, n_hidden, 'W_c', rng)
    W_o = initialize_matrix(n_input, n_hidden, 'W_o', rng)
    U_i = initialize_matrix(n_hidden, n_hidden, 'U_i', rng)
    U_f = initialize_matrix(n_hidden, n_hidden, 'U_f', rng)
    U_c = initialize_matrix(n_hidden, n_hidden, 'U_c', rng)
    U_o = initialize_matrix(n_hidden, n_hidden, 'U_o', rng)
    # STEPH: note that U is not out_mat as it was in complex_RNN
    V_o = initialize_matrix(n_hidden, n_hidden, 'V_o', rng)
    b_i = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    b_f = theano.shared(np.ones((n_hidden, ), dtype=theano.config.floatX))
    b_c = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    b_o = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    state_0 = theano.shared(np.zeros((1, n_hidden),
                                     dtype=theano.config.floatX))
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX))
    parameters = [
        W_i, W_f, W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o, h_0,
        state_0, out_mat, out_bias
    ]

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)

    def recurrence(x_t, y_t, h_prev, state_prev, cost_prev, acc_prev, W_i, W_f,
                   W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o,
                   out_mat, out_bias):

        if loss_function == 'CE':
            x_t_W_i = W_i[x_t]
            x_t_W_c = W_c[x_t]
            x_t_W_f = W_f[x_t]
            x_t_W_o = W_o[x_t]
        else:
            x_t_W_i = T.dot(x_t, W_i)
            x_t_W_c = T.dot(x_t, W_c)
            x_t_W_f = T.dot(x_t, W_f)
            x_t_W_o = T.dot(x_t, W_o)

        input_t = T.nnet.sigmoid(x_t_W_i + T.dot(h_prev, U_i) +
                                 b_i.dimshuffle('x', 0))
        # STEPH: save candidate?
        candidate_t = T.tanh(x_t_W_c + T.dot(h_prev, U_c) +
                             b_c.dimshuffle('x', 0))
        forget_t = T.nnet.sigmoid(x_t_W_f + T.dot(h_prev, U_f) +
                                  b_f.dimshuffle('x', 0))
        # STEPH: forget previous state?

        state_t = input_t * candidate_t + forget_t * state_prev
        # STEPH: so we can both save the input and not forget the previous, OK

        output_t = T.nnet.sigmoid(x_t_W_o + T.dot(h_prev, U_o) +
                                  T.dot(state_t, V_o) + b_o.dimshuffle('x', 0))
        # TODO: (STEPH) double-check maths, here!

        h_t = output_t * T.tanh(state_t)

        # STEPH: same  as other models...
        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, state_t, cost_t, acc_t

    non_sequences = [
        W_i, W_f, W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o,
        out_mat, out_bias
    ]

    # STEPH: same as tanhRNN, etc... the scan part is generally duplicated!
    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    state_0_batch = T.tile(state_0, [x.shape[1], 1])

    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch, state_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :],
                           out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return [x, y], parameters, costs
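
# Added sketch (not part of the source): one step of the recurrence above in
# plain numpy, to make the gate wiring concrete. Note the output gate also sees
# the *current* cell state through V_o (a peephole-style, full-matrix term),
# which is presumably what the 'double-check maths' comment refers to.
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, h_prev, state_prev, p):
    input_t = np_sigmoid(x_t.dot(p['W_i']) + h_prev.dot(p['U_i']) + p['b_i'])
    candidate_t = np.tanh(x_t.dot(p['W_c']) + h_prev.dot(p['U_c']) + p['b_c'])
    forget_t = np_sigmoid(x_t.dot(p['W_f']) + h_prev.dot(p['U_f']) + p['b_f'])
    state_t = input_t * candidate_t + forget_t * state_prev
    output_t = np_sigmoid(x_t.dot(p['W_o']) + h_prev.dot(p['U_o'])
                          + state_t.dot(p['V_o']) + p['b_o'])
    h_t = output_t * np.tanh(state_t)
    return h_t, state_t

rng = np.random.RandomState(0)
n_input, n_hidden, batch = 3, 5, 2
p = {name: 0.1 * rng.randn(n_input, n_hidden)
     for name in ['W_i', 'W_f', 'W_c', 'W_o']}
p.update({name: 0.1 * rng.randn(n_hidden, n_hidden)
          for name in ['U_i', 'U_f', 'U_c', 'U_o', 'V_o']})
p.update({name: np.zeros(n_hidden) for name in ['b_i', 'b_c', 'b_o']})
p['b_f'] = np.ones(n_hidden)   # forget-gate bias initialised to 1, as above

h_t, state_t = lstm_step(rng.randn(batch, n_input),
                         np.zeros((batch, n_hidden)),
                         np.zeros((batch, n_hidden)), p)
assert h_t.shape == state_t.shape == (batch, n_hidden)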
Example #51
0
 def get_hidden_values(self, input):
     """ Computes the values of the hidden layer """
     return T.nnet.sigmoid(T.dot(input, self.W) + self.b)
Example #52
0
def IRNN(n_input,
         n_hidden,
         n_output,
         input_type='real',
         out_every_t=False,
         loss_function='CE'):
    # STEPH: this differs from tanhRNN in two places, see below
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)
    inputs = [x, y]

    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    V = initialize_matrix(n_input, n_hidden, 'V', rng)
    W = theano.shared(np.identity(n_hidden, dtype=theano.config.floatX))
    # STEPH: W differs from that of tanhRNN: this is just identity!
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    hidden_bias = theano.shared(
        np.zeros((n_hidden, ), dtype=theano.config.floatX))
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX))

    parameters = [h_0, V, W, out_mat, hidden_bias, out_bias]

    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, W, hidden_bias,
                   out_mat, out_bias):
        if loss_function == 'CE':
            data_lin_output = V[x_t]
        else:
            data_lin_output = T.dot(x_t, V)

        h_t = T.nnet.relu(
            T.dot(h_prev, W) + data_lin_output +
            hidden_bias.dimshuffle('x', 0))
        # STEPH: differs from tanhRNN: here we have relu, there they had tanh
        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    non_sequences = [V, W, hidden_bias, out_mat, out_bias]

    h_0_batch = T.tile(h_0, [x.shape[1], 1])

    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :],
                           out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return inputs, parameters, costs
Example #53
0
def build_model(alpha, beta, tparams, options):
    trng = RandomStreams(SEED)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x_zheng = tensor.matrix('x_zheng', dtype='int32')
    x_zheng_mask = tensor.matrix('x_zheng_mask', dtype=config.floatX)
    x_ni = tensor.matrix('x_ni', dtype='int32')
    x_ni_mask = tensor.matrix('x_ni_mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int32')

    n_timesteps = x_zheng.shape[0]
    n_samples = x_zheng.shape[1]

    emb_zheng = tparams['Wemb'][x_zheng.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj1 = get_layer(options['encoder'])[1](tparams,
                                             emb_zheng,
                                             options,
                                             prefix='lstm_zheng',
                                             mask=x_zheng_mask)
    if options['encoder'] == 'lstm':
        proj_zheng = (proj1 * x_zheng_mask[:, :, None]).sum(axis=0)
        proj_zheng = proj_zheng / x_zheng_mask.sum(axis=0)[:, None]

    emb_ni = tparams['Wemb'][x_ni.flatten()].reshape(
        [n_timesteps, n_samples, options['dim_proj']])

    proj2 = get_layer(options['encoder'])[1](tparams,
                                             emb_ni,
                                             options,
                                             prefix='lstm_ni',
                                             mask=x_ni_mask)

    if options['encoder'] == 'lstm':
        proj_ni = (proj2 * x_ni_mask[:, :, None]).sum(axis=0)
        proj_ni = proj_ni / x_ni_mask.sum(axis=0)[:, None]

    proj = tensor.concatenate((proj_zheng, proj_ni), axis=1)

    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    pred = tensor.nnet.softmax(tensor.dot(proj, tparams['U']) + tparams['b'])

    pred_zheng = tensor.nnet.softmax(
        tensor.dot(proj_zheng, tparams['U_zheng']) + tparams['b'])

    pred_ni = tensor.nnet.softmax(
        tensor.dot(proj_ni, tparams['U_ni']) + tparams['b'])

    f_pred_prob = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                                  pred,
                                  name='f_pred_prob')

    f_pred = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             pred.argmax(axis=1),
                             name='f_pred')

    f_proj = theano.function([x_zheng, x_zheng_mask, x_ni, x_ni_mask],
                             proj,
                             name='f_proj')

    off = 1e-8
    if pred.dtype == 'float16':
        off = 1e-6

    cost1 = -tensor.log(pred[tensor.arange(n_samples), y] + off).mean()
    cost2 = -tensor.log(pred_zheng[tensor.arange(n_samples), y] + off).mean()
    cost3 = -tensor.log(pred_ni[tensor.arange(n_samples), y] + off).mean()
    cost4 = tensor.sum(tensor.square(proj_zheng - proj_ni), axis=1).mean()
    cost = alpha * (cost1 + cost2 + cost3) + beta * cost4

    return use_noise, x_zheng, x_zheng_mask, x_ni, x_ni_mask, y, f_pred_prob, f_pred, cost1, cost2, cost3, cost4, cost, f_proj
Example #54
0
    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return  T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)
Example #55
0
def step(TT_x_t, TT_h_tm1, TT_Wxh, TT_Whh, TT_Why):
    TT_h_t = TT.tanh(TT.dot(TT_x_t, TT_Wxh) + TT.dot(TT_h_tm1, TT_Whh))
    TT_y_t = TT.tanh(TT.dot(TT_h_t, TT_Why))
    return TT_h_t, TT_y_t
Example #56
0
 def forward(self, input):
     return T.dot(input, self.W) + self.b
Example #57
0
def GESD(sum_uni_l, sum_uni_r):
    eucli = 1 / (1 + T.sum((sum_uni_l - sum_uni_r)**2))
    kernel = 1 / (1 + T.exp(-(T.dot(sum_uni_l, sum_uni_r.T) + 1)))
    return (eucli * kernel).reshape((1, 1))
Example #58
0
corruption_level = 0.1
training_epochs = 25
learning_rate = 0.1
batch_size = 128

W1 = init_weights(28 * 28, 900)
b1 = init_bias(900)
b1_prime = init_bias(28 * 28)
W1_prime = W1.transpose()
W2 = init_weights(900, 10)
b2 = init_bias(10)

tilde_x = theano_rng.binomial(
    size=x.shape, n=1, p=1 - corruption_level, dtype=theano.config.floatX) * x
y1 = T.nnet.sigmoid(T.dot(tilde_x, W1) + b1)
z1 = T.nnet.sigmoid(T.dot(y1, W1_prime) + b1_prime)
cost1 = -T.mean(T.sum(x * T.log(z1) + (1 - x) * T.log(1 - z1), axis=1))

params1 = [W1, b1, b1_prime]
grads1 = T.grad(cost1, params1)
updates1 = [(param1, param1 - learning_rate * grad1)
            for param1, grad1 in zip(params1, grads1)]
train_da1 = theano.function(inputs=[x],
                            outputs=cost1,
                            updates=updates1,
                            allow_input_downcast=True)

p_y2 = T.nnet.softmax(T.dot(y1, W2) + b2)
y2 = T.argmax(p_y2, axis=1)
cost2 = T.mean(T.nnet.categorical_crossentropy(p_y2, d))
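
# Added sketch (not in the original snippet): a possible second training
# function mirroring train_da1 above, for the softmax classifier on top; `x`
# and `d` are the input and label variables the snippet already references,
# and updating only [W2, b2] here (rather than fine-tuning W1, b1 as well) is
# an assumption.
params2 = [W2, b2]
grads2 = T.grad(cost2, params2)
updates2 = [(param2, param2 - learning_rate * grad2)
            for param2, grad2 in zip(params2, grads2)]
train_softmax = theano.function(inputs=[x, d],
                                outputs=cost2,
                                updates=updates2,
                                allow_input_downcast=True)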
Example #59
0
def Sigmoid(sum_uni_l, sum_uni_r):
    dot = T.dot(sum_uni_l, sum_uni_r.T)
    return T.tanh(1.0 * dot + 1).reshape((1, 1))
Example #60
0
def model(X, w_h, w_o):
    h = T.nnet.sigmoid(T.dot(X, w_h))
    pyx = T.nnet.softmax(T.dot(h, w_o))
    return pyx
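
# Added sketch (not from the source): minimal usage of the two-layer model
# above; the 784-256-10 shapes and the 0.01 weight scale are assumptions.
import numpy as np
import theano
import theano.tensor as T

floatX = theano.config.floatX
X = T.matrix('X')
w_h = theano.shared(np.asarray(0.01 * np.random.randn(784, 256), dtype=floatX))
w_o = theano.shared(np.asarray(0.01 * np.random.randn(256, 10), dtype=floatX))

predict = theano.function([X], model(X, w_h, w_o))
probs = predict(np.random.rand(5, 784).astype(floatX))
print(probs.shape)   # (5, 10); each row is a softmax distribution over classes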