def output_probabilistic(self, m_x, v_x):

        m_linear = T.dot(m_x, self.m_W[ 0, :, : ]) + T.tile(self.m_b[ 0, :, : ], [ m_x.shape[ 0 ], 1 ])
        v_linear = T.dot(m_x**2, self.v_W[ 0, :, : ]) + T.dot(v_x, self.m_W[ 0, :, : ]**2) + T.dot(v_x, self.v_W[ 0, :, : ]) + \
            T.tile(self.v_b[ 0, :, : ], [ m_x.shape[ 0 ], 1 ])

        if not self.output_layer:

            # We compute the mean and variance after the ReLU activation

            alpha = m_linear / T.sqrt(v_linear)
            gamma = Network_layer.gamma(-alpha)
            gamma_robust = -alpha - 1.0 / alpha + 2.0 / alpha**3
            gamma_final = T.switch(T.lt(-alpha, T.fill(alpha, 30)), gamma, gamma_robust)

            v_aux = m_linear + T.sqrt(v_linear) * gamma_final

            m_a = Network_layer.n_cdf(alpha) * v_aux
            v_a = m_a * v_aux * Network_layer.n_cdf(-alpha) + Network_layer.n_cdf(alpha) * v_linear * (1 - gamma_final * (gamma_final + alpha))

            return (m_a, v_a)

        else:

            return (m_linear, v_linear)
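A minimal NumPy/SciPy sketch (not part of the original example) that Monte-Carlo checks the rectified-Gaussian moments used above for a single unit, assuming Network_layer.n_cdf is the standard normal CDF and Network_layer.gamma(x) = n_pdf(x) / n_cdf(-x); both are assumptions about helpers not shown in this excerpt.

import numpy as np
from scipy.stats import norm

m_lin, v_lin = 0.3, 0.8                          # pre-activation mean / variance
alpha = m_lin / np.sqrt(v_lin)
gamma = norm.pdf(alpha) / norm.cdf(alpha)        # = gamma(-alpha) under the assumption above
v_aux = m_lin + np.sqrt(v_lin) * gamma
m_a = norm.cdf(alpha) * v_aux
v_a = m_a * v_aux * norm.cdf(-alpha) + norm.cdf(alpha) * v_lin * (1.0 - gamma * (gamma + alpha))

z = np.random.normal(m_lin, np.sqrt(v_lin), size=1000000)
a = np.maximum(z, 0.0)
print(m_a, a.mean())                             # the two values should roughly agree
print(v_a, a.var())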
Example 2
    def make_gaussian_filter(self):
        W_shape = self.get_W_shape()
        k = self.filter_size[0]
        k_low = int(np.floor(-(k-1)/2))
        k_high = k_low+k

        W_std = T.exp(self.W_logstd)
        std_array = T.tile(
            W_std.dimshuffle('x', 0, 'x'),
            (self.num_input_channels, 1, k)
        )

        x = np.arange(k_low, k_high).reshape((1, 1, -1))
        x = T.tile(
            x, (self.num_input_channels, self.num_input_channels, 1)
        ).astype(floatX)

        p1 = (1./(np.sqrt(2.*np.pi))).astype(floatX)
        p2 = np.asarray(2., dtype=floatX)
        gf = (p1/std_array)*T.exp(-x**2/(p2*(std_array**2)))
        # gf = gf.astype(theano.config.floatX)

        mask = np.zeros(W_shape)
        rg = np.arange(self.num_input_channels)
        mask[rg, rg, :] = 1
        mask = mask.astype(floatX)

        gf = gf*mask

        return gf
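A small NumPy sketch (not part of the original example) of the same 1-D Gaussian window construction for a single, fixed standard deviation, ignoring the learnable W_logstd, the channel tiling, and the diagonal mask.

import numpy as np

k = 5                                            # filter_size[0]
k_low = int(np.floor(-(k - 1) / 2))
x = np.arange(k_low, k_low + k)                  # centred taps, e.g. [-2, -1, 0, 1, 2]
std = 1.3
gf = (1.0 / (np.sqrt(2.0 * np.pi) * std)) * np.exp(-x ** 2 / (2.0 * std ** 2))
print(gf)                                        # unnormalised Gaussian window over the taps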
    def setup_generate(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()

        # dimensions: (batch, time)
        chord_roots = T.imatrix()

        n_batch, n_time = chord_roots.shape

        specs = [lstmstack.prepare_sample_scan(  start_pos=T.alloc(np.array(encoding.STARTING_POSITION, np.int32), (n_batch)),
                                                    start_out=T.tile(encoding.initial_encoded_form(), (n_batch,1)),
                                                    timestep=T.tile(T.arange(n_time), (n_batch,1)),
                                                    cur_chord_type=chord_types,
                                                    cur_chord_root=chord_roots,
                                                    deterministic_dropout=True )
                    for lstmstack, encoding in zip(self.lstmstacks, self.encodings)]

        updates, all_chosen, all_probs, indiv_probs = helper_generate_from_spec(specs, self.lstmstacks, self.encodings, self.srng, n_batch, n_time, self.bounds, self.normalize_artic_only)

        self.generate_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=all_chosen,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))

        self.generate_visualize_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=[all_chosen, all_probs] + indiv_probs,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))
Example 4
  def fwd(self, x, V, A, L):
    """
    x : signal
    V : eigenvectors
    A : area 
    L : eigenvalues
    """
    V = V[:,:self.K]
    L = L[:self.K]

    L = L.dimshuffle('x','x',0)

    rho = T.sqrt(T.sum(A))
   
    # Q x 1 x K, a window for each input function
    ghat = self.activation_interp(
            T.batched_dot(T.tile(L, [self.nin,1,1]), self.Winterp))
    # Q x K x N
    V_ = T.tile(V.dimshuffle('x',1,0), [self.nin, 1, 1])
    # Q x K x N
    tmp = (ghat * V).dimshuffle(0,2,1)
    
    # Q x N x N
    transl = rho * T.batched_dot(V_.dimshuffle(0,2,1), tmp)
    transl = A.dimshuffle('x',0,'x') * transl
    
    # Q x K x N
    tmp = (V.dimshuffle(0,'x',1) * x.dimshuffle(0,1,'x')).dimshuffle(1,2,0)
    # Q x K x N
    desc = rho * T.batched_dot(tmp, transl)
    desc = T.abs_(desc)
    
    desc = desc.dimshuffle(2,0,'x',1) # BC01 format : N x Q x 1 x K
    return self.activation(theano.tensor.nnet.conv.conv2d(desc, self.W).flatten(2) + self.b)
Example 5
 def apply(self, v):
     [h_vals, _], _ = theano.scan(fn=self.step, 
                                     sequences = v, 
                                     outputs_info = [T.tile(self.h0, (v.shape[1], 1)),
                                                     T.tile(self.c0, (v.shape[1], 1))] 
                                 )
     return h_vals
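A minimal runnable sketch (not from the original source) of the T.tile pattern used in outputs_info above: a shared initial state of shape (1, n_hidden) is tiled across the batch dimension taken from the input's second axis.

import numpy as np
import theano
import theano.tensor as T

h0 = theano.shared(np.zeros((1, 3), dtype=theano.config.floatX))
v = T.tensor3('v')                       # (time, batch, features)
h0_batch = T.tile(h0, (v.shape[1], 1))   # (batch, n_hidden)
f = theano.function([v], h0_batch.shape)
print(f(np.zeros((5, 4, 2), dtype=theano.config.floatX)))   # -> [4 3]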
Example 6
def lcn_3d_input(data, kernel_shape, n_maps):

    """
    :param data: [examples, depth, filters, height, width]
    :param kernel_shape: int
    :param n_maps: int
    :return: new_x: [examples, depth, filters, height, width]
    """

    # create symbolic variable for the input data
    ftensor5 = T.TensorType('float32', [False] * 5)
    x = ftensor5()

    # # determine the number of maps
    # n_maps = data.shape[2]

    # create 3d filter that spans across all channels / feature maps
    # todo: kernel is not really in 3d; need 3d implementation instead of 2d repeated across third dimension
    # todo: alternative is to keep 2d kernel and extend short range given data size in z-plane; change first kernel_sh.
    filter_shape = (1, kernel_shape[0], n_maps, kernel_shape[1], kernel_shape[2])
    filters = np.resize(gaussian_filter(kernel_shape[1]), filter_shape)
    filters = filters / np.sum(filters)
    filters = sharedX(filters)

    # convolve filter with input signal
    convolution_out = conv3d(
        signals=x,
        filters=filters,
        signals_shape=data.shape,
        filters_shape=filter_shape,
        border_mode='valid'
    )

    # for each pixel, remove mean of 9x9 neighborhood
    mid_0 = int(np.floor(kernel_shape[0] / 2.))
    mid_1 = int(np.floor(kernel_shape[1] / 2.))
    mid_2 = int(np.floor(kernel_shape[2] / 2.))
    mean = T.tile(convolution_out, (1, 1, n_maps, 1, 1))
    padded_mean = T.zeros_like(x)
    padded_mean = T.set_subtensor(padded_mean[:, mid_0:-mid_0, :, mid_1:-mid_1, mid_2:-mid_2], mean)
    centered_data = data - padded_mean

    # scale down norm of 9x9 patch if norm is bigger than 1
    sum_sqr_xx = conv3d(signals=T.sqr(data), filters=filters)
    denominator = T.tile(T.sqrt(sum_sqr_xx), (1, 1, n_maps, 1, 1))
    padded_denominator = T.ones_like(x)
    padded_denominator = T.set_subtensor(
        padded_denominator[:, mid_0:-mid_0, :, mid_1:-mid_1, mid_2:-mid_2], denominator
    )
    per_img_mean = padded_denominator.mean(axis=[1, 2, 3, 4])
    divisor = T.largest(
        per_img_mean.dimshuffle(0, 'x', 'x', 'x', 'x'),
        padded_denominator
    )
    new_x = centered_data / T.maximum(1., divisor)

    # compile theano function
    f = theano.function([x], new_x)

    return f(data)
Example 7
    def est_log_part_fun(self):
        # init first visible data 
        v_mean = T.nnet.softmax(self.base_vbias)[0]
        v_mean_rep = T.tile(v_mean, (self.numruns,)).reshape((self.numruns, self.model.num_vis))
        D = T.tile(T.sum(self.base_vbias, axis=0).dimshuffle('x'), (self.numruns,))
        v_samples, updates = theano.scan(fn=self.multinom_sampler,non_sequences=[v_mean_rep, D], n_steps=10)        
       
        v = v_samples[-1]
        # init logw with beta = 0
        logw = - self.log_p_k(v, 0., D) 
        
        [logw_list, vs, Ds], updates = theano.scan(self.ais_step, sequences = self.betas[1:], outputs_info = [logw, v, None])
        
        logw = logw_list[-1]
        v = vs[-1]
        D = Ds[-1] 
        
        logw += self.log_p_k(v, 1, D)            
        r = logsum(logw) - T.log(self.numruns) 

        log_z_base = T.sum(T.log(1+T.exp(self.base_vbias))) + (self.model.num_hid)*T.log(2)
        log_z_est = r + log_z_base
        
        perform_fun = theano.function([], log_z_est, updates=updates)

        return perform_fun()
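A short NumPy sketch (not from the original source) of the final AIS bookkeeping above: the per-run log importance weights are combined with a log-sum-exp and shifted by log(numruns), then added to the analytically known log Z of the base-rate model; the numbers here are placeholders.

import numpy as np

logw = np.random.randn(64)                          # per-run log importance weights (placeholder)
r = np.logaddexp.reduce(logw) - np.log(len(logw))   # logsum(logw) - log(numruns)
log_z_base = 2.0                                    # base model log partition function (placeholder)
print(r + log_z_base)                               # estimated log Z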
Example 8
 def recurrence(x_t, h_tm1, c_tm1):
     i = T.nnet.sigmoid(T.dot(x_t, self.wi) + T.dot(h_tm1, self.wih) + self.bi)  # input gate
     c_proposed = T.tanh(T.dot(x_t, self.wc) + T.dot(h_tm1, self.wch) + self.bc)  # proposed memory cell content
     f = T.nnet.sigmoid(T.dot(x_t, self.wf) + T.dot(h_tm1, self.wfh) + self.bf)  # forget gate
     c_t = (T.tile(i, self.memory_size) * c_proposed) + (T.tile(f, self.memory_size) * c_tm1)  # new memory cell content
     o = T.nnet.sigmoid(T.dot(x_t, self.wo) + T.dot(h_tm1, self.woh) + self.bo)  # output gate
     h_t = T.tile(o, self.memory_size) * T.tanh(c_t)
     return [h_t, c_t]
Example 9
def weighted_binary_cross_entropy_4(pred, target, class_normalization):
    # Mix of 0 and 2
    # From theano
    DIM = pred.shape[1]
    BATCH_SIZE = pred.shape[0]
    N_on_per_batch = (T.transpose(T.tile(target.sum(axis=1), (DIM, 1))) + 1)
    N_off_per_batch = (T.transpose(T.tile((1-target).sum(axis=1), (DIM, 1))) + 1)
    class_norm_tile = T.tile(class_normalization, (BATCH_SIZE, 1))
    return -(class_norm_tile * target * T.log(pred) / N_on_per_batch + (1.0 - target) * T.log(1.0 - pred) / N_off_per_batch)
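A minimal usage sketch (not from the original source), assuming only the function above and a working Theano install: compile the loss into a function and evaluate it on toy predictions, targets, and per-class weights.

import numpy as np
import theano
import theano.tensor as T

pred = T.matrix('pred')
target = T.matrix('target')
class_norm = T.vector('class_norm')
loss = weighted_binary_cross_entropy_4(pred, target, class_norm)
f = theano.function([pred, target, class_norm], loss.mean())

p = np.random.uniform(0.05, 0.95, size=(4, 6)).astype(theano.config.floatX)
t = (np.random.rand(4, 6) > 0.7).astype(theano.config.floatX)
w = np.ones(6, dtype=theano.config.floatX)
print(f(p, t, w))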
Example 10
def IRNN(n_input, n_hidden, n_output, input_type='real', out_every_t=False, loss_function='CE'):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)
    inputs = [x, y]

    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    V = initialize_matrix(n_input, n_hidden, 'V', rng)
    W = theano.shared(np.identity(n_hidden, dtype=theano.config.floatX))
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    hidden_bias = theano.shared(np.zeros((n_hidden,), dtype=theano.config.floatX))
    out_bias = theano.shared(np.zeros((n_output,), dtype=theano.config.floatX))

    parameters = [h_0, V, W, out_mat, hidden_bias, out_bias]

    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, W, hidden_bias, out_mat, out_bias):
        if loss_function == 'CE':
            data_lin_output = V[x_t]
        else:
            data_lin_output = T.dot(x_t, V)

        h_t = T.nnet.relu(T.dot(h_prev, W) + data_lin_output + hidden_bias.dimshuffle('x', 0))
        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(NP_FLOAT(0.0))
            acc_t = theano.shared(NP_FLOAT(0.0))

        return h_t, cost_t, acc_t

    non_sequences = [V, W, hidden_bias, out_mat, out_bias]

    h_0_batch = T.tile(h_0, [x.shape[1], 1])

    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [x, T.tile(theano.shared(np.zeros((1,1), dtype=theano.config.floatX)), [x.shape[0], 1, 1])]

    outputs_info = [h_0_batch, theano.shared(NP_FLOAT(0.0)), theano.shared(NP_FLOAT(0.0))]

    [hidden_states, cost_steps, acc_steps], updates = theano.scan(fn=recurrence,
                                                                  sequences=sequences,
                                                                  non_sequences=non_sequences,
                                                                  outputs_info = outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1,:,:], out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return inputs, parameters, costs
Example 11
def mmd_full(x_t, y_t, alpha=0.5):
    """ Implementation of the full kernel MMD statistic (gaussian kernel)"""
    N = x_t.shape[1]
    M = y_t.shape[1]

    term1 = T.mean(T.exp(-0.5 * (1 / alpha) * T.square(T.repeat(x_t, N) - T.tile(x_t, N))))
    term2 = T.mean(T.exp(-0.5 * (1 / alpha) * T.square(T.repeat(x_t, M) - T.tile(y_t, N))))
    term3 = T.mean(T.exp(-0.5 * (1 / alpha) * T.square(T.repeat(y_t, M) - T.tile(y_t, M))))
    return term1 - 2 * term2 + term3
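A plain NumPy sketch (not from the original source) of the same Gaussian-kernel MMD statistic for 1-D samples, showing what the Theano expression above estimates; larger values indicate the two samples come from more different distributions.

import numpy as np

def mmd_full_np(x, y, alpha=0.5):
    k = lambda a, b: np.exp(-0.5 * (1.0 / alpha) * (a[:, None] - b[None, :]) ** 2)
    return k(x, x).mean() - 2.0 * k(x, y).mean() + k(y, y).mean()

x = np.random.normal(0.0, 1.0, 200)
y = np.random.normal(0.5, 1.0, 300)
print(mmd_full_np(x, y))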
    def setup_generate(self):

        # dimensions: (batch, time, 12)
        chord_types = T.btensor3()

        # dimensions: (batch, time)
        chord_roots = T.imatrix()

        n_batch, n_time = chord_roots.shape

        spec = self.lstmstack.prepare_sample_scan(  start_pos=T.alloc(np.array(self.encoding.STARTING_POSITION, np.int32), (n_batch)),
                                                    start_out=T.tile(self.encoding.initial_encoded_form(), (n_batch,1)),
                                                    timestep=T.tile(T.arange(n_time), (n_batch,1)),
                                                    cur_chord_type=chord_types,
                                                    cur_chord_root=chord_roots,
                                                    deterministic_dropout=True )

        def _scan_fn(*inputs):
            # inputs is [ spec_sequences..., last_absolute_position, spec_taps..., spec_non_sequences... ]
            inputs = list(inputs)
            last_absolute_chosen = inputs.pop(len(spec.sequences))
            scan_rout = self.lstmstack.sample_scan_routine(spec, *inputs)

            last_rel_pos, last_out, cur_kwargs = scan_rout.send(None)

            new_pos = self.encoding.get_new_relative_position(last_absolute_chosen, last_rel_pos, last_out, self.bounds.lowbound, self.bounds.highbound, **cur_kwargs)
            addtl_kwargs = {
                "last_output": last_out
            }

            out_activations = scan_rout.send((new_pos, addtl_kwargs))
            out_probs = self.encoding.decode_to_probs(out_activations,new_pos,self.bounds.lowbound, self.bounds.highbound)
            sampled_note = Encoding.sample_absolute_probs(self.srng, out_probs)
            encoded_output = self.encoding.note_to_encoding(sampled_note, new_pos, self.bounds.lowbound, self.bounds.highbound)
            scan_outputs = scan_rout.send(encoded_output)
            scan_rout.close()

            return [sampled_note, out_probs] + scan_outputs

        outputs_info = [{"initial":T.zeros((n_batch,),'int32'), "taps":[-1]}, None] + spec.outputs_info
        result, updates = theano.scan(fn=_scan_fn, sequences=spec.sequences, non_sequences=spec.non_sequences, outputs_info=outputs_info)
        all_chosen = result[0].dimshuffle((1,0))
        all_probs = result[1].dimshuffle((1,0,2))

        self.generate_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=all_chosen,
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))

        self.generate_visualize_fun = theano.function(
            inputs=[chord_roots, chord_types],
            updates=updates,
            outputs=[all_chosen, all_probs],
            allow_input_downcast=True,
            mode=(NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) if self.nanguard else None))
Example 13
def get_input_vectors(shape, phases, scaling, offset):
    x = T.repeat(offset[0] + T.arange(shape[0]) / scaling, shape[1] * phases).reshape(
        (shape[0], shape[1], phases)) * T.pow(2, T.arange(phases))
    y = T.repeat(T.tile(offset[1] + T.arange(shape[1]) / scaling, shape[0]).reshape(
        (shape[0], shape[1], 1)), phases, axis=2) * T.pow(2, T.arange(phases))
    z = T.tile(offset[2] + 10 * T.arange(phases), shape[0] * shape[1]).reshape((shape[0], shape[1], phases, 1))
    x = x.reshape((shape[0], shape[1], phases, 1))
    y = y.reshape((shape[0], shape[1], phases, 1))
    return T.concatenate([x, y, z], axis=3).reshape((shape[0] * shape[1] * phases, 3)).astype('float32')
Example 14
 def initial_states(self, batch_size, *args, **kwargs):
     states_dict = self.fst.expand({self.fst.fst.start: 0.0})
     states = tensor.as_tensor_variable(
         self.transition.pad(states_dict.keys(), NOT_STATE))
     states = tensor.tile(states[None, :], (batch_size, 1))
     weights = tensor.as_tensor_variable(
         self.transition.pad(states_dict.values(), 0))
     weights = tensor.tile(weights[None, :], (batch_size, 1))
     add = self.probability_computer(states, weights)
     return states, weights, add
Example 15
 def _loopoverallball(self, ballid,batchid):        
     ox=self.middle[batchid][ballid*2].reshape((1,1))
     print "ox:",ox.ndim
     x=T.tile(ox,(self.height,self.width))
     oy=self.middle[batchid][ballid*2+1].reshape((1,1))
     y=T.tile(oy,(self.height,self.width))
     w=T.tile(T.arange(0,self.width),(self.height,)).reshape((self.height,self.width))
     h=T.tile(T.arange(0,self.height).reshape((self.height,1)),(1,self.width))
     cof=(T.pow(x-w,2)+T.pow(y-h,2))*(-1.0/self.sigma)        
     print T.exp(cof).ndim
     return T.exp(cof)
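A NumPy sketch (not from the original source) of the per-ball map built above: a (height, width) grid of exp(-((ox - w)^2 + (oy - h)^2) / sigma) centred at the ball position (ox, oy).

import numpy as np

height, width, sigma = 4, 5, 2.0
ox, oy = 2.0, 1.0
w = np.tile(np.arange(width), (height, 1))                      # column indices
h = np.tile(np.arange(height).reshape(height, 1), (1, width))   # row indices
print(np.exp(-((ox - w) ** 2 + (oy - h) ** 2) / sigma))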
Example 16
    def __init__(self, rng, input, num_filters, input_shape):
        self.K = num_filters
        self.N = input_shape[2] * input_shape[3]
        self.D = input_shape[1]
        self.B = input_shape[0]

        self.input = input

        filter_shape = (self.K, self.D, 1, 1)

        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))

        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        c_bound = numpy.sqrt(1. / (self.K * self.D))
        self.c = theano.shared(
            numpy.asarray(
                rng.uniform(low=-c_bound, high=c_bound, size=(self.K, self.D)),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        conved = conv2d(input, self.W,
                        input_shape=input_shape,
                        filter_shape=filter_shape)

        conved = conved + self.b.dimshuffle('x', 0, 'x', 'x')
        conved = conved.reshape((self.B, self.K, self.N))
        a = self.softmax3d(conved)

        x = input.reshape((self.B, self.D, self.N))

        v = theano.shared(numpy.zeros((self.B, self.K, self.D), dtype=theano.config.floatX))

        for k in range(self.K):
            ar = T.tile(a[:,k], (1,self.D)).reshape((self.B, self.D, self.N))
            cr = T.tile(self.c[k].reshape((1,self.D,1)), (self.B, 1, self.N))
            vr = (ar*(x+cr)).sum(2)
            g = T.sqrt((vr**2).sum(1))  # add eps?
            v = T.set_subtensor(v[:,k,:], vr/T.tile(g.reshape((self.B, 1)), (1, self.D)))

        # v = v/T.sqrt((v**2).sum())  # whole normalize
        self.output = v
        self.params = [self.W, self.b, self.c]
Example 17
    def apply(self, v):
        if self.n_batch == 1:
            h_init = [T.tile(self.h0, (v.shape[1], 1)),
                        T.tile(self.c0, (v.shape[1], 1))]
        else:
            h_init = [self.h0, self.c0]

        [h_vals, _], _ = theano.scan(fn=self.step, 
                                        sequences = v, 
                                        outputs_info = h_init)
        return h_vals
Example 18
    def get_output_for(self, input, get_details=False, **kwargs):

        input = input.dimshuffle(1, 0, 2)

        def step(x_t, M_tm1, h_tm1, state_tm1, ww_tm1, wr_tm1, *params):
            # Update the memory (using w_tm1 of the writing heads & M_tm1)
            M_t = self.write_heads.write(h_tm1, ww_tm1, M_tm1)

            # Get the read vector (using w_tm1 of the reading heads & M_t)
            r_t = self.read_heads.read(wr_tm1, M_t)

            # Apply the controller (using x_t, r_t & the requirements for the controller)
            h_t, state_t = self.controller.step(x_t, r_t, h_tm1, state_tm1)

            # Update the weights (using h_t, M_t & w_tm1)
            ww_t = self.write_heads.get_weights(h_t, ww_tm1, M_t)
            wr_t = self.read_heads.get_weights(h_t, wr_tm1, M_t)

            return [M_t, h_t, state_t, ww_t, wr_t]

        memory_init = T.tile(self.memory.memory_init, (input.shape[1], 1, 1))
        memory_init = T.unbroadcast(memory_init, 0)

        write_weights_init = T.tile(self.write_heads.weights_init, (input.shape[1], 1, 1))
        write_weights_init = T.unbroadcast(write_weights_init, 0)
        read_weights_init = T.tile(self.read_heads.weights_init, (input.shape[1], 1, 1))
        read_weights_init = T.unbroadcast(read_weights_init, 0)

        non_seqs = self.controller.get_params() + self.memory.get_params() + \
            self.write_heads.get_params() + self.read_heads.get_params()

        hids, _ = theano.scan(
            fn=step,
            sequences=input,
            outputs_info=[memory_init] + self.controller.outputs_info(input.shape[1]) + \
                         [write_weights_init, read_weights_init],
            non_sequences=non_seqs,
            strict=True)

        # dimshuffle back to (n_batch, n_time_steps, n_features)
        if get_details:
            hid_out = [
                hids[0].dimshuffle(1, 0, 2, 3),
                hids[1].dimshuffle(1, 0, 2),
                hids[2].dimshuffle(1, 0, 2),
                hids[3].dimshuffle(1, 0, 2, 3),
                hids[4].dimshuffle(1, 0, 2, 3)]
        else:
            if self.only_return_final:
                hid_out = hids[1][-1]
            else:
                hid_out = hids[1].dimshuffle(1, 0, 2)

        return hid_out
    def log_f_hat(self):

        v_W = 1.0 / (1.0 / self.N * (1.0 / self.v_W - 1.0 / self.v_prior))
        m_W = 1.0 / self.N * self.m_W / self.v_W * v_W
        v_b = 1.0 / (1.0 / self.N * (1.0 / self.v_b - 1.0 / self.v_prior))
        m_b = 1.0 / self.N * self.m_b / self.v_b * v_b

        log_f_hat_W = T.sum(-0.5 * T.tile(1.0 / v_W, [ self.n_samples, 1, 1 ]) * self.W**2 + \
            T.tile(m_W / v_W, [ self.n_samples, 1, 1 ]) * self.W, axis = [ 1, 2 ], keepdims = True)[ :, :, 0 ]
        log_f_hat_b = T.sum(-0.5 * T.tile(1.0 / v_b, [ self.n_samples, 1, 1 ]) * self.b**2 + \
            T.tile(m_b /  v_b, [ self.n_samples, 1, 1 ]) * self.b, axis = [ 1, 2 ], keepdims = True)[ :, :, 0 ]

        return log_f_hat_W + log_f_hat_b
 def getKMeansLoss(self, latent_space_expression, soft_assignments, t_cluster_centers, num_clusters, latent_space_dim, num_samples, soft_loss=False):
     # K-means loss: weighted sum of the distances of the latent-space representations of the inputs from the cluster centers
     z = latent_space_expression.reshape((num_samples, 1, latent_space_dim))
     z = T.tile(z, (1, num_clusters, 1))
     u = t_cluster_centers.reshape((1, num_clusters, latent_space_dim))
     u = T.tile(u, (num_samples, 1, 1))
     distances = (z - u).norm(2, axis=2).reshape((num_samples, num_clusters))
     if soft_loss:
         weighted_distances = distances * soft_assignments
         loss = weighted_distances.sum(axis=1).mean()
     else:
         loss = distances.min(axis=1).mean()
     return loss
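A NumPy sketch (not from the original source) of the same K-means-style loss: pairwise distances between latent points and cluster centres, combined either with soft assignments or with a hard minimum; the soft assignments here are illustrative placeholders.

import numpy as np

z = np.random.rand(8, 4)            # (num_samples, latent_space_dim)
u = np.random.rand(3, 4)            # (num_clusters, latent_space_dim)
d = np.linalg.norm(z[:, None, :] - u[None, :, :], axis=2)       # (num_samples, num_clusters)
soft = np.exp(-d) / np.exp(-d).sum(axis=1, keepdims=True)       # placeholder soft assignments
print((d * soft).sum(axis=1).mean())                            # soft loss
print(d.min(axis=1).mean())                                     # hard loss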
    def update_sample_weights(self):

        # We update the mean and variances of q

        self.v_W = self.v_prior * self.logistic(self.log_var_param_W)
        self.m_W = self.mean_param_W

        self.v_b = self.v_prior * self.logistic(self.log_var_param_b)
        self.m_b = self.mean_param_b

        # We update the random samples for the network weights

        self.W = self.randomness_W * T.tile(T.sqrt(self.v_W), [ self.n_samples, 1, 1 ]) + T.tile(self.m_W, [ self.n_samples, 1, 1 ])
        self.b = self.randomness_b * T.tile(T.sqrt(self.v_b), [ self.n_samples, 1, 1 ]) + T.tile(self.m_b, [ self.n_samples, 1, 1 ])
Example 22
    def compute_log_averaged_ei(self, x, X, randomness, incumbent):

        # We compute the old predictive mean at x
        
        Kzz = compute_kernel(self.lls, self.lsf, self.z, self.z) + T.eye(self.z.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * casting(self.n_points - self.set_for_training) / casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(covCavity, casting(self.n_points - self.set_for_training) / casting(self.n_points) * self.mParamPost)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        m_old_x = T.dot(Kxz, KzzInvmeanCavity)

        # We compute the old predictive mean at X

        KXz = compute_kernel(self.lls, self.lsf, X, self.z)
        m_old_X = T.dot(KXz, KzzInvmeanCavity)

        # We compute the required cross covariance matrices

        KXX = compute_kernel(self.lls, self.lsf, X, X) - T.dot(T.dot(KXz, KzzInv), KXz.T) + T.eye(X.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        KXXInv = T.nlinalg.MatrixInversePSD()(KXX)

        KxX = compute_kernel(self.lls, self.lsf, x, X)
        xX = T.concatenate([ x, X ], 0)
        KxXz = compute_kernel(self.lls, self.lsf, xX, self.z)
        KxX = KxX - T.dot(T.dot(KxXz[ 0 : x.shape[ 0], : ], KzzInv), KxXz[ x.shape[ 0 ] : xX.shape[ 0 ], : ].T)

        # We compute the new posterior mean

        samples_internal = T.dot(MatrixChol()(KXX), randomness)

        new_predictive_mean = T.tile(m_old_x, [ 1, randomness.shape[ 1 ] ]) + T.dot(KxX, T.dot(KXXInv, samples_internal))

        # We compute the new posterior variance

        z_expanded = T.concatenate([ self.z, X ], 0)
        Kxz_expanded = compute_kernel(self.lls, self.lsf, x, z_expanded)
        Kzz_expanded = compute_kernel(self.lls, self.lsf, z_expanded, z_expanded) + T.eye(z_expanded.shape[ 0 ]) * self.jitter * T.exp(self.lsf)
        Kzz_expandedInv = T.nlinalg.MatrixInversePSD()(Kzz_expanded)
        v_out = T.exp(self.lsf) - T.dot(Kxz_expanded * T.dot(Kxz_expanded, Kzz_expandedInv), T.ones_like(z_expanded[ : , 0 : 1 ]))
        new_predictive_var = T.tile(v_out, [ 1, randomness.shape[ 1 ] ])

        s = (incumbent - new_predictive_mean) / T.sqrt(new_predictive_var)

        log_ei = T.log((incumbent - new_predictive_mean) * ratio(s) + T.sqrt(new_predictive_var)) + log_n_pdf(s)

        return T.mean(LogSumExp(log_ei, 1), 1)
Example 23
def _transform_thin_plate_spline(
        dest_offsets, input, right_mat, L_inv, source_points, out_height,
        out_width, precompute_grid, downsample_factor):

    num_batch, num_channels, height, width = input.shape
    num_control_points = source_points.shape[1]

    # reshape destination offsets to be (num_batch, 2, num_control_points)
    # and add to source_points
    dest_points = source_points + T.reshape(
            dest_offsets, (num_batch, 2, num_control_points))

    # Solve as in ref [2]
    coefficients = T.dot(dest_points, L_inv[:, 3:].T)

    if precompute_grid:

        # Transform each point on the source grid (image_size x image_size)
        right_mat = T.tile(right_mat.dimshuffle('x', 0, 1), (num_batch, 1, 1))
        transformed_points = T.batched_dot(coefficients, right_mat)

    else:

        # Transformed grid
        out_height = T.cast(height / downsample_factor[0], 'int64')
        out_width = T.cast(width / downsample_factor[1], 'int64')
        orig_grid = _meshgrid(out_height, out_width)
        orig_grid = orig_grid[0:2, :]
        orig_grid = T.tile(orig_grid, (num_batch, 1, 1))

        # Transform each point on the source grid (image_size x image_size)
        transformed_points = _get_transformed_points_tps(
                orig_grid, source_points, coefficients, num_control_points,
                num_batch)

    # Get out new points
    x_transformed = transformed_points[:, 0].flatten()
    y_transformed = transformed_points[:, 1].flatten()

    # dimshuffle input to  (bs, height, width, channels)
    input_dim = input.dimshuffle(0, 2, 3, 1)
    input_transformed = _interpolate(
            input_dim, x_transformed, y_transformed,
            out_height, out_width)

    output = T.reshape(input_transformed,
                       (num_batch, out_height, out_width, num_channels))
    output = output.dimshuffle(0, 3, 1, 2)  # dimshuffle to conv format
    return output
    def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"
        squashed = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))

        rsp = T.nnet.softmax(squashed[:,:3])
        c1 = T.nnet.softmax(squashed[:,3:7])
        c2 = T.nnet.softmax(squashed[:,7:10])
        octave_choice = T.nnet.softmax(squashed[:,10:])
        octave_notes = T.tile(c1,(1,3)) * T.tile(c2,(1,4))
        full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1), (-1,12*self.num_octaves))
        full_probs = T.concatenate([rsp[:,:2], T.shape_padright(rsp[:,2])*full_notes], 1)

        newshape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
        fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
        return fixed
def logp_theano_claims(l,nObs,T,Z,L,X,O_on):

    #O_on = O_on.astype(np.bool)
    # tempVec is 1-X*Z
    tempVec =  (1. - X.reshape((nObs,1,X.shape[1]))*(Z.T).reshape((1,Z.shape[1],Z.shape[0])))
    # Add the contribution from O = 1
    logLike = TT.log(1-(1-TT.tile(L[np.newaxis,:],(nObs,1))[O_on.nonzero()])*TT.prod(tempVec[O_on.nonzero()],axis=1,no_zeros_in_input=True)).sum()
    #logLike = TT.log(1-(1-TT.tile(L[np.newaxis,:],(nObs,1))[O_on.nonzero()])*tempVec[O_on.nonzero()].prod(axis=1,no_zeros_in_input=True)).sum()
    #logLike = TT.log(1-(1-TT.tile(L[np.newaxis,:],(nObs,1))[O_on.nonzero()])*tempVec[O_on.nonzero()].prod(axis=1)).sum()

    # Add the contribution from O = 0
    logLike += TT.log((1-TT.tile(L[np.newaxis,:],(nObs,1))[(1-O_on).nonzero()])*TT.prod(tempVec[(1-O_on).nonzero()],axis=1,no_zeros_in_input=True)).sum()
    #logLike += TT.log((1-TT.tile(L[np.newaxis,:],(nObs,1))[(1-O_on).nonzero()])*tempVec[(1-O_on).nonzero()].prod(axis=1)).sum()

    return logLike
Example 26
    def def_invert(self, model, batch_size=1, d_weight=0.5, nc=1, lr=0.1, b1=0.9, nz=100, use_bin=True):
        d_weight_r = sharedX(d_weight)
        x_c = T.tensor4()
        m_c = T.tensor4()
        x_e = T.tensor4()
        m_e = T.tensor4()
        z0 = T.matrix()
        z = sharedX(floatX(np_rng.uniform(-1., 1., size=(batch_size, nz))))
        gx = model.model_G(z)
        # input: im_c: 255: no edge; 0: edge; transform=> 1: no edge, 0: edge

        if nc == 1: # gx, range [0, 1] => edge, 1
            gx3 = 1.0-gx #T.tile(gx, (1, 3, 1, 1))
        else:
            gx3 = gx
        mm_c = T.tile(m_c, (1, gx3.shape[1], 1, 1))
        color_all = T.mean(T.sqr(gx3 - x_c) * mm_c, axis=(1, 2, 3)) / (T.mean(m_c, axis=(1, 2, 3)) + sharedX(1e-5))
        gx_edge = self.hog.get_hog(gx3)
        x_edge = self.hog.get_hog(x_e)
        mm_e = T.tile(m_e, (1, gx_edge.shape[1], 1, 1))
        sum_e = T.sum(T.abs_(mm_e))
        sum_x_edge = T.sum(T.abs_(x_edge))
        edge_all = T.mean(T.sqr(x_edge - gx_edge) * mm_e, axis=(1, 2, 3)) / (T.mean(m_e, axis=(1, 2, 3)) + sharedX(1e-5))
        rec_all = color_all + edge_all * sharedX(0.2)
        z_const = sharedX(5.0)
        init_all = T.mean(T.sqr(z0 - z)) * z_const

        if d_weight > 0:
            print('using D')
            p_gen = model.model_D(gx)
            real_all = T.nnet.binary_crossentropy(p_gen, T.ones(p_gen.shape)).T
            cost_all = rec_all + d_weight_r * real_all[0] + init_all
        else:
            print('without D')
            cost_all = rec_all + init_all
            real_all = T.zeros(cost_all.shape)

        cost = T.sum(cost_all)
        d_updater = updates.Adam(lr=sharedX(lr), b1=sharedX(b1))
        output = [gx, cost, cost_all, rec_all, real_all, init_all, sum_e, sum_x_edge]

        print('COMPILING...')
        t = time()

        z_updates = d_updater([z], cost)
        _invert = theano.function(inputs=[x_c, m_c, x_e, m_e, z0], outputs=output, updates=z_updates)
        print('%.2f seconds to compile _invert function' % (time() - t))
        return [_invert, z_updates, z, d_weight_r, z_const]
Example 27
def flow(init_W,init_b,nData):
    import theano
    import theano.tensor as T

    n_layers = len(init_b)

    bias = []
    weights = []
    muStates = []
    for layer_i in xrange(n_layers):
        bias.append(theano.shared(value=init_b[layer_i],
                                    name='b'+str(layer_i),
                                    borrow=True))
        weights.append(theano.shared(value=init_W[layer_i],
                                    name='W'+str(layer_i),
                                    borrow=True))
        muStates.append(T.matrix('mu'+str(layer_i)))

    flows = 0.
    for layer_i in xrange(n_layers):
        diffe = T.tile(bias[layer_i].copy(), (nData,1))
        # All layers except top
        if layer_i < (n_layers-1):
            W_h = weights[layer_i].dot(muStates[layer_i+1].T).T
            diffe += W_h

        if layer_i > 0:
            vT_W = muStates[layer_i-1].dot(weights[layer_i-1])
            diffe += vT_W

        exK = muStates[layer_i]*T.exp(.5*-diffe) + (1.-muStates[layer_i])*T.exp(.5*diffe)
        flows += exK.sum()
    return flows
Example 28
    def get_regs(self, states_0_, states, M):
        """
        Additional regularization terms.

        """
        regs = 0

        if self.L1_Wrec > 0:
            W = self.params['Wrec']
            regs += self.L1_Wrec * tensor.mean(abs(W))

        if self.L2_Wrec > 0:
            W = self.params['Wrec']
            regs += self.L2_Wrec * tensor.mean(tensor.sqr(W))

        #---------------------------------------------------------------------------------
        # Firing rates
        #---------------------------------------------------------------------------------

        if self.L2_r > 0:
            baseline = 0.

            M_ = (tensor.tile(M.T, (states.shape[-1], 1, 1))).T
            states_all = tensor.concatenate(
                [states_0_.reshape((1, states_0_.shape[0], states_0_.shape[1])), states],
                axis=0
                )
            r = self.f_hidden(states_all)
            regs += self.L2_r * tensor.sum(tensor.sqr(r - baseline)*M_)/tensor.sum(M_)

        #---------------------------------------------------------------------------------

        return regs
Example 29
def _create_maximum_activation_update(output, record, streamindex, topn):
    """
    Calculates update of the topn maximums for one batch of outputs.
    """
    dims, maximums, indices, snapshot = record
    counters = tensor.tile(tensor.shape_padright(
        tensor.arange(output.shape[0]) + streamindex), (1, output.shape[1]))
    if len(dims) == 1:
        # output is a 2d tensor, (cases, units) -> activation
        tmax = output
        # counters is a 2d tensor broadcastable (cases, units) -> case_index
        tind = counters
    else:
        # output is a 4d tensor: fmax flattens it to 3d
        fmax = output.flatten(ndim=3)
        # fargmax is a 2d tensor containing rolled maximum locations
        fargmax = fmax.argmax(axis=2)
        # fetch the maximum. tmax is 2d, (cases, units) -> activation
        tmax = _apply_index(fmax, fargmax, axis=2)
        # targmax is a tuple that separates rolled-up location into (x, y)
        targmax = divmod(fargmax, dims[2])
        # tind is a 3d tensor (cases, units, 3) -> case_index, maxloc
        # this will match indices which is a 3d tensor also
        tind = tensor.stack((counters, ) + targmax, axis=2)
    cmax = tensor.concatenate((maximums, tmax), axis=0)
    cind = tensor.concatenate((indices, tind), axis=0)
    cargsort = (-cmax).argsort(axis=0)[:topn]
    newmax = _apply_perm(cmax, cargsort, axis=0)
    newind = _apply_perm(cind, cargsort, axis=0)
    updates = [(maximums, newmax), (indices, newind)]
    if snapshot:
        csnap = tensor.concatenate((snapshot, output), axis=0)
        newsnap = _apply_perm(csnap, cargsort, axis=0)
        updates.append((snapshot, newsnap))
    return updates
Example 30
def nin(X, param):
    w1, w2, w3, b1, b2, b3 = param
    X = X.dimshuffle(0, 1, 'x', 2, 3)  # (n,32,1,r,c)
    w1 = w1.dimshuffle(0, 1, 2, 'x', 3, 4)  # (64,32,16,1,3,3)
    w2 = w2.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,16,1,1)
    w3 = w3.dimshuffle(0, 1, 2, 'x', 'x')  # (64,2,32,1,1)
    b1 = b1.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,16,1,1)
    b2 = b2.dimshuffle(0, 1, 'x', 2, 'x', 'x')  # (64,32,1,1,1,1)
    b3 = b3.dimshuffle(0, 'x', 1, 'x', 'x')  # (64,1,2,1,1)
    indexi = T.arange(w1.shape[0], dtype='int32')  # (0:64)
    indexi = T.repeat(indexi, w1.shape[1], axis=0)
    indexj = T.arange(w1.shape[1], dtype='int32')  # (0:32)
    indexj = T.tile(indexj, w1.shape[0])
    results, updates = scan(fn=metaOp1,
                            sequences=[indexi, indexj],
                            outputs_info=None,
                            non_sequences=[X, w1, w2, b1, b2],
                            strict=True)  # (64*32,n,1,r,c)
    metaShape1 = results.shape[-4], results.shape[-2], results.shape[-1]
    reshaped1 = results.reshape((w1.shape[0], w1.shape[1]) + metaShape1)  # (64,32,n,r,c)
    permuted1 = T.transpose(reshaped1, axes=(0, 2, 1, 3, 4))  # (64,n,32,r,c)
    indexi = T.arange(w1.shape[0], dtype='int32')  # (0:64)
    results, updates = scan(fn=metaOp2,
                            sequences=[indexi],
                            outputs_info=None,
                            non_sequences=[permuted1, w3, b3],
                            strict=True)  # (64,n,2,r,c)
    permuted2 = T.transpose(results, axes=(1, 0, 2, 3, 4))  # (n,64,2,r,c)
    metaShape2 = permuted2.shape[-2], permuted2.shape[-1]
    reshaped2 = permuted2.reshape((permuted2.shape[0], -1) + metaShape2)  # (n,128,r,c)
    return reshaped2
def build_model(tparams,options):
    """
    @function: build the model
    """
    opt_ret = dict()
    
    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.)) 
    
    x_mask = tensor.matrix('x_mask',dtype='float32')
    y = tensor.matrix('y',dtype='int64')
    y_mask = tensor.matrix('y_mask',dtype='float32')
    
    # encoder
    x,ctx = build_encoder(tparams,options,trng,use_noise,x_mask,sampling=False)
    n_samples = x.shape[1]
    n_timesteps_trg = y.shape[0]
    
    if options['use_dropout']:
        retain_probability_emb = 1-options['dropout_embedding']
        retain_probability_hidden = 1-options['dropout_hidden']
        retain_probability_target = 1-options['dropout_target']
       
        if options['model_version'] < 0.1:
            scaled = False
        else:
            scaled = True
        rec_dropout_d = shared_dropout_layer((5, n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
        emb_dropout_d = shared_dropout_layer((2, n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
        ctx_dropout_d = shared_dropout_layer((4, n_samples, 2*options['dim']), use_noise, trng, retain_probability_hidden, scaled)
        target_dropout = shared_dropout_layer((n_timesteps_trg, n_samples, 1), use_noise, trng, retain_probability_target, scaled)
        target_dropout = tensor.tile(target_dropout, (1,1,options['dim_word']))
    else:
        rec_dropout_d = theano.shared(numpy.array([1.]*5, dtype='float32'))
        emb_dropout_d = theano.shared(numpy.array([1.]*2, dtype='float32'))
        ctx_dropout_d = theano.shared(numpy.array([1.]*4, dtype='float32'))
    
    # mean of the context (across time) will be used to initialize the decoder rnn
    ctx_mean = (ctx*x_mask[:,:,None]).sum(0) / x_mask.sum(0)[:,None]
    # or you can use the last state of forward+backward encoder rnns
    # ctx_mean = concatenate([proj[0][-1],projr[0][-1]],axis=proj[0].ndim-2)
    
    if options['use_dropout']:
        ctx_mean *= shared_dropout_layer((n_samples,2*options['dim']),use_noise,trng,retain_probability_hidden,scaled)
    
    # initial decoder state
    init_state = fflayer(tparams,ctx_mean,options,
                          prefix='ff_state',activ='tanh')
    
    # word embedding (target), we will shift the target sequence one time step
    # to the right. This is done because of the bi-gram connections in the
    # readout and decoder rnn. The first target will be all zeros and we will
    # not condition on the last output.
    emb = tparams['Wemb_dec'][y.flatten()]
    emb = emb.reshape([n_timesteps_trg, n_samples, options['dim_word']])

    emb_shifted = tensor.zeros_like(emb)
    emb_shifted = tensor.set_subtensor(emb_shifted[1:], emb[:-1])
    emb = emb_shifted

    if options['use_dropout']:
        emb *= target_dropout
    
    # decoder - pass through the decoder conditional gru with attention
    proj = gru_cond_layer(tparams, emb, options,
                                prefix='decoder',
                                mask=y_mask, context=ctx,
                                context_mask=x_mask,
                                one_step=False,
                                init_state=init_state,
                                emb_dropout=emb_dropout_d,
                                ctx_dropout=ctx_dropout_d,
                                rec_dropout=rec_dropout_d,
                                profile=profile)
    
    # hidden states of the decoder gru
    proj_h = proj[0]

    # weighted averages of context, generated by attention module
    ctxs = proj[1]
    
    if options['use_dropout']:
        proj_h *= shared_dropout_layer((n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
        emb *= shared_dropout_layer((n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
        ctxs *= shared_dropout_layer((n_samples, 2*options['dim']), use_noise, trng, retain_probability_hidden, scaled)
    
    # weights (alignment matrix) #####LIUCAN: this is where the attention vector is.
    opt_ret['dec_alphas'] = proj[2]

    # compute word probabilities
    logit_lstm = fflayer(tparams, proj_h, options,
                                    prefix='ff_logit_lstm', activ='linear')
    logit_prev = fflayer(tparams, emb, options,
                                    prefix='ff_logit_prev', activ='linear')
    logit_ctx = fflayer(tparams, ctxs, options,
                                   prefix='ff_logit_ctx', activ='linear')
    logit = tensor.tanh(logit_lstm+logit_prev+logit_ctx)

    if options['use_dropout']:
        logit *= shared_dropout_layer((n_samples, options['dim_word']), use_noise, trng, retain_probability_hidden, scaled)
    
    # generate t_j, used to obtain the quality vector
    tt = logit
    
    
    logit = fflayer(tparams, logit, options,
                                   prefix='ff_logit', activ='linear')
    logit_shp = logit.shape
    probs = tensor.nnet.softmax(logit.reshape([logit_shp[0]*logit_shp[1],
                                               logit_shp[2]]))

    # cost
    y_flat = y.flatten()
    y_flat_idx = tensor.arange(y_flat.shape[0]) * options['n_words_tgt'] + y_flat
    cost = -tensor.log(probs.flatten()[y_flat_idx])
    cost = cost.reshape([y.shape[0], y.shape[1]])
    cost = (cost * y_mask).sum(0)

    return trng, use_noise, x, x_mask, y, y_mask, opt_ret, cost, ctx, tt
def build_encoder(tparams,options,trng,use_noise,x_mask=None,sampling=False):
    
    x = tensor.matrix('x',dtype='int64')
    x.tag.test_value = (numpy.random.rand(5,10)*100).astype('int64')
    
    #for the backward rnn, we just need to invert x
    xr = x[::-1]   # differs here: xr = x[:,::-1]
    if x_mask is None:  # at sampling / test time
        xr_mask = None
    else:
        xr_mask = x_mask[::-1]
    
    # number of timesteps and number of samples
    n_timesteps = x.shape[0]
    n_samples = x.shape[1]
    
    # whether to use dropout
    if options['use_dropout']:
        retain_probability_emb = 1-options['dropout_embedding']
        retain_probability_hidden = 1-options['dropout_hidden']
        retain_probability_source = 1-options['dropout_source']
        if sampling:
            if options['model_version'] < 0.1:
                rec_dropout = theano.shared(numpy.array([retain_probability_hidden]*2, dtype='float32'))
                rec_dropout_r = theano.shared(numpy.array([retain_probability_hidden]*2, dtype='float32'))
                emb_dropout = theano.shared(numpy.array([retain_probability_emb]*2, dtype='float32'))
                emb_dropout_r = theano.shared(numpy.array([retain_probability_emb]*2, dtype='float32'))
                source_dropout = theano.shared(numpy.float32(retain_probability_source))
            else:
                rec_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
                rec_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
                emb_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
                emb_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
                source_dropout = theano.shared(numpy.float32(1.))
        else:
            if options['model_version'] < 0.1:
                scaled = False
            else:
                scaled = True
            rec_dropout = shared_dropout_layer((2, n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
            rec_dropout_r = shared_dropout_layer((2, n_samples, options['dim']), use_noise, trng, retain_probability_hidden, scaled)
            emb_dropout = shared_dropout_layer((2, n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
            emb_dropout_r = shared_dropout_layer((2, n_samples, options['dim_word']), use_noise, trng, retain_probability_emb, scaled)
            source_dropout = shared_dropout_layer((n_timesteps, n_samples, 1), use_noise, trng, retain_probability_source, scaled)
            source_dropout = tensor.tile(source_dropout, (1,1,options['dim_word']))
    else:
        rec_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
        rec_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
        emb_dropout = theano.shared(numpy.array([1.]*2, dtype='float32'))
        emb_dropout_r = theano.shared(numpy.array([1.]*2, dtype='float32'))
    
    # word embedding for forward rnn (source)
    emb = tparams['Wemb'][x.flatten()]     # differs here
    emb = emb.reshape([n_timesteps,n_samples,options['dim_word']])
    if options['use_dropout']:
        emb *= source_dropout
    
    proj = gru_layer(tparams,emb,options,
                     prefix='encoder',
                     mask=x_mask,
                     emb_dropout=emb_dropout,
                     rec_dropout=rec_dropout,
                     profile=profile)
    
    # word embedding for backward rnn (source)
    embr = tparams['Wemb'][xr.flatten()]
    embr = embr.reshape([n_timesteps,n_samples,options['dim_word']])
    if options['use_dropout']:
        if sampling:
            embr *= source_dropout
        else:
            embr *= source_dropout[::-1]
    
    projr = gru_layer(tparams,embr,options,
                      prefix='encoder_r',
                      mask=xr_mask,
                      emb_dropout=emb_dropout_r,
                      rec_dropout=rec_dropout,
                      profile=profile)
    
    #context will be the concatenation of forward and backward rnns
    ctx = concatenate([proj[0],projr[0][::-1]],axis=proj[0].ndim-1)
    
    return x,ctx
Example 33
    def _setup_functions(self):

        # Actual parameter lengths.
        #sh_w_n = (self.n_state + self.n_actions + 1, self.n_state + 1, self.n_state)
        #print("sh_w_n", sh_w_n)
        sh_w_n = (self.n_actions + 1, self.n_state + 1, self.n_state)
        print("sh_w_n", sh_w_n)
        sh_w_t = (self.n_tex + 1, self.n_state + 1, self.n_ray)
        print("sh_w_t", sh_w_t)
        sh_l1 = (self.n_ray + self.n_key, self.n_interaction)
        print("sh_l1", sh_l1)
        sh_l2 = (self.n_interaction, 1)
        print("sh_l2", sh_l2)

        # Memory cells.
        sh_mk = (self.n_scene, self.n_key)
        sh_mc = (self.n_scene, 4)
        print("sh_mk", sh_mk)
        print("sh_mc", sh_mc)

        if not hasattr(self, "params"):
            print('generating weights')

            # (A+1)x(S+1)xS
            wn = uniform(sh_w_n, scale=0.2)
            # (P+1)x(S+1)xR
            wt = uniform(sh_w_t, scale=0.2)
            # (R+K)xH
            wl1 = uniform(sh_l1, scale=0.2)
            # H
            wb1 = shared0s((self.n_interaction, ))
            # Hx1
            wl2 = uniform(sh_l2, scale=0.2)
            # MxK
            wmk = uniform(sh_mk, scale=0.2)
            # MxC
            wmc = uniform(sh_mc, scale=0.2)

            self.params = [wn, wt, wl1, wb1, wl2, wmk, wmc]
        else:
            wn, wt, wl1, wb1, wl2, wmk, wmc = self.params

        #TxNxA
        A = sharedX(np.zeros((2, 2, 2)), name="A")
        #TxNxP
        P = sharedX(np.zeros((2, 2, 2)), name="P")
        #TxNxC
        y = sharedX(np.zeros((2, 2, 2)), name="y")

        self.inputs = {"A": A, "P": P, "y": y}

        # Inputs: NxS, NxA
        def state_transform(a_, s_):
            # Nx(S+1)xS
            temp_ = T.tensordot(T.concatenate(
                [a_, T.ones((s_.shape[0], 1))], axis=1),
                                wn,
                                axes=[1, 0])
            # NxS
            return T.sum(
                temp_ * T.concatenate([s_, T.ones(
                    (s_.shape[0], 1))], axis=1).dimshuffle([0, 1, 'x']),
                axis=1)
            #return s_

        # TxNxS
        S, _ = theano.scan(fn=state_transform,
                           outputs_info=[T.zeros([A.shape[1], self.n_state])],
                           sequences=[A])

        # TxNx(S+1)xR
        temp_ = T.tensordot(T.concatenate(
            [P, T.ones([S.shape[0], S.shape[1], 1])], axis=2),
                            wt,
                            axes=[2, 0])

        # TxNxR Ray Elements.
        R = T.sum(temp_ *
                  T.concatenate([S, T.ones((S.shape[0], S.shape[1], 1))],
                                axis=2).dimshuffle([0, 1, 2, 'x']),
                  axis=2)

        # TxNxMx(R+K) Transformation input.
        R_2 = T.concatenate([
            T.tile(R.dimshuffle([0, 1, 'x', 2]), [1, 1, self.n_scene, 1]),
            T.tile(wmk.dimshuffle(['x', 'x', 0, 1]),
                   [R.shape[0], R.shape[1], 1, 1])
        ],
                            axis=3)

        # TxNxMxH
        L1 = sigmoid(
            T.tensordot(R_2, wl1, axes=[3, 0]) +
            wb1.dimshuffle(['x', 'x', 'x', 0]))
        # TxNxM Soft attention weights.
        Att_temp = T.exp(T.tensordot(L1, wl2, axes=[3, 0]).sum(axis=3))
        Att = Att_temp / (T.sum(Att_temp, axis=2, keepdims=True) + 0.01)
        #Att = sigmoid( T.tensordot(L1, wl2, axes=[3,0]).sum( axis=3 ) )

        # TxNxC final colors.
        Col = T.tensordot(Att, wmc, axes=[2, 0])

        rec_cost = T.sum(T.sqr(Col - y))  # / T.cast(X.shape[0], 'float32')
        cost = rec_cost

        print('getting updates')
        #updates = Adam([wt,wn,wmk,wl1,wb1,wl2,wmc], cost)
        updates = Adam(self.params, cost)

        print('compiling')
        self._fit_function = theano.function([], cost, updates=updates)

        theano.printing.debugprint(self._fit_function)

        #self._predict = theano.function([A, P], Col, allow_input_downcast=True)
        self._predict = theano.function([], Col, allow_input_downcast=True)
        #self._next_state = theano.function([A], S, allow_input_downcast=True)
        self._next_state = theano.function([], S, allow_input_downcast=True)
        #self._predict_attn = theano.function([A, P], Att, allow_input_downcast=True)
        self._predict_attn = theano.function([],
                                             Att,
                                             allow_input_downcast=True)
        # Output just the cost to check with a test set.
        #self._cost = theano.function([A,P,y], cost, allow_input_downcast=True)
        self._cost = theano.function([], cost, allow_input_downcast=True)
Example 34
def LSTM(n_input,
         n_hidden,
         n_output,
         input_type='real',
         out_every_t=False,
         loss_function='CE'):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    W_i = initialize_matrix(n_input, n_hidden, 'W_i', rng)
    W_f = initialize_matrix(n_input, n_hidden, 'W_f', rng)
    W_c = initialize_matrix(n_input, n_hidden, 'W_c', rng)
    W_o = initialize_matrix(n_input, n_hidden, 'W_o', rng)
    U_i = initialize_matrix(n_hidden, n_hidden, 'U_i', rng)
    U_f = initialize_matrix(n_hidden, n_hidden, 'U_f', rng)
    U_c = initialize_matrix(n_hidden, n_hidden, 'U_c', rng)
    U_o = initialize_matrix(n_hidden, n_hidden, 'U_o', rng)
    V_o = initialize_matrix(n_hidden, n_hidden, 'V_o', rng)
    b_i = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    b_f = theano.shared(np.ones((n_hidden, ), dtype=theano.config.floatX))
    b_c = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    b_o = theano.shared(np.zeros((n_hidden, ), dtype=theano.config.floatX))
    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    state_0 = theano.shared(np.zeros((1, n_hidden),
                                     dtype=theano.config.floatX))
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX))
    parameters = [
        W_i, W_f, W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o, h_0,
        state_0, out_mat, out_bias
    ]

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)

    def recurrence(x_t, y_t, h_prev, state_prev, cost_prev, acc_prev, W_i, W_f,
                   W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o,
                   out_mat, out_bias):

        if loss_function == 'CE':
            x_t_W_i = W_i[x_t]
            x_t_W_c = W_c[x_t]
            x_t_W_f = W_f[x_t]
            x_t_W_o = W_o[x_t]
        else:
            x_t_W_i = T.dot(x_t, W_i)
            x_t_W_c = T.dot(x_t, W_c)
            x_t_W_f = T.dot(x_t, W_f)
            x_t_W_o = T.dot(x_t, W_o)

        input_t = T.nnet.sigmoid(x_t_W_i + T.dot(h_prev, U_i) +
                                 b_i.dimshuffle('x', 0))
        candidate_t = T.tanh(x_t_W_c + T.dot(h_prev, U_c) +
                             b_c.dimshuffle('x', 0))
        forget_t = T.nnet.sigmoid(x_t_W_f + T.dot(h_prev, U_f) +
                                  b_f.dimshuffle('x', 0))

        state_t = input_t * candidate_t + forget_t * state_prev

        output_t = T.nnet.sigmoid(x_t_W_o + T.dot(h_prev, U_o) +
                                  T.dot(state_t, V_o) + b_o.dimshuffle('x', 0))

        h_t = output_t * T.tanh(state_t)

        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, state_t, cost_t, acc_t

    non_sequences = [
        W_i, W_f, W_c, W_o, U_i, U_f, U_c, U_o, V_o, b_i, b_f, b_c, b_o,
        out_mat, out_bias
    ]

    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    state_0_batch = T.tile(state_0, [x.shape[1], 1])

    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch, state_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :],
                           out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return [x, y], parameters, costs
Example #35
def train_auto(fun,
               train,
               transform,
               testdir,
               outdir,
               num_epochs_mse=30,
               num_epochs_ILD=10,
               model="1.pkl",
               scale_factor=0.3,
               load=False,
               skip_train_mse=False,
               skip_train_ILD=False,
               skip_sep=False,
               chunk_size=60,
               chunk_overlap=2,
               nsamples=40,
               batch_size=32,
               batch_memory=50,
               time_context=30,
               overlap=25,
               nprocs=4,
               mult_factor_in=0.3,
               mult_factor_out=0.3,
               mix_type='mixture'):
    """
    Trains a network built with \"fun\" with the data generated with \"train\"
    and then separates the files in \"testdir\",writing the result in \"outdir\"

    Parameters
    ----------
    fun : lasagne network object, Theano tensor
        The network to be trained
    transform : transformFFT object
        The Transform object which was used to compute the features (see compute_features_DSD100.py)
    testdir : string, optional
        The directory where the files to be separated are located
    outdir : string, optional
        The directory where to write the separated files
    num_epochs : int, optional
        The number the epochs to train for (one epoch is when all examples in the dataset are seen by the network)
    model : string, optional
        The path where to save the trained model (theano tensor containing the network)
    scale_factor : float, optional
        Scale the magnitude of the files to be separated with this factor
    Yields
    ------
    losser : list
        The losses for each epoch, stored in a list
    """

    logging.info("Building Autoencoder")
    input_var = T.tensor4('inputs')
    input_mask = T.tensor4('input_mask')
    target_var = T.tensor4('targets')

    theano_rng = RandomStreams(128)

    eps = 1e-12

    sources = ['vocals', 'bass', 'drums', 'other']

    nchannels = int(train.channels_in)
    nsources = int(train.channels_out / train.channels_in)

    print 'nchannels: ', nchannels
    print 'nsources: ', nsources

    input_size = int(float(transform.frameSize) / 2 + 1)

    rand_num = theano_rng.normal(size=(batch_size, nsources, time_context,
                                       input_size),
                                 avg=0.0,
                                 std=0.1,
                                 dtype=theano.config.floatX)

    net = fun(input_var=input_var,
              batch_size=batch_size,
              time_context=time_context,
              feat_size=input_size,
              nchannels=nchannels,
              nsources=nsources)
    network = net['l_out']
    if load:
        params = load_model(model)
        lasagne.layers.set_all_param_values(network, params)

    prediction = lasagne.layers.get_output(network, deterministic=True)

    sourceall = []
    errors_insts = []
    loss = 0

    sep_chann = []

    # prediction example for 2 sources in 2 channels:
    # 0, 1 source 0 in channel 0 and 1
    # 2, 3 source 1 in channel 0 and 1
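    # so prediction[:, j::nchannels] selects channel j of every source (for j=0:
    # maps 0 and 2), masksum is the per-channel sum over sources, and mask is each
    # source's share of that sum, i.e. a soft ratio mask applied to the mixture
    # channel input_var[:, j:j + 1, :, :]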
    for j in range(nchannels):
        masksum = T.sum(prediction[:, j::nchannels, :, :], axis=1)
        temp = T.tile(masksum.dimshuffle(0, 'x', 1, 2), (1, nsources, 1, 1))
        mask = prediction[:, j::nchannels, :, :] / (temp + eps * rand_num)
        source = mask * T.tile(input_var[:, j:j + 1, :, :],
                               (1, nsources, 1, 1)) + eps * rand_num
        sourceall.append(source)

        sep_chann.append(source)
        train_loss_recon = lasagne.objectives.squared_error(
            source, target_var[:, j::nchannels, :, :])

        errors_inst = abs(train_loss_recon.sum(axis=(0, 2, 3)))

        errors_insts.append(errors_inst)

        loss = loss + abs(train_loss_recon.sum())

    params1 = lasagne.layers.get_all_params(network, trainable=True)

    updates = lasagne.updates.adadelta(loss, params1)

    train_fn_mse = theano.function([input_var, target_var],
                                   loss,
                                   updates=updates,
                                   allow_input_downcast=True)

    train_fn1 = theano.function([input_var, target_var],
                                errors_insts,
                                allow_input_downcast=True)

    #----------NEW ILD LOSS CONDITION----------

    rand_num2 = theano_rng.normal(
        size=(batch_size, nsources, time_context, input_size),
        avg=0.0,
        std=0.1,
        dtype=theano.config.floatX)  # nsources as the first dim?
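    # ILD (interaural level difference) term: alpha = 20*log10(|channel_0 / channel_1|)
    # for both the estimated sources and the ground truth, averaged over batch,
    # source and time; the squared difference between the two (scaled by 1/500
    # below) is added to the loss.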

    #estimate

    interaural_spec_est = sep_chann[0] / (sep_chann[1] + eps * rand_num2)

    alpha_est = 20 * np.log10(abs(interaural_spec_est + eps * rand_num2))
    alpha_est_mean = alpha_est.mean(axis=(0, 1, 2))

    #groundtruth

    interaural_spec_gt = target_var[:, 0::nchannels, :, :] / (
        target_var[:, 1::nchannels, :, :] + eps * rand_num2)

    alpha_gt = 20 * np.log10(abs(interaural_spec_gt + eps * rand_num2))
    alpha_gt_mean = alpha_gt.mean(axis=(0, 1, 2))

    train_loss_ild = lasagne.objectives.squared_error(alpha_est_mean,
                                                      alpha_gt_mean)

    loss = loss + (abs(train_loss_ild.sum()) / 500)

    #------------------------------------------

    predict_function = theano.function([input_var],
                                       sourceall,
                                       allow_input_downcast=True)

    losser = []

    if not skip_train_mse:
        logging.info("1st MSE training stage...")
        for epoch in range(num_epochs_mse):

            train_err = 0
            train_batches = 0
            errs = np.zeros((nchannels, nsources))
            start_time = time.time()
            for batch in range(train.iteration_size):
                inputs, target = train()
                train_err += train_fn_mse(inputs, target)
                errs += np.array(train_fn1(inputs, target))
                train_batches += 1

            logging.info("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs_mse,
                time.time() - start_time))
            logging.info("  training loss:\t\t{:.6f}".format(train_err /
                                                             train_batches))
            for j in range(nchannels):
                for i in range(nsources):
                    logging.info("  training loss for " + sources[i] +
                                 " in mic " + str(j) +
                                 ":\t\t{:.6f}".format(errs[j][i] /
                                                      train_batches))

            model_noILD = model[:-4] + '_noILD' + model[-4:]
            print 'model_noILD: ', model_noILD
            save_model(model_noILD, network)
            losser.append(train_err / train_batches)


    #----------NEW ILD TRAINING----------

    if not skip_train_ILD:
        if not skip_train_mse:
            params = load_model(model_noILD)
            lasagne.layers.set_all_param_values(network, params)
        params1 = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adadelta(loss, params1)
        train_fn_ILD = theano.function([input_var, target_var],
                                       loss,
                                       updates=updates,
                                       allow_input_downcast=True)

        logging.info("ILD training stage...")

        for epoch in range(num_epochs_ILD):

            train_err = 0
            train_batches = 0

            start_time = time.time()
            for batch in range(train.iteration_size):
                inputs, target = train()

                train_err += train_fn_ILD(inputs, target)
                train_batches += 1

            logging.info("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs_ILD,
                time.time() - start_time))
            logging.info("  training loss:\t\t{:.6f}".format(train_err /
                                                             train_batches))

            model_ILD = model[:-4] + '_ILD' + model[-4:]
            print 'model_ILD: ', model_ILD
            save_model(model_ILD, network)
            losser.append(train_err / train_batches)

    if not skip_train_mse:
        logging.info("2nd MSE training stage...")

        params = load_model(model_ILD)
        lasagne.layers.set_all_param_values(network, params)
        params1 = lasagne.layers.get_all_params(network, trainable=True)
        updates = lasagne.updates.adadelta(loss, params1)

        for epoch in range(num_epochs_mse):

            train_err = 0
            train_batches = 0
            errs = np.zeros((nchannels, nsources))
            start_time = time.time()
            for batch in range(train.iteration_size):
                inputs, target = train()
                train_err += train_fn_mse(inputs, target)
                errs += np.array(train_fn1(inputs, target))
                train_batches += 1

            logging.info("Epoch {} of {} took {:.3f}s".format(
                epoch + 1, num_epochs_mse,
                time.time() - start_time))
            logging.info("  training loss:\t\t{:.6f}".format(train_err /
                                                             train_batches))
            for j in range(nchannels):
                for i in range(nsources):
                    logging.info("  training loss for " + sources[i] +
                                 " in mic " + str(j) +
                                 ":\t\t{:.6f}".format(errs[j][i] /
                                                      train_batches))

            model_ILD_extra_mse = model[:-4] + '_ILD_extra_mse' + model[-4:]
            print 'model_ILD_extra_mse: ', model_ILD_extra_mse
            save_model(model_ILD_extra_mse, network)
            losser.append(train_err / train_batches)

    if not skip_sep:

        logging.info("Separating")

        subsets = ['Dev', 'Test']
        for sub in subsets:
            for d in sorted(os.listdir(os.path.join(db, 'Mixtures', sub))):
                if not d.startswith('.'):
                    print os.path.join(db, 'Mixtures', sub, d,
                                       mix_type + '.wav')
                    audio, sampleRate, bitrate = util.readAudioScipy(
                        os.path.join(db, 'Mixtures', sub, d,
                                     mix_type + '.wav'))
                    nsamples = audio.shape[0]
                    sep_audio = np.zeros(
                        (nsamples, len(sources), audio.shape[1]))

                    mag, ph = transform.compute_transform(audio, phase=True)
                    mag = scale_factor * mag.astype(np.float32)
                    nframes = mag.shape[-2]

                    batches_mag, nchunks = util.generate_overlapadd(
                        mag,
                        input_size=mag.shape[-1],
                        time_context=train.time_context,
                        overlap=train.overlap,
                        batch_size=train.batch_size,
                        sampleRate=sampleRate)
                    mag = None

                    output = []
                    for b in range(len(batches_mag)):
                        output.append(predict_function(batches_mag[b]))
                    output = np.array(output)

                    for j in range(audio.shape[1]):
                        mm = util.overlapadd_multi(np.swapaxes(
                            output[:, j:j + 1, :, :, :, :], 1, 3),
                                                   batches_mag,
                                                   nchunks,
                                                   overlap=train.overlap)
                        for i in range(len(sources)):
                            audio_out = transform.compute_inverse(
                                mm[i, :ph.shape[1], :] / scale_factor, ph[j])
                            sep_audio[:, i, j] = audio_out[:len(sep_audio)]

                    print 'Saving separation: ', outdir
                    if not os.path.exists(os.path.join(outdir)):
                        os.makedirs(os.path.join(outdir))
                        print 'Creating model folder'
                    if not os.path.exists(os.path.join(outdir, 'Sources')):
                        os.makedirs(os.path.join(outdir, 'Sources'))
                        print 'Creating Sources folder: ', os.path.join(
                            outdir, 'Sources')
                    if not os.path.exists(os.path.join(outdir, 'Sources',
                                                       sub)):
                        os.makedirs(os.path.join(outdir, 'Sources', sub))
                        print 'Creating subset folder'
                    if not os.path.exists(
                            os.path.join(outdir, 'Sources', sub, d)):
                        os.makedirs(os.path.join(outdir, 'Sources', sub, d))
                        print 'Creating song folder', os.path.join(
                            outdir, 'Sources', sub, d)
                    for i in range(len(sources)):
                        print 'Final audio file: ', i, os.path.join(
                            outdir, 'Sources', sub, d, sources[i] + '.wav'
                        ), 'nsamples: ', nsamples, 'len sep_audio :', len(
                            sep_audio)
                        util.writeAudioScipy(
                            os.path.join(outdir, 'Sources', sub, d,
                                         sources[i] + '.wav'),
                            sep_audio[:nsamples, i, :], sampleRate, bitrate)

    return losser
Example #36
def hmetad_rm1way(data: dict, sample_model: bool = True, **kwargs: int):
    """Compute hierachical meta-d' at the subject level.

    This is an internal function. The repeated measures model must be
    called using :py:func:`metadPy.hierarchical.hmetad`.

    Parameters
    ----------
    data : dict
        Response data.
    sample_model : boolean
        If `False`, only the model is returned without sampling.
    **kwargs : keyword arguments
        All keyword arguments are passed to :func:`pymc3.sampling.sample`.

    Returns
    -------
    model : :py:class:`pymc3.Model` instance
        The pymc3 model. Encapsulates the variables and likelihood factors.
    trace : :py:class:`pymc3.backends.base.MultiTrace` or
        :py:class:`arviz.InferenceData`
        A `MultiTrace` or `ArviZ InferenceData` object that contains the
        samples.

    References
    ----------
    .. [#] Fleming, S.M. (2017) HMeta-d: hierarchical Bayesian estimation
    of metacognitive efficiency from confidence ratings, Neuroscience of
    Consciousness, 3(1) nix007, https://doi.org/10.1093/nc/nix007
    """
    nSubj = data["nSubj"]
    nCond = data["nCond"]
    nRatings = data["nRatings"]
    hits = data["hits"].reshape(nSubj, 2)
    falsealarms = data["falsealarms"].reshape(nSubj, 2)
    counts = data["counts"]
    Tol = data["Tol"]
    cr = data["cr"].reshape(nSubj, 2)
    m = data["m"].reshape(nSubj, 2)
    c1 = data["c1"].reshape(nSubj, 2, 1)
    d1 = data["d1"].reshape(nSubj, 2, 1)

    with Model() as model:

        #############
        # Hyperpriors
        #############
        mu_c2 = Normal("mu_c2",
                       tau=0.01,
                       shape=(1, ),
                       testval=np.random.rand() * 0.1)
        sigma_c2 = HalfNormal("sigma_c2",
                              tau=0.01,
                              shape=(1, ),
                              testval=np.random.rand() * 0.1)

        mu_D = Normal("mu_D",
                      tau=0.001,
                      shape=(1),
                      testval=np.random.rand() * 0.1)
        sigma_D = HalfNormal("sigma_D",
                             tau=0.1,
                             shape=(1),
                             testval=np.random.rand() * 0.1)

        mu_Cond1 = Normal("mu_Cond1",
                          mu=0,
                          tau=0.001,
                          shape=(1),
                          testval=np.random.rand() * 0.1)
        sigma_Cond1 = HalfNormal("sigma_Cond1",
                                 tau=0.1,
                                 shape=(1),
                                 testval=np.random.rand() * 0.1)

        #############################
        # Hyperpriors - Subject level
        #############################
        dbase_tilde = Normal(
            "dbase_tilde",
            mu=0,
            sigma=1,
            shape=(nSubj, 1, 1),
        )
        dbase = Deterministic("dbase", mu_D + sigma_D * dbase_tilde)

        Bd_Cond1_tilde = Normal(
            "Bd_Cond1_tilde",
            mu=0,
            sigma=1,
            shape=(nSubj, 1, 1),
        )

        Bd_Cond1 = Deterministic(
            "Bd_Cond1",
            mu_Cond1 + sigma_Cond1 * Bd_Cond1_tilde,
        )

        lambda_logMratio = Gamma(
            "lambda_logMratio",
            alpha=0.001,
            beta=0.001,
            shape=(nSubj, 1, 1),
        )
        sigma_logMratio = Deterministic("sigma_logMratio",
                                        1 / math.sqrt(lambda_logMratio))

        ###############################
        # Hyperprior - Condition level
        ###############################
        mu_regression = [dbase + (Bd_Cond1 * c) for c in range(nCond)]

        log_mRatio_tilde = Normal("log_mRatio_tilde",
                                  mu=0,
                                  sigma=1,
                                  shape=(nSubj, 1, 1))
        log_mRatio = Deterministic(
            "log_mRatio",
            tt.stack(mu_regression, axis=1)[:, :, :, 0] +
            tt.tile(log_mRatio_tilde,
                    (1, 2, 1)) * tt.tile(sigma_logMratio, (1, 2, 1)),
        )

        mRatio = Deterministic("mRatio", tt.exp(log_mRatio))

        # Means of SDT distributions
        metad = Deterministic("metad", mRatio * d1)
        S2mu = Deterministic("S2mu", metad / 2)
        S1mu = Deterministic("S1mu", -metad / 2)
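        # meta-d' is the type-2 sensitivity implied by mRatio * d'; the type-2 S1
        # and S2 evidence distributions are centred at -meta-d'/2 and +meta-d'/2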

        # TYPE 2 SDT MODEL (META-D)
        # Multinomial likelihood for response counts
        # Specify ordered prior on criteria
        # bounded above and below by Type 1 c
        cS1_hn = Normal(
            "cS1_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nCond, nRatings - 1),
            testval=np.linspace(-1.5, -0.5, nRatings - 1).reshape(
                1, 1, nRatings - 1).repeat(nSubj, axis=0).repeat(nCond,
                                                                 axis=1),
        )
        cS1 = Deterministic("cS1", -mu_c2 + (cS1_hn * sigma_c2))

        cS2_hn = Normal(
            "cS2_hn",
            mu=0,
            sigma=1,
            shape=(nSubj, nCond, nRatings - 1),
            testval=np.linspace(0.5, 1.5, nRatings - 1).reshape(
                1, 1, nRatings - 1).repeat(nSubj, axis=0).repeat(nCond,
                                                                 axis=1),
        )
        cS2 = Deterministic("cS2", mu_c2 + (cS2_hn * sigma_c2))

        # Calculate normalisation constants
        C_area_rS1 = cumulative_normal(c1 - S1mu)
        I_area_rS1 = cumulative_normal(c1 - S2mu)
        C_area_rS2 = 1 - cumulative_normal(c1 - S2mu)
        I_area_rS2 = 1 - cumulative_normal(c1 - S1mu)

        # Get nC_rS1 probs
        nC_rS1 = cumulative_normal(cS1 - S1mu) / C_area_rS1
        nC_rS1 = Deterministic(
            "nC_rS1",
            math.concatenate(
                ([
                    cumulative_normal(cS1[:, :, 0].reshape((nSubj, 2, 1)) -
                                      S1mu) / C_area_rS1,
                    nC_rS1[:, :, 1:] - nC_rS1[:, :, :-1],
                    ((cumulative_normal(c1 - S1mu) -
                      cumulative_normal(cS1[:, :, (nRatings - 2)].reshape(
                          (nSubj, 2, 1)) - S1mu)) / C_area_rS1),
                ]),
                axis=2,
            ),
        )

        # Get nI_rS2 probs
        nI_rS2 = (1 - cumulative_normal(cS2 - S1mu)) / I_area_rS2
        nI_rS2 = Deterministic(
            "nI_rS2",
            math.concatenate(
                ([
                    ((1 - cumulative_normal(c1 - S1mu)) -
                     (1 - cumulative_normal(cS2[:, :, 0].reshape(
                         (nSubj, nCond, 1)) - S1mu))) / I_area_rS2,
                    nI_rS2[:, :, :-1] -
                    (1 - cumulative_normal(cS2[:, :, 1:] - S1mu)) / I_area_rS2,
                    (1 - cumulative_normal(cS2[:, :, nRatings - 2].reshape(
                        (nSubj, nCond, 1)) - S1mu)) / I_area_rS2,
                ]),
                axis=2,
            ),
        )

        # Get nI_rS1 probs
        nI_rS1 = (-cumulative_normal(cS1 - S2mu)) / I_area_rS1
        nI_rS1 = Deterministic(
            "nI_rS1",
            math.concatenate(
                ([
                    cumulative_normal(cS1[:, :, 0].reshape((nSubj, nCond, 1)) -
                                      S2mu) / I_area_rS1,
                    nI_rS1[:, :, :-1] +
                    (cumulative_normal(cS1[:, :, 1:] - S2mu)) / I_area_rS1,
                    (cumulative_normal(c1 - S2mu) -
                     cumulative_normal(cS1[:, :, nRatings - 2].reshape(
                         (nSubj, nCond, 1)) - S2mu)) / I_area_rS1,
                ]),
                axis=2,
            ),
        )

        # Get nC_rS2 probs
        nC_rS2 = (1 - cumulative_normal(cS2 - S2mu)) / C_area_rS2
        nC_rS2 = Deterministic(
            "nC_rS2",
            math.concatenate(
                ([
                    ((1 - cumulative_normal(c1 - S2mu)) -
                     (1 - cumulative_normal(cS2[:, :, 0].reshape(
                         (nSubj, nCond, 1)) - S2mu))) / C_area_rS2,
                    nC_rS2[:, :, :-1] -
                    ((1 - cumulative_normal(cS2[:, :, 1:] - S2mu)) /
                     C_area_rS2),
                    (1 - cumulative_normal(cS2[:, :, nRatings - 2].reshape(
                        (nSubj, nCond, 1)) - S2mu)) / C_area_rS2,
                ]),
                axis=2,
            ),
        )

        # Avoid underflow of probabilities
        nC_rS1 = math.switch(nC_rS1 < Tol, Tol, nC_rS1)
        nI_rS2 = math.switch(nI_rS2 < Tol, Tol, nI_rS2)
        nI_rS1 = math.switch(nI_rS1 < Tol, Tol, nI_rS1)
        nC_rS2 = math.switch(nC_rS2 < Tol, Tol, nC_rS2)

        for c in range(nCond):
            Multinomial(
                f"CR_counts_{c}",
                n=cr[:, c],
                p=nC_rS1[:, c, :],
                observed=counts[:, c, :nRatings],
                shape=(nSubj, nRatings),
            )
            Multinomial(
                f"H_counts_{c}",
                n=hits[:, c],
                p=nC_rS2[:, c, :],
                observed=counts[:, c, nRatings * 3:nRatings * 4],
                shape=(nSubj, nRatings),
            )
            Multinomial(
                f"FA_counts_{c}",
                n=falsealarms[:, c],
                p=nI_rS2[:, c, :],
                observed=counts[:, c, nRatings:nRatings * 2],
                shape=(nSubj, nRatings),
            )
            Multinomial(
                f"M_counts_{c}",
                n=m[:, c],
                p=nI_rS1[:, c, :],
                observed=counts[:, c, nRatings * 2:nRatings * 3],
                shape=(nSubj, nRatings),
            )

        if sample_model is True:

            trace = sample(return_inferencedata=True, **kwargs)

            return model, trace

        else:
            return model
Example #37
def IRNN(n_input,
         n_hidden,
         n_output,
         input_type='real',
         out_every_t=False,
         loss_function='CE'):
    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)
    inputs = [x, y]

    h_0 = theano.shared(np.zeros((1, n_hidden), dtype=theano.config.floatX))
    V = initialize_matrix(n_input, n_hidden, 'V', rng)
    W = theano.shared(np.identity(n_hidden, dtype=theano.config.floatX))
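    # Recurrent weights start at the identity matrix; combined with the ReLU
    # hidden nonlinearity in the recurrence below, this is the IRNN recipe of
    # Le, Jaitly & Hinton (2015).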
    out_mat = initialize_matrix(n_hidden, n_output, 'out_mat', rng)
    hidden_bias = theano.shared(
        np.zeros((n_hidden, ), dtype=theano.config.floatX))
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX))

    parameters = [h_0, V, W, out_mat, hidden_bias, out_bias]

    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, W, hidden_bias,
                   out_mat, out_bias):
        if loss_function == 'CE':
            data_lin_output = V[x_t]
        else:
            data_lin_output = T.dot(x_t, V)

        h_t = T.nnet.relu(
            T.dot(h_prev, W) + data_lin_output +
            hidden_bias.dimshuffle('x', 0))
        if out_every_t:
            lin_output = T.dot(h_t, out_mat) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    non_sequences = [V, W, hidden_bias, out_mat, out_bias]

    h_0_batch = T.tile(h_0, [x.shape[1], 1])

    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :],
                           out_mat) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return inputs, parameters, costs
Example #38
def UKRNN(n_input, n_hidden, partition, n_output, input_type='real', out_every_t=False, loss_function='CE'):

    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    # Initialize parameters: theta, V_re, V_im, hidden_bias, U, out_bias, h_0
    V = initialize_matrix(n_input, 2*n_hidden, 'V', rng)
    U = initialize_matrix(2 * n_hidden, n_output, 'U', rng)
    hidden_bias = theano.shared(np.asarray(rng.uniform(low=-0.01,
                                                       high=0.01,
                                                       size=(n_hidden,)),
                                           dtype=theano.config.floatX),
                                name='hidden_bias')
    kron_manifold = UnitaryKron(partition)

    MANIFOLD_NAMES = [manifold.str_id for manifold in kron_manifold._manifolds]
    UK = [theano.shared(value=manifold.rand_np(), name=manifold.str_id) for manifold in kron_manifold._manifolds]
    manifolds = {manifold.str_id: manifold for manifold in kron_manifold._manifolds}


    out_bias = theano.shared(np.zeros((n_output,), dtype=theano.config.floatX), name='out_bias')

    bucket = np.sqrt(3. / 2 / n_hidden)
    h_0 = theano.shared(np.asarray(rng.uniform(low=-bucket,
                                               high=bucket,
                                               size=(1, 2 * n_hidden)),
                                   dtype=theano.config.floatX),
                        name='h_0')

    parameters = [V, U, hidden_bias] + UK + [out_bias, h_0]

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)


    swap_re_im = np.concatenate((np.arange(n_hidden, 2*n_hidden), np.arange(n_hidden)))

    # define the recurrence used by theano.scan
    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, V, hidden_bias, out_bias, U, *UK):
        #unitary_step = unitary_transform(h_prev, n_hidden, unitary_matrix)
        unitary_step = unitary_kron_transform(h_prev, n_hidden, UK)

        hidden_lin_output = unitary_step

        # Compute data linear transform
        if loss_function == 'CE':
            data_lin_output = V[T.cast(x_t, INT_STR)]
        else:
            data_lin_output = T.dot(x_t, V)

        # Total linear output
        lin_output = hidden_lin_output + data_lin_output


        # Apply non-linearity ----------------------------

        # scale RELU nonlinearity
        modulus = T.sqrt(lin_output**2 + lin_output[:, swap_re_im]**2)
        rescale = T.maximum(modulus + T.tile(hidden_bias, [2]).dimshuffle('x', 0), 0.) / (modulus + 1e-5)
        h_t = lin_output * rescale

        if out_every_t:
            lin_output = T.dot(h_t, U) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(NP_FLOAT(0.0))
            acc_t = theano.shared(NP_FLOAT(0.0))

        return h_t, cost_t, acc_t

    # compute hidden states
    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    non_sequences = [V, hidden_bias, out_bias, U] + UK
    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [x, T.tile(theano.shared(np.zeros((1,1), dtype=theano.config.floatX)), [x.shape[0], 1, 1])]

    outputs_info=[h_0_batch, theano.shared(NP_FLOAT(0.0)), theano.shared(NP_FLOAT(0.0))]

    [hidden_states, cost_steps, acc_steps], updates = theano.scan(fn=recurrence,
                                                                  sequences=sequences,
                                                                  non_sequences=non_sequences,
                                                                  outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1,:,:], U) + out_bias.dimshuffle('x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return [x, y], parameters, costs, manifolds
Example #39
    def __init__(self, layer_input, mask, shape, is_predicting, beam_decoding):
        prefix = "WordDecoderLayer_"
        self.y_emb, self.context, self.init_state, self.xidx, self.state_z = layer_input
        self.x_mask, self.y_mask = mask
        self.dim_y, self.hidden_size, self.ctx_size, self.batch_size, self.updated_batch_size, self.latent_size = shape
        self.is_predicting = is_predicting

        self.W = init_weights((self.dim_y, self.hidden_size),
                              prefix + "W",
                              num_concatenate=2,
                              axis_concatenate=1)
        self.U = init_weights((self.hidden_size, self.hidden_size),
                              prefix + "U",
                              "ortho",
                              num_concatenate=2,
                              axis_concatenate=1)
        self.b = init_bias(self.hidden_size, prefix + "b", num_concatenate=2)

        self.Wx = init_weights((self.dim_y, self.hidden_size), prefix + "Wx")
        self.Wxz = init_weights((self.dim_y, self.latent_size), prefix + "Wxz")
        self.bxz = init_bias(self.latent_size, prefix + "bxz")

        self.Ux = init_weights((self.hidden_size, self.hidden_size),
                               prefix + "Ux", "ortho")
        self.bx = init_bias(self.hidden_size, prefix + "bx")

        self.Wc_att = init_weights((self.ctx_size, self.ctx_size),
                                   prefix + "Wc_att", "ortho")
        self.b_att = init_bias(self.ctx_size, prefix + "b_att")

        self.W_comb_att = init_weights((self.hidden_size, self.ctx_size),
                                       prefix + "W_comb_att")
        self.U_att = init_weights((self.ctx_size, 1), prefix + "U_att")

        self.U_nl = init_weights((self.hidden_size, self.hidden_size),
                                 prefix + "U_nl",
                                 "ortho",
                                 num_concatenate=2,
                                 axis_concatenate=1)
        self.b_nl = init_bias(self.hidden_size,
                              prefix + "b_nl",
                              num_concatenate=2)
        self.Ux_nl = init_weights((self.hidden_size, self.hidden_size),
                                  prefix + "Ux_nl", "ortho")
        self.bx_nl = init_bias(self.hidden_size, prefix + "bx_nl")

        self.Wc = init_weights((self.ctx_size, self.hidden_size),
                               prefix + "Wc",
                               num_concatenate=2,
                               axis_concatenate=1)
        self.Wcx = init_weights((self.ctx_size, self.hidden_size),
                                prefix + "Wcx")

        self.W_hz = init_weights((self.hidden_size, self.hidden_size),
                                 prefix + "W_hz")
        self.W_zz = init_weights((self.latent_size, self.hidden_size),
                                 prefix + "W_zz")

        self.W_hu = init_weights((self.hidden_size, self.latent_size),
                                 prefix + "W_hu")
        self.b_hu = init_bias(self.latent_size, prefix + "b_hu")
        self.W_hsigma = init_weights((self.hidden_size, self.latent_size),
                                     prefix + "W_hsigma")
        self.b_hsigma = init_bias(self.latent_size, prefix + "b_hsigma")

        z_params = [self.W_hu, self.b_hu, self.W_hsigma, self.b_hsigma]
        self.params = [
            self.W, self.U, self.b, self.Wx, self.Ux, self.bx, self.U_nl,
            self.b_nl, self.Ux_nl, self.bx_nl, self.Wc, self.Wcx, self.Wc_att,
            self.b_att, self.W_comb_att, self.U_att, self.W_hz, self.W_zz,
            self.W_hu, self.b_hu, self.W_hsigma, self.b_hsigma, self.Wxz,
            self.bxz
        ]

        if is_predicting:
            if beam_decoding:
                self.x_mask = T.tile(self.x_mask, (1, self.batch_size, 1))
            self.y_mask = T.ones((self.batch_size, 1))

        self.pctx = T.dot(self.context, self.Wc_att) + self.b_att
        self.x = T.dot(self.y_emb, self.W) + self.b
        self.xx = T.dot(self.y_emb, self.Wx) + self.bx
        self.xxz = T.dot(self.y_emb, self.Wxz) + self.bxz

        def _slice(x, n):
            if x.ndim == 3:
                return x[:, :, n * self.hidden_size:(n + 1) * self.hidden_size]
            return x[:, n * self.hidden_size:(n + 1) * self.hidden_size]

        def _get_word_atten(pctx, h1, W_comb_att, U_att, x_mask):
            unreg_att = T.tanh(pctx + T.dot(h1, W_comb_att)) * x_mask
            unreg_att = T.dot(unreg_att, U_att)
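            # Masked softmax over source positions (axis 0): subtract the max for
            # numerical stability, zero out padded positions with x_mask, and guard
            # against all-zero columns via the T.switch below.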

            word_atten = T.exp(
                unreg_att - T.max(unreg_att, axis=0, keepdims=True)) * x_mask
            sum_word_atten = T.sum(word_atten, axis=0, keepdims=True)
            word_atten = T.switch(T.eq(word_atten, 0.0), 0.0,
                                  word_atten / sum_word_atten)
            word_atten = T.addbroadcast(word_atten, word_atten.ndim - 1)

            return word_atten

        def _active(x, xx, xxz, y_mask, pre_h, pre_z, pctx, context, x_mask, U,
                    Ux, U_nl, Ux_nl, b_nl, bx_nl, Wc, Wcx, W_comb_att, U_att,
                    W_hz, W_zz, W_hu, b_hu, W_hsigma, b_hsigma, xidx):

            tmp1 = T.nnet.sigmoid(T.dot(pre_h, U) + x)
            r1 = _slice(tmp1, 0)
            u1 = _slice(tmp1, 1)
            h1 = T.tanh(T.dot(pre_h * r1, Ux) + xx)
            h1 = u1 * pre_h + (1.0 - u1) * h1
            h1 = y_mask * h1 + (1.0 - y_mask) * pre_h

            # recurrent-vae encoder
            xh_z = T.nnet.sigmoid(T.dot(pre_z, W_zz) + T.dot(h1, W_hz) + xxz)
            mu = T.dot(xh_z, W_hu) + b_hu
            log_var = T.dot(xh_z, W_hsigma) + b_hsigma
            var = T.exp(log_var)
            sigma = T.sqrt(var)
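            # Reparameterization trick: z = mu + sigma * eps with eps ~ N(0, 1)
            # (clipped to [-5, 5]) during training, and eps = 0 (the posterior
            # mean) at prediction time.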
            eps = 0.0
            if not self.is_predicting:
                eps = floatX(
                    np.random.normal(
                        0, 1, (self.updated_batch_size, self.latent_size)))
                eps = T.reshape(eps, mu.shape)
                eps = T.clip(eps, -5, 5)
            z = mu + sigma * eps

            # len(x) * batch_size * 1
            word_atten = _get_word_atten(pctx, h1, W_comb_att, U_att, x_mask)
            atted_ctx = T.sum(word_atten * context, axis=0)

            tmp2 = T.nnet.sigmoid(
                T.dot(atted_ctx, Wc) + T.dot(h1, U_nl) + b_nl)
            r2 = _slice(tmp2, 0)
            u2 = _slice(tmp2, 1)
            h2 = T.tanh(T.dot(atted_ctx, Wcx) + T.dot(h1 * r2, Ux_nl) + bx_nl)
            h2 = u2 * h1 + (1.0 - u2) * h2
            h2 = y_mask * h2 + (1.0 - y_mask) * h1

            cp_idx = T.argmax(word_atten, axis=0).reshape((self.batch_size, 1))
            cp_idx = xidx[cp_idx[:, 0], T.arange(self.batch_size)]

            return h2, z, atted_ctx, cp_idx, mu, var

        sequences = [self.x, self.xx, self.xxz, self.y_mask]
        non_sequences = [
            self.pctx, self.context, self.x_mask, self.U, self.Ux, self.U_nl,
            self.Ux_nl, self.b_nl, self.bx_nl, self.Wc, self.Wcx,
            self.W_comb_att, self.U_att, self.W_hz, self.W_zz, self.W_hu,
            self.b_hu, self.W_hsigma, self.b_hsigma,
            self.xidx.reshape((self.xidx.shape[0], self.batch_size))
        ]

        if self.is_predicting:
            print "use one-step decoder"
            hs, zs, ac, cp_idx, mu, var = _active(
                *(sequences + [self.init_state, self.state_z] + non_sequences))
        else:
            init_z = T.zeros((self.batch_size, self.latent_size),
                             dtype=theano.config.floatX)
            [hs, zs, ac, cp_idx, mu, var], _ = theano.scan(
                _active,
                sequences=sequences,
                outputs_info=[self.init_state, init_z, None, None, None, None],
                non_sequences=non_sequences,
                allow_gc=False,
                strict=True)

        self.hidden_status = hs
        self.atted_context = ac
        self.word_atten = None
        self.cp_idx = cp_idx
        self.dec_z = zs
        self.dec_mu = mu
        self.dec_var = var
        self.z_params = z_params
Example #40
 def get_output_for(self, input, **kwargs):
     return T.tile(
         input.reshape((input.shape[0], input.shape[1], input.shape[2], 1)),
         (1, 1, 1, self.n))
            th_g_pred_s = th_g_pred_s + th_g_dot * dt
        predict = tt.set_subtensor(predict[counter:counter + 1], th_g_pred_s)

    return predict


# In[ ]:

model_C = pm.Model()
alpha1 = 3.
beta1 = 0.05
alpha2 = 1.0
# define the distribution
with model_C:
    sigma2s = pm.InverseGamma('sigma2s', alpha=alpha1, beta=beta1, shape=1)
    sigma2 = pm.Deterministic('sigma2', tt.tile(sigma2s, th.shape[0]))
    gamma2 = pm.Exponential(name='gamma2', lam=alpha2)
    ln_k_guess = pm.Normal(name='ln_k_guess',
                           mu=0,
                           sigma=tt.sqrt(gamma2),
                           shape=1)
    y_mean = pm.Deterministic('y_mean', Solver(ln_k_guess))
    y = pm.Normal(name='y', mu=y_mean, sigma=tt.sqrt(sigma2), observed=thg)

# In[12]:

with model_C:
    mcmc_res_C = pm.sample(draws=5000, step=pm.NUTS())

#_=pm.plot_posterior(mcmc_res_C, var_names=['ln_k_guess'])
Example #42
 def calc_log_gauss_fun_theano(self, Y, mean, covs):
     n_samples, n_dim = Y.shape
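     # Row-wise log-density of a diagonal-covariance Gaussian:
     #   log N(y | mean, diag(covs)) = -0.5 * sum((y - mean)**2 / covs)
     #                                 - 0.5 * n_dim * log(2*pi) - 0.5 * sum(log(covs))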
     Yc = Y - T.tile(mean, (Y.shape[0], 1))
     exp_val = -0.5*T.sum(Yc**2/T.tile(covs, (Y.shape[0], 1)),1)
     norm_scal = -0.5*T.log(2*np.pi)*n_dim-0.5*T.sum(T.log(covs))
     return exp_val+norm_scal
Example #43
 def get_other3(x, next_modal):
     fea_other = tensor.tile(x, (maxlen, 1))
     fea_other = x.T
     fea_single = fea_other[:, next_modal]
     return fea_other, fea_single
Example #44
                                          deterministic=False)
output_before_softmax_gen = ll.get_output(disc_layers[-1],
                                          gen_dat,
                                          deterministic=False)

l_lab = output_before_softmax_lab[T.arange(args.batch_size), labels]
l_unl = nn.log_sum_exp(output_before_softmax_unl)
l_gen = nn.log_sum_exp(output_before_softmax_gen)
loss_lab = -T.mean(l_lab) + T.mean(
    T.mean(nn.log_sum_exp(output_before_softmax_lab)))
loss_unl = -0.5 * T.mean(l_unl) + 0.5 * T.mean(
    T.nnet.softplus(l_unl)) + 0.5 * T.mean(T.nnet.softplus(l_gen))
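# These are the semi-supervised discriminator losses of Salimans et al. (2016),
# "Improved Techniques for Training GANs": loss_lab is the labelled-class
# cross-entropy written as -logit_y + logsumexp(logits), while loss_unl drives
# logsumexp(logits) up on unlabelled real data and down on generated data via
# the softplus terms.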

# Gradient for disc

z_delta_disc = T.tile(z_jacobian, (args.batch_size, 1)) * args.z_delta
z_d_disc = T.sum(z_jacobian, axis=1).dimshuffle('x', 0) * args.z_delta

x_disc_jacobian_lab = x_lab.repeat(sample_dim, axis=0)
labels_jacobian = labels.repeat(sample_dim)
gen_dat_del_lab = ll.get_output(gen_layers[-1], {
    gen_img_input: x_disc_jacobian_lab,
    gen_noise_input: z_delta_disc
},
                                deterministic=False)
gen_dat_zero_lab = ll.get_output(gen_layers[-1], {
    gen_img_input: x_disc_jacobian_lab,
    gen_noise_input: T.zeros_like(z_delta_disc)
},
                                 deterministic=False)
disc_dat_delta_lab = ll.get_output(disc_layers[-1],
Example #45
    def cov_gradients(self, verbose=0):
        """
         Create covariance function for the gradients

         Returns:
             theano.tensor.matrix: covariance of the gradients, with shape
             (number of points in dip_pos) x (number of points in dip_pos)

         """

        # Euclidean distances
        sed_dips_dips = self.squared_euclidean_distances(
            self.dips_position_tiled, self.dips_position_tiled)

        if 'sed_dips_dips' in self.verbose:
            sed_dips_dips = theano.printing.Print('sed_dips_dips')(
                sed_dips_dips)

        # Cartesian distances between dips positions
        h_u = T.vertical_stack(
            T.tile(
                self.dips_position[:, 0] - self.dips_position[:, 0].reshape(
                    (self.dips_position[:, 0].shape[0], 1)),
                self.n_dimensions),
            T.tile(
                self.dips_position[:, 1] - self.dips_position[:, 1].reshape(
                    (self.dips_position[:, 1].shape[0], 1)),
                self.n_dimensions),
            T.tile(
                self.dips_position[:, 2] - self.dips_position[:, 2].reshape(
                    (self.dips_position[:, 2].shape[0], 1)),
                self.n_dimensions))

        # Transpose
        h_v = h_u.T

        # Perpendicularity matrix. Boolean matrix to separate cross-covariance and
        # every gradient direction covariance (block diagonal)
        perpendicularity_matrix = T.zeros_like(sed_dips_dips)

        # Cross-covariances of x
        perpendicularity_matrix = T.set_subtensor(
            perpendicularity_matrix[0:self.dips_position.shape[0],
                                    0:self.dips_position.shape[0]], 1)

        # Cross-covariances of y
        perpendicularity_matrix = T.set_subtensor(
            perpendicularity_matrix[
                self.dips_position.shape[0]:self.dips_position.shape[0] * 2,
                self.dips_position.shape[0]:self.dips_position.shape[0] * 2],
            1)

        # Cross-covariances of z
        perpendicularity_matrix = T.set_subtensor(
            perpendicularity_matrix[self.dips_position.shape[0] *
                                    2:self.dips_position.shape[0] * 3,
                                    self.dips_position.shape[0] *
                                    2:self.dips_position.shape[0] * 3], 1)

        # Covariance matrix for gradients at every xyz direction and their cross-covariances
        C_G = T.switch(
            T.eq(sed_dips_dips, 0),  # This is the condition
            0,  # If true it is equal to 0. This is how a direction affect another
            (  # else, following Chiles book
                (h_u * h_v / sed_dips_dips**2) *
                (((sed_dips_dips < self.a_T) *  # first derivative
                  (-self.c_o_T *
                   ((-14 / self.a_T**2) + 105 / 4 * sed_dips_dips / self.a_T**3
                    - 35 / 2 * sed_dips_dips**3 / self.a_T**5 +
                    21 / 4 * sed_dips_dips**5 / self.a_T**7))) +
                 (sed_dips_dips < self.a_T) *  # Second derivative
                 self.c_o_T * 7 *
                 (9 * sed_dips_dips**5 - 20 * self.a_T**2 * sed_dips_dips**3 +
                  15 * self.a_T**4 * sed_dips_dips - 4 * self.a_T**5) /
                 (2 * self.a_T**7)) - (
                     perpendicularity_matrix *
                     (sed_dips_dips < self.a_T) *  # first derivative
                     self.c_o_T *
                     ((-14 / self.a_T**2) + 105 / 4 * sed_dips_dips /
                      self.a_T**3 - 35 / 2 * sed_dips_dips**3 / self.a_T**5 +
                      21 / 4 * sed_dips_dips**5 / self.a_T**7))))

        # Setting nugget effect of the gradients
        # TODO: This function can be substituted by simply adding the nugget effect to the diagonal if I remove the condition
        C_G += T.eye(C_G.shape[0]) * self.nugget_effect_grad_T

        # Add name to the theano node
        C_G.name = 'Covariance Gradient'

        if verbose > 1:
            theano.printing.pydotprint(C_G,
                                       outfile="graphs/" +
                                       sys._getframe().f_code.co_name + ".png",
                                       var_with_name_simple=True)

        if str(sys._getframe().f_code.co_name) in self.verbose:
            C_G = theano.printing.Print('Cov Gradients')(C_G)

        return C_G
Example #46
def build_model(tparams, options):
    '''
    x: training data
    y: training label
    x3: neighbor data, datanum * neighbornum * featuredim
    y2: neighbor label
    '''
    trng = RandomStreams(SEED)
    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))
    maskl = tensor.matrix('maskl', dtype=config.floatX)
    y = tensor.vector('y', dtype='int32')
    x = tensor.matrix('x', dtype=config.floatX)
    n_samples = x.shape[0]
    dim_proj = x.shape[1]
    maxlen = options['maxlen']
    x3 = tensor.tensor3('x3', dtype=config.floatX)
    y2 = tensor.matrix('y2', dtype='int32')
    neigh_num = x3.shape[1]
    x_nerghbors = tensor.reshape(x3, [n_samples * neigh_num, dim_proj])
    modal_cost = tensor.vector('modal_cost', dtype=config.floatX)
    max_cost = tensor.scalar('max_cost', dtype=config.floatX)
    h = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
    c = tensor.alloc(numpy_floatX(0.), n_samples, dim_proj)
    h_n = tensor.alloc(numpy_floatX(0.), n_samples * neigh_num, dim_proj)
    c_n = tensor.alloc(numpy_floatX(0.), n_samples * neigh_num, dim_proj)
    cost = 0
    cost1_mean = []
    cost2_mean = []
    cost3_mean = []
    next_mean = []
    # mask: whether the instance enters the i-th iteration
    mask = tensor.ones_like(x[:, 0], dtype=config.floatX)
    mask_n = tensor.ones_like(x_nerghbors[:, 0], dtype=config.floatX)
    masks = []
    projs = []
    masks.append(mask)
    next_modal = tensor.zeros_like(x[:, 0], dtype='int32')
    next_modal_n = tensor.zeros_like(x_nerghbors[:, 0], dtype='int32')
    # cost_vector = tensor.alloc(numpy_floatX(0.),n_samples,1)
    cost_vector = tensor.alloc(numpy_floatX(0.), 1, n_samples)
    f_pred_set = []
    f_pred_seq_set = []
    f_pred_seq_prob_set = []
    f_get_fea_set = []
    f_fea_other_set = []

    def get_other3(x, next_modal):
        fea_other = tensor.tile(x, (maxlen, 1))
        fea_other = x.T
        fea_single = fea_other[:, next_modal]
        return fea_other, fea_single

    def get_other(x):
        # change the feature x from dim to the form of maxlen * dim
        fea_other = []
        for i in range(maxlen):
            fea_other.append(x * maskl[i])
        return tensor.stack(fea_other)

    def get_single(x, next_modal):
        # get the current modal' feature
        fea_single = x * maskl[next_modal]
        return fea_single

    def compute_dist(neighbor, pred_neighbor, fea_single, pred, mask, y, y2):
        '''
        minimize same label neighbor's distance, maximize different label neighbor's distance
        neighbor: neighbor's feature
        pred_neighbor: neighbor's next-modal prediction
        fea_single: current instance's feature
        pred: current instance's prediction
        mask: whether current instance stops
        y: current instance's label
        y2: neighbor instance's label
        '''
        loss = 0
        if mask:
            ifsamelabel = -1
            for i in range(3):
                if y == y2[i]:
                    ifsamelabel = 1
                else:
                    ifsamelabel = -1
                dist = tensor.dot(get_other(neighbor[i]).T,
                                  pred_neighbor[i]) - tensor.dot(
                                      get_other(fea_single).T, pred)
                loss += ifsamelabel * tensor.dot(dist, dist.T)
        return loss / 3

    costs = tensor.tile(modal_cost, (n_samples, 1))
    xs = []
    for i in range(recyl_maxlen):
        # set a high cost for modals that have already been used, to prevent predicting the same modal again
        costs = tensor.set_subtensor(
            costs[tensor.arange(n_samples), next_modal], 1)
        feas, update = theano.scan(
            fn=get_single,
            sequences=[x, next_modal],
        )
        fea_single_n, update_n = theano.scan(
            fn=get_single,
            sequences=[x_nerghbors, next_modal_n],
        )
        fea_single = feas
        max_coefficients_supported = 10000

        xs.append(fea_single)

        [h, c] = get_layer(options['encoder'])[1](tparams,
                                                  fea_single,
                                                  options,
                                                  prefix=options['encoder'],
                                                  mask=mask,
                                                  h_before=h,
                                                  c_before=c)
        [h_n,
         c_n] = get_layer(options['encoder'])[1](tparams,
                                                 fea_single_n,
                                                 options,
                                                 prefix=options['encoder'],
                                                 mask=mask_n,
                                                 h_before=h_n,
                                                 c_before=c_n)
        proj = h
        proj_n = h_n
        projs.append(proj)
        projsmatrix = tensor.stack(projs)
        proj_pred = tensor.stack(projs) * tensor.stack(masks)[:, :, None]
        proj_pred = tensor.transpose(proj_pred, (1, 0, 2))
        proj_pred = tensor.reshape(proj_pred, [
            projsmatrix.shape[1], projsmatrix.shape[0] * projsmatrix.shape[2]
        ])
        # print('h_n.shape', h_n.shape)
        if options['use_dropout']:
            proj_pred = dropout_layer(proj_pred, use_noise, trng)
        pred = tensor.nnet.softmax(
            tensor.dot(proj_pred, tparams['U_' + str(i)]) +
            tparams['b_' + str(i)])

        print('i', i)

        f_pred_prob = theano.function([x, maskl, modal_cost, max_cost],
                                      pred,
                                      name='f_pred_prob',
                                      on_unused_input='ignore',
                                      allow_input_downcast=True)
        f_pred = theano.function([x, maskl, modal_cost, max_cost],
                                 pred.argmax(axis=1),
                                 name='f_pred',
                                 on_unused_input='ignore',
                                 allow_input_downcast=True)
        f_pred_set.append(f_pred)

        off = 1e-8
        if pred.dtype == 'float16':
            off = 1e-6

        pred_seq = tensor.nnet.softmax(
            tensor.dot(proj, tparams['U_seq_' + str(i)]) +
            tparams['b_seq_' + str(i)])
        pred_seq_n = tensor.nnet.softmax(
            tensor.dot(proj_n, tparams['U_seq_' + str(i)]) +
            tparams['b_seq_' + str(i)])

        f_pred_seq = theano.function([x, maskl, modal_cost, max_cost],
                                     pred_seq.argmax(axis=1),
                                     name='f_pred_seq',
                                     on_unused_input='ignore',
                                     allow_input_downcast=True)

        f_pred_seq_set.append(f_pred_seq)

        pred_seq_index = pred_seq.argmax(axis=1)
        next_modal = pred_seq_index
        next_modal_n = pred_seq_n.argmax(axis=1)
        next_mean.append(next_modal)
        cost1_vector = tensor.log(pred[tensor.arange(n_samples), y] + off)
        cost1 = (cost1_vector * mask).sum() / (mask.sum() + 1)

        pred_seq_n3 = tensor.reshape(pred_seq_n,
                                     [n_samples, neigh_num, maxlen])
        result_loss2, update = theano.scan(
            fn=compute_dist,
            sequences=[x3, pred_seq_n3, x, pred_seq, mask, y, y2],
        )
        cost2 = result_loss2.mean()
        cost3 = (costs * pred_seq).mean()
        cost1_mean.append(cost1)
        cost2_mean.append(cost2)
        cost3_mean.append(cost3)
        lamda1 = 0.001
        lamda2 = 0.1
        if i == recyl_maxlen - 1:
            lamda1 = 0.000000001
            lamda2 = 0.000000001
        cost += -cost1 + lamda1 * cost2 + lamda2 * cost3
        # cost += -cost1
        # f_fea_other = theano.function([x, x3, y, maskl, modal_cost, max_cost],[nnext, D,cost1,cost2,cost3,mask.sum(),next_modal, fea_single, fea_other, fea_single3, fea_other3], on_unused_input='ignore')
        # f_fea_other_set.append(f_fea_other)
        result, update = theano.scan(lambda b, a: a[b],
                                     sequences=pred_seq_index,
                                     non_sequences=modal_cost)
        if i == 0:
            cost_vector = result
        else:
            cost_vector += result
        # mask the instance if its cost larger than max_cost
        choice = tensor.nonzero(tensor.gt(-cost_vector, -max_cost))[0]
        mask = tensor.zeros_like(x[:, 0], dtype=config.floatX)
        mask = theano.tensor.set_subtensor(mask[choice], 1.)
        masks.append(mask)
        if i < recyl_maxlen:
            cost -= (2 * (1 - mask) * cost1_vector).sum() / (mask.sum() + 1)
        else:
            cost -= cost1
    f_fea_other = theano.function([x, x3, y2, y, maskl, modal_cost, max_cost],
                                  [
                                      tensor.stack(cost1_mean),
                                      tensor.stack(cost2_mean),
                                      tensor.stack(cost3_mean)
                                  ],
                                  on_unused_input='ignore')

    return use_noise, x, x3, y2, maskl, y, cost, modal_cost, max_cost, f_pred_set, f_pred_seq_set, f_fea_other
Example #47
def complex_RNN(n_input,
                n_hidden,
                n_output,
                input_type='real',
                out_every_t=False,
                loss_function='CE'):

    np.random.seed(1234)
    rng = np.random.RandomState(1234)

    # Initialize parameters: theta, V_re, V_im, hidden_bias, U, out_bias, h_0
    V = initialize_matrix(n_input, 2 * n_hidden, 'V', rng)
    U = initialize_matrix(2 * n_hidden, n_output, 'U', rng)
    hidden_bias = theano.shared(np.asarray(rng.uniform(low=-0.01,
                                                       high=0.01,
                                                       size=(n_hidden, )),
                                           dtype=theano.config.floatX),
                                name='hidden_bias')

    reflection = initialize_matrix(2, 2 * n_hidden, 'reflection', rng)
    out_bias = theano.shared(np.zeros((n_output, ),
                                      dtype=theano.config.floatX),
                             name='out_bias')
    theta = theano.shared(np.asarray(rng.uniform(low=-np.pi,
                                                 high=np.pi,
                                                 size=(3, n_hidden)),
                                     dtype=theano.config.floatX),
                          name='theta')

    bucket = np.sqrt(3. / 2 / n_hidden)
    h_0 = theano.shared(np.asarray(rng.uniform(low=-bucket,
                                               high=bucket,
                                               size=(1, 2 * n_hidden)),
                                   dtype=theano.config.floatX),
                        name='h_0')

    parameters = [V, U, hidden_bias, reflection, out_bias, theta, h_0]

    x, y = initialize_data_nodes(loss_function, input_type, out_every_t)

    index_permute = np.random.permutation(n_hidden)

    index_permute_long = np.concatenate(
        (index_permute, index_permute + n_hidden))
    swap_re_im = np.concatenate((np.arange(n_hidden,
                                           2 * n_hidden), np.arange(n_hidden)))

    # define the recurrence used by theano.scan
    def recurrence(x_t, y_t, h_prev, cost_prev, acc_prev, theta, V,
                   hidden_bias, out_bias, U):

        # Compute hidden linear transform
        step1 = times_diag(h_prev, n_hidden, theta[0, :], swap_re_im)
        step2 = do_fft(step1, n_hidden)
        step3 = times_reflection(step2, n_hidden, reflection[0, :])
        step4 = vec_permutation(step3, index_permute_long)
        step5 = times_diag(step4, n_hidden, theta[1, :], swap_re_im)
        step6 = do_ifft(step5, n_hidden)
        step7 = times_reflection(step6, n_hidden, reflection[1, :])
        step8 = times_diag(step7, n_hidden, theta[2, :], swap_re_im)

        hidden_lin_output = step8

        # Compute data linear transform
        if loss_function == 'CE':
            data_lin_output = V[T.cast(x_t, 'int32')]
        else:
            data_lin_output = T.dot(x_t, V)

        # Total linear output
        lin_output = hidden_lin_output + data_lin_output

        # Apply non-linearity ----------------------------

        # scaled ReLU nonlinearity applied to the complex modulus
        modulus = T.sqrt(lin_output**2 + lin_output[:, swap_re_im]**2)
        rescale = T.maximum(
            modulus + T.tile(hidden_bias, [2]).dimshuffle('x', 0),
            0.) / (modulus + 1e-5)
        h_t = lin_output * rescale

        if out_every_t:
            lin_output = T.dot(h_t, U) + out_bias.dimshuffle('x', 0)
            cost_t, acc_t = compute_cost_t(lin_output, loss_function, y_t)
        else:
            cost_t = theano.shared(np.float32(0.0))
            acc_t = theano.shared(np.float32(0.0))

        return h_t, cost_t, acc_t

    # compute hidden states
    h_0_batch = T.tile(h_0, [x.shape[1], 1])
    non_sequences = [theta, V, hidden_bias, out_bias, U]
    if out_every_t:
        sequences = [x, y]
    else:
        sequences = [
            x,
            T.tile(theano.shared(np.zeros((1, 1), dtype=theano.config.floatX)),
                   [x.shape[0], 1, 1])
        ]

    outputs_info = [
        h_0_batch,
        theano.shared(np.float32(0.0)),
        theano.shared(np.float32(0.0))
    ]

    [hidden_states, cost_steps,
     acc_steps], updates = theano.scan(fn=recurrence,
                                       sequences=sequences,
                                       non_sequences=non_sequences,
                                       outputs_info=outputs_info)

    if not out_every_t:
        lin_output = T.dot(hidden_states[-1, :, :], U) + out_bias.dimshuffle(
            'x', 0)
        costs = compute_cost_t(lin_output, loss_function, y)
    else:
        cost = cost_steps.mean()
        accuracy = acc_steps.mean()
        costs = [cost, accuracy]

    return [x, y], parameters, costs
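
For reference, a small NumPy sketch (my own, not part of the snippet) of the rescaled-ReLU nonlinearity used in the recurrence above, assuming the first n_hidden entries of the hidden state are the real parts and the last n_hidden entries the imaginary parts, as in swap_re_im.

import numpy as np

def mod_relu(lin_output, hidden_bias):
    # lin_output: (batch, 2 * n_hidden), hidden_bias: (n_hidden,)
    n_hidden = hidden_bias.shape[0]
    swap_re_im = np.concatenate((np.arange(n_hidden, 2 * n_hidden),
                                 np.arange(n_hidden)))
    modulus = np.sqrt(lin_output ** 2 + lin_output[:, swap_re_im] ** 2)
    rescale = np.maximum(modulus + np.tile(hidden_bias, 2), 0.) / (modulus + 1e-5)
    return lin_output * rescale

h = mod_relu(np.random.randn(5, 8), np.zeros(4))  # batch of 5, n_hidden = 4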
Ejemplo n.º 48
def f_getid_from_replicated(ids, n_samples):
    return T.repeat(ids * n_samples, n_samples) + T.tile(
        T.arange(n_samples), ids.shape[0])
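
A quick check of what this helper computes, with n_samples = 3 chosen only for illustration: each id is mapped to the flat indices of its n_samples replicated copies.

import numpy as np
import theano
import theano.tensor as T

ids = T.ivector('ids')
idx = T.repeat(ids * 3, 3) + T.tile(T.arange(3), ids.shape[0])
f = theano.function([ids], idx)
print(f(np.asarray([0, 2], dtype='int32')))  # -> [0 1 2 6 7 8]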
Ejemplo n.º 49
    def compute_log_averaged_ei(self, x, X, randomness, incumbent):

        # We compute the old predictive mean at x

        Kzz = compute_kernel(
            self.lls, self.lsf, self.z,
            self.z) + T.eye(self.z.shape[0]) * self.jitter * T.exp(self.lsf)
        KzzInv = T.nlinalg.MatrixInversePSD()(Kzz)
        LLt = T.dot(self.LParamPost, T.transpose(self.LParamPost))
        covCavityInv = KzzInv + LLt * casting(
            self.n_points - self.set_for_training) / casting(self.n_points)
        covCavity = T.nlinalg.MatrixInversePSD()(covCavityInv)
        meanCavity = T.dot(
            covCavity,
            casting(self.n_points - self.set_for_training) /
            casting(self.n_points) * self.mParamPost)
        KzzInvmeanCavity = T.dot(KzzInv, meanCavity)
        Kxz = compute_kernel(self.lls, self.lsf, x, self.z)
        m_old_x = T.dot(Kxz, KzzInvmeanCavity)

        # We compute the old predictive mean at X

        KXz = compute_kernel(self.lls, self.lsf, X, self.z)
        m_old_X = T.dot(KXz, KzzInvmeanCavity)

        # We compute the required cross covariance matrices

        KXX = compute_kernel(self.lls, self.lsf, X, X) - T.dot(
            T.dot(KXz, KzzInv),
            KXz.T) + T.eye(X.shape[0]) * self.jitter * T.exp(self.lsf)
        KXXInv = T.nlinalg.MatrixInversePSD()(KXX)

        KxX = compute_kernel(self.lls, self.lsf, x, X)
        xX = T.concatenate([x, X], 0)
        KxXz = compute_kernel(self.lls, self.lsf, xX, self.z)
        KxX = KxX - T.dot(T.dot(KxXz[0:x.shape[0], :], KzzInv),
                          KxXz[x.shape[0]:xX.shape[0], :].T)

        # We compute the new posterior mean

        samples_internal = T.dot(MatrixChol()(KXX), randomness)

        new_predictive_mean = T.tile(m_old_x,
                                     [1, randomness.shape[1]]) + T.dot(
                                         KxX, T.dot(KXXInv, samples_internal))

        # We compute the new posterior variance

        z_expanded = T.concatenate([self.z, X], 0)
        Kxz_expanded = compute_kernel(self.lls, self.lsf, x, z_expanded)
        Kzz_expanded = compute_kernel(
            self.lls, self.lsf, z_expanded, z_expanded) + T.eye(
                z_expanded.shape[0]) * self.jitter * T.exp(self.lsf)
        Kzz_expandedInv = T.nlinalg.MatrixInversePSD()(Kzz_expanded)
        v_out = T.exp(self.lsf) - T.dot(
            Kxz_expanded * T.dot(Kxz_expanded, Kzz_expandedInv),
            T.ones_like(z_expanded[:, 0:1]))
        new_predictive_var = T.tile(v_out, [1, randomness.shape[1]])

        s = (incumbent - new_predictive_mean) / T.sqrt(new_predictive_var)

        log_ei = T.log((incumbent - new_predictive_mean) * ratio(s) +
                       T.sqrt(new_predictive_var)) + log_n_pdf(s)

        return T.mean(LogSumExp(log_ei, 1), 1)
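
Assuming ratio(s) = Phi(s)/phi(s) and log_n_pdf(s) = log phi(s) for the standard normal (those helpers are not shown here), the expression above is a numerically stable rewrite of expected improvement, EI = (incumbent - m) Phi(s) + sqrt(v) phi(s). A NumPy sketch of the same identity:

import numpy as np
from scipy.stats import norm

def log_ei(m, v, incumbent):
    s = (incumbent - m) / np.sqrt(v)
    # EI = phi(s) * ((incumbent - m) * Phi(s)/phi(s) + sqrt(v))
    return np.log((incumbent - m) * norm.cdf(s) / norm.pdf(s) + np.sqrt(v)) \
        + norm.logpdf(s)

m, v, incumbent = 0.2, 0.5, 1.0
ei_direct = (incumbent - m) * norm.cdf((incumbent - m) / np.sqrt(v)) \
    + np.sqrt(v) * norm.pdf((incumbent - m) / np.sqrt(v))
print(np.isclose(np.exp(log_ei(m, v, incumbent)), ei_direct))  # True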
Ejemplo n.º 50
def tile(x, n):
    return T.tile(x, n)
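
One possible use of this thin wrapper, e.g. repeating a single row across a batch dimension:

import numpy as np
import theano
import theano.tensor as T

v = T.matrix('v')                 # shape (1, d)
batched = tile(v, (4, 1))         # shape (4, d)
f = theano.function([v], batched)
print(f(np.ones((1, 3), dtype=theano.config.floatX)).shape)  # (4, 3)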
Ejemplo n.º 51
    def get_output_for(self, inputs, **kwargs):
        C              = inputs[0]
        q              = inputs[1]
        input_sentence = inputs[2]
        input_time     = inputs[3]
        
        C = C + self.T_[input_time].dimshuffle(('x',0,1))
        
        C_reshaped = T.reshape(C,(-1,C.shape[1],1,self.hid_state_size))
        tiled_q    = T.tile(T.reshape(
            q,(-1,1,1,self.hid_state_size)),(1,C.shape[1],1,1))
        
        input_sentence_mask = self.sentence_mask_mat[input_sentence-1,:C.shape[1]]

        W_in_stacked  = T.concatenate([self.W_in_to_resetgate, 
                                       self.W_in_to_updategate,
                                       self.W_in_to_hid_update], axis=1)
        W_hid_stacked = T.concatenate([self.W_hid_to_resetgate,
                                       self.W_hid_to_updategate,
                                       self.W_hid_to_hid_update], axis=1)
        b_stacked     = T.concatenate([self.b_resetgate,       
                                       self.b_updategate,       
                                       self.b_hid_update], axis=0)
        
        def Ep_Gate(c, m, q, Wb, W1, W2, b1, b2):
            z = T.concatenate([c,m,q,c*q,c*m,T.abs_(c-q),T.abs_(c-m),c*Wb*q,c*Wb*m], axis=2)
            #g = (T.dot(W2, nonlin.tanh(T.dot(z, W1) + b1)) + b2) <- (big mistake :)
            g = (T.dot(nonlin.tanh(T.dot(W1, z) + b1), W2) + b2)
            return g
    
        def slice_w(x, n):
            return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]

        def step(hid_previous):
            tiled_hid_prev = T.tile(T.reshape(
                hid_previous,(-1,1,1,self.hid_state_size)),(1,C.shape[1],1,1))
            
            g = Ep_Gate(C_reshaped, tiled_hid_prev, tiled_q,
                        self.Wb, self.W1, self.W2, self.b1, self.b2)
            
            g = T.reshape(g,(-1,C.shape[1]))
            g = T.switch(T.eq(input_sentence_mask, 1), g, np.float32(-np.inf))
            g = nonlin.softmax(g)
            e = T.sum(T.reshape(g,(g.shape[0],g.shape[1],1)) * C, axis=1)

            input_n = e
            
            hid_input = T.dot(hid_previous, W_hid_stacked)
            input_n = T.dot(input_n, W_in_stacked) + b_stacked

            resetgate  = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate  = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            hid_update_in  = slice_w(input_n, 2)
            hid_update_hid = slice_w(hid_input, 2)
            hid_update     = hid_update_in + resetgate*hid_update_hid

            hid_update = self.nonlinearity_hid(hid_update)

            hid = (1 - updategate)*hid_previous + updategate*hid_update

            return (hid, g)

        hid = q

         
        G = []
        for i in xrange(self.n_pass):
            hid, g = step(hid)
            G.append(T.reshape(g, (-1,1,C.shape[1])))
        
        return T.reshape(T.concatenate(G, axis=1), (-1,C.shape[1]))
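
Standalone NumPy sketch (my own) of the masked softmax used for the episodic gates above: positions outside the sentence mask are pushed to -inf so they receive zero attention weight.

import numpy as np

def masked_softmax(scores, mask):
    scores = np.where(mask == 1, scores, -np.inf)
    scores = scores - scores.max(axis=1, keepdims=True)
    e = np.exp(scores)
    return e / e.sum(axis=1, keepdims=True)

g = masked_softmax(np.array([[2.0, 1.0, 0.5]]), np.array([[1, 1, 0]]))
print(g)  # last position gets weight 0, first two sum to 1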
Ejemplo n.º 52
    def tile(self, x, n):
        return T.tile(x, n)
Ejemplo n.º 53
    def get_output_for(self, inputs, **kwargs):
        # input_sentence: sentence size
        # input_time : sentence position
        C              = inputs[0]
        q              = inputs[1]
        input_sentence = inputs[2]
        input_time     = inputs[3]
        
        # Apply time embedding
        C = C + self.T_[input_time].dimshuffle(('x',0,1))
        
        # Reshape for parallelizing computation of gates
        C_reshaped = T.reshape(C,(-1,C.shape[1],1,self.hid_state_size))
        tiled_q    = T.tile(T.reshape(
            q,(-1,1,1,self.hid_state_size)),(1,C.shape[1],1,1))
        
        input_sentence_mask = self.sentence_mask_mat[input_sentence-1,:C.shape[1]]

        W_in_stacked  = T.concatenate([self.W_in_to_resetgate, 
                                       self.W_in_to_updategate,
                                       self.W_in_to_hid_update], axis=1)
        W_hid_stacked = T.concatenate([self.W_hid_to_resetgate,
                                       self.W_hid_to_updategate,
                                       self.W_hid_to_hid_update], axis=1)
        b_stacked     = T.concatenate([self.b_resetgate,       
                                       self.b_updategate,       
                                       self.b_hid_update], axis=0)
        
        def Ep_Gate(c, m, q, Wb, W1, W2, b1, b2):
            z = T.concatenate([c,m,q,c*q,c*m,T.abs_(c-q),T.abs_(c-m),c*Wb*q,c*Wb*m], axis=2)
            #g = (T.dot(W2, nonlin.tanh(T.dot(z, W1) + b1)) + b2) <- (big mistake :)
            g = (T.dot(nonlin.tanh(T.dot(W1, z) + b1), W2) + b2)
            return g
    
        def slice_w(x, n):
            return x[:, n*self.hid_state_size:(n+1)*self.hid_state_size]

        # Step for computing summarized episodes recurrently
        def step(hid_previous):
            # Computing a summarized episode.
            tiled_hid_prev = T.tile(T.reshape(
                hid_previous,(-1,1,1,self.hid_state_size)),(1,C.shape[1],1,1))

            g = Ep_Gate(C_reshaped, tiled_hid_prev, tiled_q,
                        self.Wb, self.W1, self.W2, self.b1, self.b2)

            g = T.reshape(g,(-1,C.shape[1]))
            g = T.switch(T.eq(input_sentence_mask, 1), g, np.float32(-np.inf))
            g = nonlin.softmax(g)
            e = T.sum(T.reshape(g,(g.shape[0],g.shape[1],1)) * C, axis=1)

            # After computing the episode, now it is typical GRU.
            input_n = e
            
            hid_input = T.dot(hid_previous, W_hid_stacked)
            input_n = T.dot(input_n, W_in_stacked) + b_stacked

            resetgate  = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate  = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            hid_update_in  = slice_w(input_n, 2)
            hid_update_hid = slice_w(hid_input, 2)
            hid_update     = hid_update_in + resetgate*hid_update_hid

            hid_update = self.nonlinearity_hid(hid_update)

            hid = (1 - updategate)*hid_previous + updategate*hid_update

            return hid

        hid = q

        # Repeat the step process n_pass times.
        for i in xrange(self.n_pass):
            hid = step(hid)
      
        return hid
def sample_hier_rbf(model_matrix, sample_kwargs=None):

    # load the data
    x_mu_rbf = model_matrix['x_mu_rbf']
    x_sd_rbf = model_matrix['x_sd_rbf']
    x_sc = model_matrix['x_sc']
    subj_idx = model_matrix['subj_idx']
    y = model_matrix['y']
    n_subj = model_matrix['n_subj']

    # fit the first model
    n, d = x_mu_rbf.shape
    if sample_kwargs is None:
        # Here, we specify NUTS as our sampler (implicitly this is the default)
        # and use variational inference to initialize
        sample_kwargs = dict(draws=2000,
                             njobs=2,
                             tune=2000,
                             init='advi+adapt_diag')

    # to do inference, all we have to do is write down the model in our
    # probabilistic programming language (PYMC3) and the software will
    # do inference over it (we can control how this happens, e.g. with
    # Gibbs sampling, MCMC, Variational Inference, but PYMC3 will default
    # to hamiltonian-MCMC with the No U-turn sampler ("NUTS"))

    with pm.Model() as hier_rbf:
        # here, we write down the model

        # Define hierarchical parameters
        # (normal means and standard deviation for regression weights)
        mu_1 = pm.Normal('mu_beta_rbf_mean', mu=0., sd=100.)
        mu_2 = pm.Normal('mu_beta_rbf_stdv', mu=0., sd=100.)
        mu_3 = pm.Normal('mu_beta_stick', mu=0., sd=100.)

        sigma_1 = pm.HalfCauchy('sigma_rbf_means', beta=100)
        sigma_2 = pm.HalfCauchy('sigma_rbf_stdev', beta=100)
        sigma_3 = pm.HalfCauchy('sigma_stick', beta=100)

        # define subject predictor variables (i.e. regression parameters,
        # 1 per subject per condition with a hierarchical prior)
        b_1 = pm.Normal('beta_rbf_mu', mu=mu_1, sd=sigma_1, shape=n_subj)
        b_2 = pm.Normal('beta_rbf_std', mu=mu_2, sd=sigma_2, shape=n_subj)
        b_3 = pm.Normal('beta_sc', mu=mu_3, sd=sigma_3, shape=n_subj)

        # linearly combine the predictors with the subject-specific coefficients
        # as a scaling factor. In practice, the coefficients have to be broadcast
        # into an NxD matrix via theano for element-wise multiplication
        rho = \
            tt.tile(tt.reshape(b_1[subj_idx], (n, 1)), d) * x_mu_rbf + \
            tt.tile(tt.reshape(b_2[subj_idx], (n, 1)), d) * x_sd_rbf + \
            tt.tile(tt.reshape(b_3[subj_idx], (n, 1)), d) * x_sc

        # pass the resultant vector through a softmax to convert to a probability
        # distribution. Note, we don't need an additional noise parameter as that
        # would be collinear with the coefficients.
        p_hat = softmax(rho)

        # Data likelihood
        yl = pm.Categorical('yl', p=p_hat, observed=y)

        # inference!
        trace_rbf = pm.sample(**sample_kwargs)

    return hier_rbf, trace_rbf
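
A hypothetical call pattern for sample_hier_rbf, with synthetic data and dimensions chosen only for illustration; the model_matrix dict has to carry the design matrices, subject indices, observed choices and subject count used above.

import numpy as np

n, d, n_subj = 200, 8, 10
model_matrix = dict(
    x_mu_rbf=np.random.randn(n, d),
    x_sd_rbf=np.random.randn(n, d),
    x_sc=np.random.randn(n, d),
    subj_idx=np.random.randint(0, n_subj, size=n),
    y=np.random.randint(0, d, size=n),
    n_subj=n_subj,
)
# hier_rbf, trace_rbf = sample_hier_rbf(model_matrix,
#                                       sample_kwargs=dict(draws=500, tune=500))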
Ejemplo n.º 55
def correction_factor(x_diff, y_diff, x_intersection, y_intersection, n_mc = 40000):
    """Function calculates the correction factor for given model"""
    
    # The intersection model
    x_sq = x_intersection ** 2
    x_input = np.concatenate((x_intersection, x_sq), axis = 1)

    #MCMC model - correction factors and shift
    x_shared = theano.shared(x_input)
    gp_mean_coeff = np.array([0, epsilon, c])

    gamma_alpha = 1
    gamma_beta = 10

    inv_gamma_alpha = 1
    inv_gamma_beta = 10

    with pm.Model() as gp_posteriors_model:
        #Priors
        tau_sq = pm.InverseGamma("tau_sq", alpha = inv_gamma_alpha, beta = inv_gamma_beta)
        sigma_sq = pm.InverseGamma("sigma_sq", alpha = 10, beta= 1)
        lamb_sq = pm.Gamma("lamb_sq", alpha = gamma_alpha, beta = gamma_beta, shape = 2)
        theta = pm.Normal("theta", mu= 0, sd = 1)

        #Shared variables for the input
        x_input_theta = tt.concatenate([x_shared, tt.tile(theta, (len(x_input), 1))], axis = 1)

        #GP definition
        #Mean
        mean_gp = pm.gp.mean.Linear(coeffs = gp_mean_coeff, intercept = 0)
        #Covariance
        cov_gp = tau_sq * pm.gp.cov.ExpQuad(x_input.shape[1] + 1, ls = tt.sqrt(lamb_sq) / 4, active_dims = [0,2])
        #GP
        gp_model = pm.gp.Marginal(mean_func=mean_gp, cov_func= cov_gp)

        #Marginal likelihoods
        y_ = gp_model.marginal_likelihood("y_", X = x_input_theta, y = y_intersection, noise = tt.sqrt(sigma_sq))
        trace_priors = pm.sample(n_mc, tune = 10000, chains = 1)       
        
    # The complement model
    x_sq = x_diff ** 2
    x_input = np.concatenate((x_diff, x_sq), axis = 1)

    #MCMC model - correction factors and shift
    x_shared = theano.shared(x_input)
    gp_mean_coeff = np.array([0, epsilon, c])

    gamma_alpha = 1
    gamma_beta = 10

    inv_gamma_alpha = 1
    inv_gamma_beta = 10

    with pm.Model() as pymc3_model:
        #Priors
        tau_sq = pm.InverseGamma("tau_sq", alpha = inv_gamma_alpha, beta = inv_gamma_beta)
        sigma_sq = pm.InverseGamma("sigma_sq", alpha = 10, beta= 1)
        lamb_sq = pm.Gamma("lamb_sq", alpha = gamma_alpha, beta = gamma_beta, shape = 2)
        theta = pm.Normal("theta", mu= 0, sd = 1)

        #Shared variables for the input
        x_input_theta = tt.concatenate([x_shared, tt.tile(theta, (len(x_input), 1))], axis = 1)

        #GP definition
        #Mean
        mean_gp = pm.gp.mean.Linear(coeffs = gp_mean_coeff, intercept = 0)
        #Covariance
        cov_gp = tau_sq * pm.gp.cov.ExpQuad(x_input.shape[1] + 1, ls = tt.sqrt(lamb_sq) / 4, active_dims = [0,2])
        #GP
        gp_model = pm.gp.Marginal(mean_func=mean_gp, cov_func= cov_gp)

        #Marginal likelihoods
        y_gp = gp_model.marginal_likelihood("y_", X = x_input_theta, y = y_diff, noise = tt.sqrt(sigma_sq))    
    
    
    log_likelihood = np.empty(0)
    mc_integral = np.empty(n_mc)    
    logp = y_gp.logp

    for i in tqdm(range(n_mc), desc = "Log likelihood eval"):
        log_likelihood = np.append(log_likelihood, logp(trace_priors[i]))
    
    for i in range(n_mc):
        m = max(log_likelihood[:(i + 1)])
        mc_integral[i] = (np.exp(m) * np.sum(np.exp(log_likelihood[:(i + 1)] - m))) / (i + 1)   
    
    return log_likelihood, mc_integral
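
The final loop above forms a running Monte Carlo average of exp(log_likelihood), factoring out the running maximum to avoid underflow (a log-sum-exp-style trick). A small NumPy sketch of the same computation:

import numpy as np

def running_mc_integral(log_likelihood):
    n = len(log_likelihood)
    mc_integral = np.empty(n)
    for i in range(n):
        m = log_likelihood[:i + 1].max()
        mc_integral[i] = np.exp(m) * np.exp(log_likelihood[:i + 1] - m).sum() / (i + 1)
    return mc_integral

print(running_mc_integral(np.log(np.array([1.0, 3.0, 5.0]))))  # [1. 2. 3.]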
def evaluate_lenet5(learning_rate=0.01,
                    n_epochs=100,
                    emb_size=40,
                    batch_size=50,
                    describ_max_len=20,
                    type_size=12,
                    filter_size=[3, 5],
                    maxSentLen=100,
                    hidden_size=[300, 300]):

    model_options = locals().copy()
    print "model options", model_options
    emb_root = '/save/wenpeng/datasets/LORELEI/multi-lingual-emb/'
    seed = 1234
    np.random.seed(seed)
    rng = np.random.RandomState(
        seed)  #random seed, ensures the model generates the same results
    srng = T.shared_randomstreams.RandomStreams(rng.randint(seed))

    all_sentences, all_masks, all_labels, word2id = load_BBN_multi_labels_dataset(
        maxlen=maxSentLen
    )  #minlen, include one label, at least one word in the sentence
    label_sent, label_mask = load_SF_type_descriptions(word2id, type_size,
                                                       describ_max_len)
    label_sent = np.asarray(label_sent, dtype='int32')
    label_mask = np.asarray(label_mask, dtype=theano.config.floatX)

    train_sents = np.asarray(all_sentences[0], dtype='int32')
    train_masks = np.asarray(all_masks[0], dtype=theano.config.floatX)
    train_labels = np.asarray(all_labels[0], dtype='int32')
    train_size = len(train_labels)

    dev_sents = np.asarray(all_sentences[1], dtype='int32')
    dev_masks = np.asarray(all_masks[1], dtype=theano.config.floatX)
    dev_labels = np.asarray(all_labels[1], dtype='int32')
    dev_size = len(dev_labels)
    '''
    combine train and dev
    '''
    train_sents = np.concatenate([train_sents, dev_sents], axis=0)
    train_masks = np.concatenate([train_masks, dev_masks], axis=0)
    train_labels = np.concatenate([train_labels, dev_labels], axis=0)
    train_size = train_size + dev_size

    test_sents = np.asarray(all_sentences[2], dtype='int32')
    test_masks = np.asarray(all_masks[2], dtype=theano.config.floatX)
    test_labels = np.asarray(all_labels[2], dtype='int32')
    test_size = len(test_labels)

    vocab_size = len(word2id) + 1  # add one zero pad index

    rand_values = rng.normal(
        0.0, 0.01,
        (vocab_size, emb_size))  #generate a matrix from a Gaussian distribution
    rand_values[0] = np.array(np.zeros(emb_size), dtype=theano.config.floatX)
    id2word = {y: x for x, y in word2id.iteritems()}
    word2vec = load_fasttext_multiple_word2vec_given_file([
        emb_root + 'IL5-cca-wiki-lorelei-d40.eng.vec',
        emb_root + 'IL5-cca-wiki-lorelei-d40.IL5.vec'
    ], 40)
    rand_values = load_word2vec_to_init(rand_values, id2word, word2vec)
    embeddings = theano.shared(
        value=np.array(rand_values, dtype=theano.config.floatX), borrow=True
    )  #wrap up the python variable "rand_values" into a theano variable

    #now, start to build the input form of the model
    sents_id_matrix = T.imatrix('sents_id_matrix')
    sents_mask = T.fmatrix('sents_mask')
    labels = T.imatrix('labels')  #batch*12

    des_id_matrix = T.imatrix()
    des_mask = T.fmatrix()
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    common_input = embeddings[sents_id_matrix.flatten()].reshape(
        (batch_size, maxSentLen, emb_size)).dimshuffle(
            0, 2, 1)  #the input format can be adapted into CNN or GRU or LSTM
    bow_emb = T.sum(common_input * sents_mask.dimshuffle(0, 'x', 1), axis=2)
    repeat_common_input = T.repeat(
        normalize_tensor3_colwise(common_input), type_size,
        axis=0)  #(batch_size*type_size, emb_size, maxsentlen)

    des_input = embeddings[des_id_matrix.flatten()].reshape(
        (type_size, describ_max_len, emb_size)).dimshuffle(0, 2, 1)
    bow_des = T.sum(des_input * des_mask.dimshuffle(0, 'x', 1),
                    axis=2)  #(type_size, emb_size)
    repeat_des_input = T.tile(
        normalize_tensor3_colwise(des_input),
        (batch_size, 1, 1))  #(batch_size*type_size, emb_size, maxsentlen)

    conv_W, conv_b = create_conv_para(rng,
                                      filter_shape=(hidden_size[0], 1,
                                                    emb_size, filter_size[0]))
    conv_W2, conv_b2 = create_conv_para(rng,
                                        filter_shape=(hidden_size[0], 1,
                                                      emb_size,
                                                      filter_size[1]))
    multiCNN_para = [conv_W, conv_b, conv_W2, conv_b2]

    conv_att_W, conv_att_b = create_conv_para(rng,
                                              filter_shape=(hidden_size[0], 1,
                                                            emb_size,
                                                            filter_size[0]))
    conv_W_context, conv_b_context = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    conv_att_W2, conv_att_b2 = create_conv_para(rng,
                                                filter_shape=(hidden_size[0],
                                                              1, emb_size,
                                                              filter_size[1]))
    conv_W_context2, conv_b_context2 = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    ACNN_para = [
        conv_att_W, conv_att_b, conv_W_context, conv_att_W2, conv_att_b2,
        conv_W_context2
    ]

    # NN_para = multiCNN_para+ACNN_para

    conv_model = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        W=conv_W,
        b=conv_b
    )  #multiply the mask with the conv_out to set the features at UNK positions to zero
    sent_embeddings = conv_model.maxpool_vec  #(batch_size, hidden_size) # each sentence then has an embedding of length hidden_size

    conv_model2 = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        W=conv_W2,
        b=conv_b2
    )  #multiply the mask with the conv_out to set the features at UNK positions to zero
    sent_embeddings2 = conv_model2.maxpool_vec  #(batch_size, hidden_size) # each sentence then has an embedding of length hidden_size

    LR_input = T.concatenate([sent_embeddings, sent_embeddings2, bow_emb],
                             axis=1)
    LR_input_size = hidden_size[0] * 2 + emb_size
    #classification layer, it is just mapping from a feature vector of size "hidden_size" to a vector of only two values: positive, negative
    U_a = create_ensemble_para(
        rng, 12, LR_input_size)  # the weight matrix hidden_size*2
    LR_b = theano.shared(value=np.zeros((12, ), dtype=theano.config.floatX),
                         name='LR_b',
                         borrow=True)  #bias for each target class
    LR_para = [U_a, LR_b]
    layer_LR = LogisticRegression(
        rng, input=LR_input, n_in=LR_input_size, n_out=12, W=U_a, b=LR_b
    )  #basically it is a multiplication between weight matrix and input feature vector
    score_matrix = T.nnet.sigmoid(layer_LR.before_softmax)  #batch * 12
    prob_pos = T.where(labels < 1, 1.0 - score_matrix, score_matrix)

    loss = -T.mean(T.log(prob_pos))
    '''
    GRU
    '''
    U1, W1, b1 = create_GRU_para(rng, emb_size, hidden_size[0])
    GRU_NN_para = [
        U1, W1, b1
    ]  #U1 includes 3 matrices, W1 also includes 3 matrices b1 is bias
    # gru_input = common_input.dimshuffle((0,2,1))   #gru requires input (batch_size, emb_size, maxSentLen)
    gru_layer = GRU_Batch_Tensor_Input_with_Mask(common_input, sents_mask,
                                                 hidden_size[0], U1, W1, b1)
    gru_sent_embeddings = gru_layer.output_sent_rep  # (batch_size, hidden_size)

    LR_att_input = T.concatenate([gru_sent_embeddings, bow_emb], axis=1)
    LR_att_input_size = hidden_size[0] + emb_size
    #classification layer, it is just mapping from a feature vector of size "hidden_size" to a vector of only two values: positive, negative
    U_att_a = create_ensemble_para(
        rng, 12, LR_att_input_size)  # the weight matrix hidden_size*2
    LR_att_b = theano.shared(value=np.zeros((12, ),
                                            dtype=theano.config.floatX),
                             name='LR_b',
                             borrow=True)  #bias for each target class
    LR_att_para = [U_att_a, LR_att_b]
    layer_att_LR = LogisticRegression(
        rng,
        input=LR_att_input,
        n_in=LR_att_input_size,
        n_out=12,
        W=U_att_a,
        b=LR_att_b
    )  #basically it is a multiplication between weight matrix and input feature vector
    att_score_matrix = T.nnet.sigmoid(layer_att_LR.before_softmax)  #batch * 12
    att_prob_pos = T.where(labels < 1, 1.0 - att_score_matrix,
                           att_score_matrix)

    att_loss = -T.mean(T.log(att_prob_pos))
    '''
    ACNN
    '''
    attentive_conv_layer = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W,
        b=conv_att_b,
        W_context=conv_W_context,
        b_context=conv_b_context)
    sent_att_embeddings = attentive_conv_layer.attentive_maxpool_vec_l

    attentive_conv_layer2 = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W2,
        b=conv_att_b2,
        W_context=conv_W_context2,
        b_context=conv_b_context2)
    sent_att_embeddings2 = attentive_conv_layer2.attentive_maxpool_vec_l
    acnn_LR_input = T.concatenate(
        [sent_att_embeddings, sent_att_embeddings2, bow_emb], axis=1)
    acnn_LR_input_size = hidden_size[0] * 2 + emb_size
    #classification layer, it is just mapping from a feature vector of size "hidden_size" to a vector of only two values: positive, negative
    acnn_U_a = create_ensemble_para(
        rng, 12, acnn_LR_input_size)  # the weight matrix hidden_size*2
    acnn_LR_b = theano.shared(value=np.zeros((12, ),
                                             dtype=theano.config.floatX),
                              name='LR_b',
                              borrow=True)  #bias for each target class
    acnn_LR_para = [acnn_U_a, acnn_LR_b]
    acnn_layer_LR = LogisticRegression(
        rng,
        input=acnn_LR_input,
        n_in=acnn_LR_input_size,
        n_out=12,
        W=acnn_U_a,
        b=acnn_LR_b
    )  #basically it is a multiplication between weight matrix and input feature vector
    acnn_score_matrix = T.nnet.sigmoid(
        acnn_layer_LR.before_softmax)  #batch * 12
    acnn_prob_pos = T.where(labels < 1, 1.0 - acnn_score_matrix,
                            acnn_score_matrix)

    acnn_loss = -T.mean(T.log(acnn_prob_pos))
    '''
    dataless cosine
    '''
    cosine_scores = normalize_matrix_rowwise(bow_emb).dot(
        normalize_matrix_rowwise(bow_des).T)
    cosine_score_matrix = T.nnet.sigmoid(
        cosine_scores)  #(batch_size, type_size)
    '''
    dataless top-30 fine grained cosine
    '''
    fine_grained_cosine = T.batched_dot(
        repeat_common_input.dimshuffle(0, 2, 1),
        repeat_des_input)  #(batch_size*type_size,maxsentlen,describ_max_len)
    fine_grained_cosine_to_matrix = fine_grained_cosine.reshape(
        (batch_size * type_size, maxSentLen * describ_max_len))
    sort_fine_grained_cosine_to_matrix = T.sort(fine_grained_cosine_to_matrix,
                                                axis=1)
    top_k_simi = sort_fine_grained_cosine_to_matrix[:, -30:]  # (batch_size*type_size, 30)
    max_fine_grained_cosine = T.mean(top_k_simi, axis=1)
    top_k_cosine_scores = max_fine_grained_cosine.reshape(
        (batch_size, type_size))
    top_k_score_matrix = T.nnet.sigmoid(top_k_cosine_scores)

    params = multiCNN_para + LR_para + GRU_NN_para + LR_att_para + ACNN_para + acnn_LR_para  # put all model parameters together
    cost = loss + att_loss + acnn_loss + 1e-4 * ((conv_W**2).sum() +
                                                 (conv_W2**2).sum())
    updates = Gradient_Cost_Para(cost, params, learning_rate)
    '''
    testing
    '''

    ensemble_NN_scores = T.max(T.concatenate([
        att_score_matrix.dimshuffle('x', 0, 1),
        score_matrix.dimshuffle('x', 0, 1),
        acnn_score_matrix.dimshuffle('x', 0, 1)
    ],
                                             axis=0),
                               axis=0)
    # '''
    # majority voting, does not work
    # '''
    # binarize_NN = T.where(ensemble_NN_scores > 0.5, 1, 0)
    # binarize_dataless = T.where(cosine_score_matrix > 0.5, 1, 0)
    # binarize_dataless_finegrained = T.where(top_k_score_matrix > 0.5, 1, 0)
    # binarize_conc =  T.concatenate([binarize_NN.dimshuffle('x',0,1), binarize_dataless.dimshuffle('x',0,1),binarize_dataless_finegrained.dimshuffle('x',0,1)],axis=0)
    # sum_binarize_conc = T.sum(binarize_conc,axis=0)
    # binarize_prob = T.where(sum_binarize_conc > 0.0, 1, 0)
    # '''
    # sum up prob, works
    # '''
    # ensemble_scores_1 = 0.6*ensemble_NN_scores+0.4*top_k_score_matrix
    # binarize_prob = T.where(ensemble_scores_1 > 0.3, 1, 0)
    '''
    sum up prob, works
    '''
    ensemble_scores = 0.6 * ensemble_NN_scores + 0.4 * 0.5 * (
        cosine_score_matrix + top_k_score_matrix)
    binarize_prob = T.where(ensemble_scores > 0.3, 1, 0)

    #train_model = theano.function([sents_id_matrix, sents_mask, labels], cost, updates=updates, on_unused_input='ignore')
    train_model = theano.function(
        [sents_id_matrix, sents_mask, labels, des_id_matrix, des_mask],
        cost,
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='ignore')
    # dev_model = theano.function([sents_id_matrix, sents_mask, labels], layer_LR.errors(labels), allow_input_downcast=True, on_unused_input='ignore')
    test_model = theano.function(
        [sents_id_matrix, sents_mask, des_id_matrix, des_mask],
        binarize_prob,
        allow_input_downcast=True,
        on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000000000  # look as this many examples regardless
    start_time = time.time()
    mid_time = start_time
    past_time = mid_time
    epoch = 0
    done_looping = False

    n_train_batches = train_size / batch_size
    train_batch_start = list(
        np.arange(n_train_batches) * batch_size) + [train_size - batch_size]
    # n_dev_batches=dev_size/batch_size
    # dev_batch_start=list(np.arange(n_dev_batches)*batch_size)+[dev_size-batch_size]
    n_test_batches = test_size / batch_size
    test_batch_start = list(
        np.arange(n_test_batches) * batch_size) + [test_size - batch_size]

    # max_acc_dev=0.0
    max_meanf1_test = 0.0
    max_weightf1_test = 0.0
    train_indices = range(train_size)
    cost_i = 0.0
    while epoch < n_epochs:
        epoch = epoch + 1
        random.Random(100).shuffle(train_indices)
        iter_accu = 0

        for batch_id in train_batch_start:  #for each batch
            # iter means how many batches have been run, taking into loop
            iter = (epoch - 1) * n_train_batches + iter_accu + 1
            iter_accu += 1
            train_id_batch = train_indices[batch_id:batch_id + batch_size]

            cost_i += train_model(train_sents[train_id_batch],
                                  train_masks[train_id_batch],
                                  train_labels[train_id_batch], label_sent,
                                  label_mask)

            #after every 20 batches, we test the performance of the model on all test data
            if iter % 20 == 0:
                print 'Epoch ', epoch, 'iter ' + str(
                    iter) + ' average cost: ' + str(cost_i / iter), 'uses ', (
                        time.time() - past_time) / 60.0, 'min'
                past_time = time.time()

                error_sum = 0.0
                all_pred_labels = []
                all_gold_labels = []
                for test_batch_id in test_batch_start:  # for each test batch
                    pred_labels = test_model(
                        test_sents[test_batch_id:test_batch_id + batch_size],
                        test_masks[test_batch_id:test_batch_id + batch_size],
                        label_sent, label_mask)
                    gold_labels = test_labels[test_batch_id:test_batch_id +
                                              batch_size]
                    # print 'pred_labels:', pred_labels
                    # print 'gold_labels;', gold_labels
                    all_pred_labels.append(pred_labels)
                    all_gold_labels.append(gold_labels)
                all_pred_labels = np.concatenate(all_pred_labels)
                all_gold_labels = np.concatenate(all_gold_labels)

                test_mean_f1, test_weight_f1 = average_f1_two_array_by_col(
                    all_pred_labels, all_gold_labels)
                if test_weight_f1 > max_weightf1_test:
                    max_weightf1_test = test_weight_f1
                if test_mean_f1 > max_meanf1_test:
                    max_meanf1_test = test_mean_f1
                print '\t\t\t\t\t\t\t\tcurrent f1s:', test_mean_f1, test_weight_f1, '\t\tmax_f1:', max_meanf1_test, max_weightf1_test

        print 'Epoch ', epoch, 'uses ', (time.time() - mid_time) / 60.0, 'min'
        mid_time = time.time()

        #print 'Batch_size: ', update_freq
    end_time = time.time()

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
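
A standalone sketch (my own) of the test-time ensemble defined above: the elementwise max of the three NN score matrices is blended with the two dataless cosine score matrices and binarized at 0.3.

import numpy as np

def ensemble_predict(score_cnn, score_gru, score_acnn, cosine, top_k, thresh=0.3):
    nn_scores = np.maximum.reduce([score_cnn, score_gru, score_acnn])
    ensemble = 0.6 * nn_scores + 0.4 * 0.5 * (cosine + top_k)
    return (ensemble > thresh).astype(np.int32)

# five sentences x 12 SF types, random scores just as a shape check
scores = [np.random.rand(5, 12) for _ in range(5)]
print(ensemble_predict(*scores).shape)  # (5, 12)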
Ejemplo n.º 57
# Model prior definitions
gamma_alpha = 1
gamma_beta = 10

inv_gamma_alpha = 1
inv_gamma_beta = 10

with pm.Model() as gp_toy_model:
    #Priors
    tau_sq = pm.InverseGamma("tau_sq", alpha = inv_gamma_alpha, beta = inv_gamma_beta)
    sigma_sq = pm.InverseGamma("sigma_sq", alpha = 10, beta= 1)
    lamb_sq = pm.Gamma("lamb_sq", alpha = gamma_alpha, beta = gamma_beta, shape = 2)
    theta = pm.Normal("theta", mu= 0, sd = 1)
    
    #Shared variables for the input
    x_input_theta = tt.concatenate([x_shared, tt.tile(theta, (len(x_input), 1))], axis = 1)
    
    #GP definition
    #Mean
    mean_gp = pm.gp.mean.Linear(coeffs = gp_mean_coeff, intercept = 0)
    #Covariance
    cov_gp = tau_sq * pm.gp.cov.ExpQuad(x_input.shape[1] + 1, ls = tt.sqrt(lamb_sq) / 4, active_dims = [0,2])
    #GP
    gp_model = pm.gp.Marginal(mean_func=mean_gp, cov_func= cov_gp)
    
    #Marginal likelihoods
    y_gp = gp_model.marginal_likelihood("y_", X = x_input_theta, y = y_train_a, noise = tt.sqrt(sigma_sq))
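
A possible next step (not in the original snippet) would be to draw posterior samples for tau_sq, sigma_sq, lamb_sq and theta with the usual PyMC3 call, reusing the model context defined above:

with gp_toy_model:
    trace_toy = pm.sample(2000, tune=1000, chains=2)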


Ejemplo n.º 58
    def get_output(self, train=False):
        X = self.get_input(train)
        print 'ball model X:', X, X.ndim
        self.middle = X
        initial = [[27, 0, 15], [9, 0, 10], [-9, 0, 10], [-27, 0, 10],
                   [27, 0, 30], [9, 0, 30], [-9, 0, 30], [-27, 0, 30],
                   [27, 0, 50], [9, 0, 50], [-9, 0, 50], [-27, 0, 50],
                   [28, 0, 70], [8, 0, 70], [-11, 0, 70], [-28, 0, 70],
                   [41, 0, 15], [45, 0, 15], [49, 0, 15], [53, 0, 15],
                   [67, 0, 15], [81, 0, 15], [94, 0, 15], [105, 0, 15],
                   [28, 0, 88], [28, 0, 104], [28, 0, 119], [28, 0, 133],
                   [28, 0, 145], [28, 0, 156], [8, 0, 90], [8, 0, 110],
                   [8, 0, 127], [8, 0, 141], [8, 0, 155], [8, 0, 169],
                   [-11, 0, 88], [-11, 0, 104], [-11, 0, 119], [-11, 0, 133],
                   [-11, 0, 145], [-11, 0, 156], [-28, 0, 85], [-28, 0, 95],
                   [-28, 0, 105], [-28, 0, 116], [-28, 0, 127], [-28, 0, 135]]
        outputans = []
        for batch in range(0, self.batchsize):
            d = self.middle[batch].reshape((26, ))  ##ndim=0

            ro = T.dot(T.dot(self._getrx(d[3]), self._getry(d[4])),
                       self._getrz(d[5]))
            to1to16 = theano.shared(np.zeros((3, 1), dtype='float32'))
            ###o'=(d1,d2,d3)
            to1to16 = T.set_subtensor(to1to16[0], d[0])
            to1to16 = T.set_subtensor(to1to16[1], d[1])
            to1to16 = T.set_subtensor(to1to16[2], d[2])
            o1to16 = T.tile(to1to16, (1, 16))
            #
            xi1to16 = theano.shared(np.zeros((16, 3), dtype='float32'))
            xi1to16 = T.dot(ro, np.array(initial[0:16]).T) + o1to16

            ####################################Thumb First
            ###17-20
            r16 = T.dot(self._getry(d[6]), self._getrz(d[7]))
            ###x16origin rotation joint is sphere 1
            txori16 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori16 = T.set_subtensor(txori16[0], initial[0][0])
            txori16 = T.set_subtensor(txori16[1], initial[0][1])
            txori16 = T.set_subtensor(txori16[2], initial[0][2])
            xori16 = T.tile(txori16, (1, 4))

            to17to20 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to17to20 = T.set_subtensor(to17to20[0], xi1to16[0][0])
            to17to20 = T.set_subtensor(to17to20[1], xi1to16[1][0])
            to17to20 = T.set_subtensor(to17to20[2], xi1to16[2][0])
            o17to20 = T.tile(to17to20, (1, 4))
            xi17to20 = T.dot(T.dot(ro, r16),
                             np.array(initial[16:20]).T - xori16) + o17to20

            ######################################Thumb Second
            ###21-22
            r20 = self._getrz(d[8])
            txori20 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori20 = T.set_subtensor(txori20[0], initial[19][0])
            txori20 = T.set_subtensor(txori20[1], initial[19][1])
            txori20 = T.set_subtensor(txori20[2], initial[19][2])
            xori20 = T.tile(txori20, (1, 2))

            to21to22 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to21to22 = T.set_subtensor(to21to22[0], xi17to20[0][3])
            to21to22 = T.set_subtensor(to21to22[1], xi17to20[1][3])
            to21to22 = T.set_subtensor(to21to22[2], xi17to20[2][3])
            o21to22 = T.tile(to21to22, (1, 2))
            xi21to22 = T.dot(T.dot(T.dot(ro, r16), r20),
                             np.array(initial[20:22]).T - xori20) + o21to22

            ######################################Thumb Third
            ###23-24
            r22 = self._getrz(d[9])
            txori22 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori22 = T.set_subtensor(txori22[0], initial[21][0])
            txori22 = T.set_subtensor(txori22[1], initial[21][1])
            txori22 = T.set_subtensor(txori22[2], initial[21][2])
            xori22 = T.tile(txori22, (1, 2))

            to23to24 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to23to24 = T.set_subtensor(to23to24[0], xi21to22[0][1])
            to23to24 = T.set_subtensor(to23to24[1], xi21to22[1][1])
            to23to24 = T.set_subtensor(to23to24[2], xi21to22[2][1])
            o23to24 = T.tile(to23to24, (1, 2))
            xi23to24 = T.dot(T.dot(T.dot(T.dot(ro, r16), r20), r22),
                             np.array(initial[22:24]).T - xori22) + o23to24

            ######################################First Joint Index
            ###25-26
            r13 = T.dot(self._getrx(d[10]), self._getry(d[11]))
            txori13 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori13 = T.set_subtensor(txori13[0], initial[12][0])
            txori13 = T.set_subtensor(txori13[1], initial[12][1])
            txori13 = T.set_subtensor(txori13[2], initial[12][2])
            xori13 = T.tile(txori13, (1, 2))

            to25to26 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to25to26 = T.set_subtensor(to25to26[0], xi1to16[0][12])
            to25to26 = T.set_subtensor(to25to26[1], xi1to16[1][12])
            to25to26 = T.set_subtensor(to25to26[2], xi1to16[2][12])
            o25to26 = T.tile(to25to26, (1, 2))
            xi25to26 = T.dot(T.dot(ro, r13),
                             np.array(initial[24:26]).T - xori13) + o25to26

            ######################################First Joint Middle
            ###31-32
            r14 = T.dot(self._getrx(d[14]), self._getry(d[15]))
            txori14 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori14 = T.set_subtensor(txori14[0], initial[13][0])
            txori14 = T.set_subtensor(txori14[1], initial[13][1])
            txori14 = T.set_subtensor(txori14[2], initial[13][2])
            xori14 = T.tile(txori14, (1, 2))

            to31to32 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to31to32 = T.set_subtensor(to31to32[0], xi1to16[0][13])
            to31to32 = T.set_subtensor(to31to32[1], xi1to16[1][13])
            to31to32 = T.set_subtensor(to31to32[2], xi1to16[2][13])
            o31to32 = T.tile(to31to32, (1, 2))
            xi31to32 = T.dot(T.dot(ro, r14),
                             np.array(initial[30:32]).T - xori14) + o31to32

            ######################################First Joint Ring
            ###37-38
            r15 = T.dot(self._getrx(d[18]), self._getry(d[19]))
            txori15 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori15 = T.set_subtensor(txori15[0], initial[14][0])
            txori15 = T.set_subtensor(txori15[1], initial[14][1])
            txori15 = T.set_subtensor(txori15[2], initial[14][2])
            xori15 = T.tile(txori15, (1, 2))

            to37to38 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to37to38 = T.set_subtensor(to37to38[0], xi1to16[0][14])
            to37to38 = T.set_subtensor(to37to38[1], xi1to16[1][14])
            to37to38 = T.set_subtensor(to37to38[2], xi1to16[2][14])
            o37to38 = T.tile(to37to38, (1, 2))
            xi37to38 = T.dot(T.dot(ro, r15),
                             np.array(initial[36:38]).T - xori15) + o37to38

            ######################################First Joint Little
            ###43-44
            r16 = T.dot(self._getrx(d[22]), self._getry(d[23]))
            txori16 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori16 = T.set_subtensor(txori16[0], initial[15][0])
            txori16 = T.set_subtensor(txori16[1], initial[15][1])
            txori16 = T.set_subtensor(txori16[2], initial[15][2])
            xori16 = T.tile(txori16, (1, 2))

            to43to44 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to43to44 = T.set_subtensor(to43to44[0], xi1to16[0][15])
            to43to44 = T.set_subtensor(to43to44[1], xi1to16[1][15])
            to43to44 = T.set_subtensor(to43to44[2], xi1to16[2][15])
            o43to44 = T.tile(to43to44, (1, 2))
            xi43to44 = T.dot(T.dot(ro, r16),
                             np.array(initial[42:44]).T - xori16) + o43to44

            ######################################Second Joint Index
            ###27-28
            r26 = self._getrx(d[12])
            txori26 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori26 = T.set_subtensor(txori26[0], initial[25][0])
            txori26 = T.set_subtensor(txori26[1], initial[25][1])
            txori26 = T.set_subtensor(txori26[2], initial[25][2])
            xori26 = T.tile(txori26, (1, 2))

            to27to28 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to27to28 = T.set_subtensor(to27to28[0], xi25to26[0][1])
            to27to28 = T.set_subtensor(to27to28[1], xi25to26[1][1])
            to27to28 = T.set_subtensor(to27to28[2], xi25to26[2][1])
            o27to28 = T.tile(to27to28, (1, 2))
            xi27to28 = T.dot(T.dot(T.dot(ro, r13), r26),
                             np.array(initial[26:28]).T - xori26) + o27to28

            ######################################Second Joint Middle
            ###33-34
            r32 = self._getrx(d[16])
            txori32 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori32 = T.set_subtensor(txori32[0], initial[31][0])
            txori32 = T.set_subtensor(txori32[1], initial[31][1])
            txori32 = T.set_subtensor(txori32[2], initial[31][2])
            xori32 = T.tile(txori32, (1, 2))

            to33to34 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to33to34 = T.set_subtensor(to33to34[0], xi31to32[0][1])
            to33to34 = T.set_subtensor(to33to34[1], xi31to32[1][1])
            to33to34 = T.set_subtensor(to33to34[2], xi31to32[2][1])
            o33to34 = T.tile(to33to34, (1, 2))
            xi33to34 = T.dot(T.dot(T.dot(ro, r14), r32),
                             np.array(initial[32:34]).T - xori32) + o33to34

            ######################################Second Joint Ring
            ###39-40
            r38 = self._getrx(d[20])
            txori38 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori38 = T.set_subtensor(txori38[0], initial[37][0])
            txori38 = T.set_subtensor(txori38[1], initial[37][1])
            txori38 = T.set_subtensor(txori38[2], initial[37][2])
            xori38 = T.tile(txori38, (1, 2))

            to39to40 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to39to40 = T.set_subtensor(to39to40[0], xi37to38[0][1])
            to39to40 = T.set_subtensor(to39to40[1], xi37to38[1][1])
            to39to40 = T.set_subtensor(to39to40[2], xi37to38[2][1])
            o39to40 = T.tile(to39to40, (1, 2))
            xi39to40 = T.dot(T.dot(T.dot(ro, r15), r38),
                             np.array(initial[38:40]).T - xori38) + o39to40

            ######################################Second Joint Little
            ###45-46
            r44 = self._getrx(d[24])
            txori44 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori44 = T.set_subtensor(txori44[0], initial[43][0])
            txori44 = T.set_subtensor(txori44[1], initial[43][1])
            txori44 = T.set_subtensor(txori44[2], initial[43][2])
            xori44 = T.tile(txori44, (1, 2))

            to45to46 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to45to46 = T.set_subtensor(to45to46[0], xi43to44[0][1])
            to45to46 = T.set_subtensor(to45to46[1], xi43to44[1][1])
            to45to46 = T.set_subtensor(to45to46[2], xi43to44[2][1])
            o45to46 = T.tile(to45to46, (1, 2))
            xi45to46 = T.dot(T.dot(T.dot(ro, r16), r44),
                             np.array(initial[44:46]).T - xori44) + o45to46

            #####################################Third Joint Index
            ###29-30
            r28 = self._getrx(d[13])
            txori28 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori28 = T.set_subtensor(txori28[0], initial[27][0])
            txori28 = T.set_subtensor(txori28[1], initial[27][1])
            txori28 = T.set_subtensor(txori28[2], initial[27][2])
            xori28 = T.tile(txori28, (1, 2))

            to29to30 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to29to30 = T.set_subtensor(to29to30[0], xi27to28[0][1])
            to29to30 = T.set_subtensor(to29to30[1], xi27to28[1][1])
            to29to30 = T.set_subtensor(to29to30[2], xi27to28[2][1])
            o29to30 = T.tile(to29to30, (1, 2))
            xi29to30 = T.dot(T.dot(T.dot(T.dot(ro, r13), r26), r28),
                             np.array(initial[28:30]).T - xori28) + o29to30

            #####################################Third Joint Middle
            ###35-36
            r34 = self._getrx(d[17])
            txori34 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori34 = T.set_subtensor(txori34[0], initial[33][0])
            txori34 = T.set_subtensor(txori34[1], initial[33][1])
            txori34 = T.set_subtensor(txori34[2], initial[33][2])
            xori34 = T.tile(txori34, (1, 2))

            to35to36 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to35to36 = T.set_subtensor(to35to36[0], xi33to34[0][1])
            to35to36 = T.set_subtensor(to35to36[1], xi33to34[1][1])
            to35to36 = T.set_subtensor(to35to36[2], xi33to34[2][1])
            o35to36 = T.tile(to35to36, (1, 2))
            xi35to36 = T.dot(T.dot(T.dot(T.dot(ro, r14), r32), r34),
                             np.array(initial[34:36]).T - xori34) + o35to36

            #####################################Third Joint Ring
            ###41-42
            r40 = self._getrx(d[21])
            txori40 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori40 = T.set_subtensor(txori40[0], initial[39][0])
            txori40 = T.set_subtensor(txori40[1], initial[39][1])
            txori40 = T.set_subtensor(txori40[2], initial[39][2])
            xori40 = T.tile(txori40, (1, 2))

            to41to42 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to41to42 = T.set_subtensor(to41to42[0], xi39to40[0][1])
            to41to42 = T.set_subtensor(to41to42[1], xi39to40[1][1])
            to41to42 = T.set_subtensor(to41to42[2], xi39to40[2][1])
            o41to42 = T.tile(to41to42, (1, 2))
            xi41to42 = T.dot(T.dot(T.dot(T.dot(ro, r15), r38), r40),
                             np.array(initial[40:42]).T - xori40) + o41to42

            #####################################Third Joint Little
            ###47-48
            r46 = self._getrx(d[25])
            txori46 = theano.shared(np.zeros((3, 1), dtype='float32'))
            txori46 = T.set_subtensor(txori46[0], initial[45][0])
            txori46 = T.set_subtensor(txori46[1], initial[45][1])
            txori46 = T.set_subtensor(txori46[2], initial[45][2])
            xori46 = T.tile(txori46, (1, 2))

            to47to48 = theano.shared(np.zeros((3, 1), dtype='float32'))
            to47to48 = T.set_subtensor(to47to48[0], xi45to46[0][1])
            to47to48 = T.set_subtensor(to47to48[1], xi45to46[1][1])
            to47to48 = T.set_subtensor(to47to48[2], xi45to46[2][1])
            o47to48 = T.tile(to47to48, (1, 2))
            xi47to48 = T.dot(T.dot(T.dot(T.dot(ro, r16), r44), r46),
                             np.array(initial[46:48]).T - xori46) + o47to48

            ret = T.concatenate([
                xi1to16, xi17to20, xi21to22, xi23to24, xi25to26, xi27to28,
                xi29to30, xi31to32, xi33to34, xi35to36, xi37to38, xi39to40,
                xi41to42, xi43to44, xi45to46, xi47to48
            ],
                                axis=1)
            ret = ret.reshape((144, ))
            outputans.append(ret)
        outputans = T.reshape(outputans, (self.batchsize, 144))
        return outputans
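
Each finger segment above follows the same rigid-transform pattern: rotate the rest-pose joint positions about their reference joint, then translate them to the new parent-joint position (x' = R (x0 - x_ref) + o). A small NumPy sketch of that pattern:

import numpy as np

def transform_segment(R, rest_points, ref_joint, new_origin):
    # rest_points: (3, k) columns of rest-pose coordinates
    return R.dot(rest_points - ref_joint[:, None]) + new_origin[:, None]

theta = np.pi / 6
Rz = np.array([[np.cos(theta), -np.sin(theta), 0.],
               [np.sin(theta),  np.cos(theta), 0.],
               [0., 0., 1.]])
pts = transform_segment(Rz, np.random.rand(3, 2), np.zeros(3), np.ones(3))
print(pts.shape)  # (3, 2)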
Ejemplo n.º 59
0
    def build_gauss_model_theano(self, X):
        mean = T.mean(X, 0)
        Xc = X - T.tile(mean, (X.shape[0], 1))
        covs = T.sum(Xc ** 2, 0) / Xc.shape[0] + self.min_cov
        return mean, covs
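
The same diagonal-Gaussian fit in plain NumPy (per-dimension mean and variance; min_cov, here 1e-3 only as a placeholder, keeps the variances away from zero):

import numpy as np

def build_gauss_model_np(X, min_cov=1e-3):
    mean = X.mean(axis=0)
    covs = ((X - mean) ** 2).mean(axis=0) + min_cov
    return mean, covs

mean, covs = build_gauss_model_np(np.random.randn(100, 5))
print(mean.shape)  # (5,)
print(covs.shape)  # (5,)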
Ejemplo n.º 60
def evaluate_lenet5(learning_rate=0.01,
                    n_epochs=4,
                    emb_size=300,
                    batch_size=50,
                    describ_max_len=20,
                    type_size=12,
                    filter_size=[3, 5],
                    maxSentLen=100,
                    hidden_size=[300, 300]):

    model_options = locals().copy()
    print "model options", model_options
    emb_root = '/save/wenpeng/datasets/LORELEI/multi-lingual-emb/2018-il9-il10/multi-emb/'
    test_file_path = '/save/wenpeng/datasets/LORELEI/il9/il9-setE-as-test-input_ner_filtered_w2.txt'
    output_file_path = '/save/wenpeng/datasets/LORELEI/il9/il9_system_output_onlyMT_BBN_epoch4.json'
    seed = 1234
    np.random.seed(seed)
    rng = np.random.RandomState(
        seed)  #random seed, ensures the model generates the same results
    srng = T.shared_randomstreams.RandomStreams(rng.randint(seed))
    word2id = {}
    # all_sentences, all_masks, all_labels, all_other_labels, word2id=load_BBN_il5Trans_il5_dataset(maxlen=maxSentLen)  #minlen, include one label, at least one word in the sentence
    train_p1_sents, train_p1_masks, train_p1_labels, word2id = load_trainingData_types(
        word2id, maxSentLen)
    train_p2_sents, train_p2_masks, train_p2_labels, train_p2_other_labels, word2id = load_trainingData_types_plus_others(
        word2id, maxSentLen)
    test_sents, test_masks, test_lines, word2id = load_official_testData_only_MT(
        word2id, maxSentLen, test_file_path)

    label_sent, label_mask = load_SF_type_descriptions(word2id, type_size,
                                                       describ_max_len)
    label_sent = np.asarray(label_sent, dtype='int32')
    label_mask = np.asarray(label_mask, dtype=theano.config.floatX)

    train_p1_sents = np.asarray(train_p1_sents, dtype='int32')
    train_p1_masks = np.asarray(train_p1_masks, dtype=theano.config.floatX)
    train_p1_labels = np.asarray(train_p1_labels, dtype='int32')
    train_p1_size = len(train_p1_labels)

    train_p2_sents = np.asarray(train_p2_sents, dtype='int32')
    train_p2_masks = np.asarray(train_p2_masks, dtype=theano.config.floatX)
    train_p2_labels = np.asarray(train_p2_labels, dtype='int32')
    train_p2_other_labels = np.asarray(train_p2_other_labels, dtype='int32')
    train_p2_size = len(train_p2_labels)
    '''
    combine train_p1 and train_p2
    '''
    train_sents = np.concatenate([train_p1_sents, train_p2_sents], axis=0)
    train_masks = np.concatenate([train_p1_masks, train_p2_masks], axis=0)
    train_labels = np.concatenate([train_p1_labels, train_p2_labels], axis=0)
    train_size = train_p1_size + train_p2_size

    test_sents = np.asarray(test_sents, dtype='int32')
    test_masks = np.asarray(test_masks, dtype=theano.config.floatX)
    # test_labels=np.asarray(all_labels[2], dtype='int32')
    test_size = len(test_sents)

    vocab_size = len(word2id) + 1  # add one zero pad index

    rand_values = rng.normal(
        0.0, 0.01,
        (vocab_size, emb_size))  # initialize the embedding matrix from a zero-mean Gaussian
    rand_values[0] = np.array(np.zeros(emb_size), dtype=theano.config.floatX)
    id2word = {y: x for x, y in word2id.iteritems()}
    word2vec = load_fasttext_multiple_word2vec_given_file([
        emb_root + '100k-ENG-multicca.300.ENG.txt',
        emb_root + '100k-IL9-multicca.d300.IL9.txt'
    ], 300)
    rand_values = load_word2vec_to_init(rand_values, id2word, word2vec)
    embeddings = theano.shared(
        value=np.array(rand_values, dtype=theano.config.floatX), borrow=True
    )  # wrap the NumPy array "rand_values" in a Theano shared variable

    #now, start to build the input form of the model
    sents_id_matrix = T.imatrix('sents_id_matrix')
    sents_mask = T.fmatrix('sents_mask')
    labels = T.imatrix('labels')  #batch*12
    other_labels = T.imatrix()  #batch*4

    des_id_matrix = T.imatrix()
    des_mask = T.fmatrix()
    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    common_input = embeddings[sents_id_matrix.flatten()].reshape(
        (batch_size, maxSentLen, emb_size)).dimshuffle(
            0, 2, 1)  # (batch_size, emb_size, maxSentLen); this layout feeds the CNN, GRU and attentive CNN layers
    bow_emb = T.sum(common_input * sents_mask.dimshuffle(0, 'x', 1), axis=2)
    repeat_common_input = T.repeat(
        normalize_tensor3_colwise(common_input), type_size,
        axis=0)  #(batch_size*type_size, emb_size, maxsentlen)

    des_input = embeddings[des_id_matrix.flatten()].reshape(
        (type_size, describ_max_len, emb_size)).dimshuffle(0, 2, 1)
    bow_des = T.sum(des_input * des_mask.dimshuffle(0, 'x', 1),
                    axis=2)  # (type_size, emb_size)
    repeat_des_input = T.tile(
        normalize_tensor3_colwise(des_input),
        (batch_size, 1, 1))  # (batch_size*type_size, emb_size, describ_max_len)
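    # T.repeat on the sentences and T.tile on the descriptions give two
    # (batch_size*type_size, emb_size, length) tensors whose rows line up, so row i
    # pairs sentence i // type_size with type description i % type_size for the
    # fine-grained cosine scores computed below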

    conv_W, conv_b = create_conv_para(rng,
                                      filter_shape=(hidden_size[0], 1,
                                                    emb_size, filter_size[0]))
    conv_W2, conv_b2 = create_conv_para(rng,
                                        filter_shape=(hidden_size[0], 1,
                                                      emb_size,
                                                      filter_size[1]))
    multiCNN_para = [conv_W, conv_b, conv_W2, conv_b2]

    conv_att_W, conv_att_b = create_conv_para(rng,
                                              filter_shape=(hidden_size[0], 1,
                                                            emb_size,
                                                            filter_size[0]))
    conv_W_context, conv_b_context = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    conv_att_W2, conv_att_b2 = create_conv_para(rng,
                                                filter_shape=(hidden_size[0],
                                                              1, emb_size,
                                                              filter_size[1]))
    conv_W_context2, conv_b_context2 = create_conv_para(
        rng, filter_shape=(hidden_size[0], 1, emb_size, 1))
    ACNN_para = [
        conv_att_W, conv_att_b, conv_W_context, conv_att_W2, conv_att_b2,
        conv_W_context2
    ]
    '''
    multi-CNN
    '''
    conv_model = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        W=conv_W,
        b=conv_b
    )  # multiply the mask with conv_out to zero out features from padding/UNK positions
    sent_embeddings = conv_model.maxpool_vec  # (batch_size, hidden_size): each sentence gets an embedding of length hidden_size

    conv_model2 = Conv_with_Mask(
        rng,
        input_tensor3=common_input,
        mask_matrix=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        W=conv_W2,
        b=conv_b2
    )  # multiply the mask with conv_out to zero out features from padding/UNK positions
    sent_embeddings2 = conv_model2.maxpool_vec  # (batch_size, hidden_size): each sentence gets an embedding of length hidden_size
    '''
    GRU
    '''
    U1, W1, b1 = create_GRU_para(rng, emb_size, hidden_size[0])
    GRU_NN_para = [
        U1, W1, b1
    ]  # U1 and W1 each contain three matrices (one per GRU gate plus the candidate); b1 is the bias
    # gru_input = common_input.dimshuffle((0,2,1))   #gru requires input (batch_size, emb_size, maxSentLen)
    gru_layer = GRU_Batch_Tensor_Input_with_Mask(common_input, sents_mask,
                                                 hidden_size[0], U1, W1, b1)
    gru_sent_embeddings = gru_layer.output_sent_rep  # (batch_size, hidden_size)
    '''
    ACNN
    '''
    attentive_conv_layer = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[0]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W,
        b=conv_att_b,
        W_context=conv_W_context,
        b_context=conv_b_context)
    sent_att_embeddings = attentive_conv_layer.attentive_maxpool_vec_l

    attentive_conv_layer2 = Attentive_Conv_for_Pair(
        rng,
        origin_input_tensor3=common_input,
        origin_input_tensor3_r=common_input,
        input_tensor3=common_input,
        input_tensor3_r=common_input,
        mask_matrix=sents_mask,
        mask_matrix_r=sents_mask,
        image_shape=(batch_size, 1, emb_size, maxSentLen),
        image_shape_r=(batch_size, 1, emb_size, maxSentLen),
        filter_shape=(hidden_size[0], 1, emb_size, filter_size[1]),
        filter_shape_context=(hidden_size[0], 1, emb_size, 1),
        W=conv_att_W2,
        b=conv_att_b2,
        W_context=conv_W_context2,
        b_context=conv_b_context2)
    sent_att_embeddings2 = attentive_conv_layer2.attentive_maxpool_vec_l
    '''
    cross-DNN-dataless
    '''
    #first map label emb into hidden space
    HL_layer_1_W, HL_layer_1_b = create_HiddenLayer_para(
        rng, emb_size, hidden_size[0])
    HL_layer_1_params = [HL_layer_1_W, HL_layer_1_b]
    HL_layer_1 = HiddenLayer(rng,
                             input=bow_des,
                             n_in=emb_size,
                             n_out=hidden_size[0],
                             W=HL_layer_1_W,
                             b=HL_layer_1_b,
                             activation=T.tanh)
    des_rep_hidden = HL_layer_1.output  #(type_size, hidden_size)
    dot_dnn_dataless_1 = T.tanh(sent_embeddings.dot(
        des_rep_hidden.T))  #(batch_size, type_size)
    dot_dnn_dataless_2 = T.tanh(sent_embeddings2.dot(des_rep_hidden.T))
    '''
    dataless cosine
    '''
    cosine_scores = normalize_matrix_rowwise(bow_emb).dot(
        normalize_matrix_rowwise(bow_des).T)
    cosine_score_matrix = T.nnet.sigmoid(
        cosine_scores)  #(batch_size, type_size)
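    # dataless baseline: cosine similarity between the row-normalized bag-of-words
    # sentence vectors and the bag-of-words type descriptions, mapped into (0, 1)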
    '''
    dataless top-30 fine grained cosine
    '''
    fine_grained_cosine = T.batched_dot(
        repeat_common_input.dimshuffle(0, 2, 1),
        repeat_des_input)  #(batch_size*type_size,maxsentlen,describ_max_len)
    fine_grained_cosine_to_matrix = fine_grained_cosine.reshape(
        (batch_size * type_size, maxSentLen * describ_max_len))
    sort_fine_grained_cosine_to_matrix = T.sort(fine_grained_cosine_to_matrix,
                                                axis=1)
    top_k_simi = sort_fine_grained_cosine_to_matrix[:, -30:]  # (batch_size*type_size, 30)
    max_fine_grained_cosine = T.mean(top_k_simi, axis=1)
    top_k_cosine_scores = max_fine_grained_cosine.reshape(
        (batch_size, type_size))
    top_k_score_matrix = T.nnet.sigmoid(top_k_cosine_scores)
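    # fine-grained dataless score: for every (sentence, type) pair, average the 30
    # largest word-to-word cosines instead of taking a single maximum, then apply a sigmoid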

    acnn_LR_input = T.concatenate([
        dot_dnn_dataless_1, dot_dnn_dataless_2, cosine_score_matrix,
        top_k_score_matrix, sent_embeddings, sent_embeddings2,
        gru_sent_embeddings, sent_att_embeddings, sent_att_embeddings2, bow_emb
    ],
                                  axis=1)
    acnn_LR_input_size = hidden_size[0] * 5 + emb_size + 4 * type_size
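    # 4*type_size from the four score matrices, 5*hidden_size[0] from the two CNNs,
    # the GRU and the two attentive CNNs, and emb_size from the bag-of-words vector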
    # classification layer: maps the feature vector of size acnn_LR_input_size to 12 type scores
    acnn_U_a, acnn_LR_b = create_LR_para(rng, acnn_LR_input_size, 12)
    acnn_LR_para = [acnn_U_a, acnn_LR_b]
    acnn_layer_LR = LogisticRegression(
        rng,
        input=acnn_LR_input,
        n_in=acnn_LR_input_size,
        n_out=12,
        W=acnn_U_a,
        b=acnn_LR_b
    )  # essentially an affine map: weight matrix times the feature vector, plus a bias
    acnn_score_matrix = T.nnet.sigmoid(
        acnn_layer_LR.before_softmax)  #batch * 12
    acnn_prob_pos = T.where(labels < 1, 1.0 - acnn_score_matrix,
                            acnn_score_matrix)
    acnn_loss = -T.mean(T.log(acnn_prob_pos))
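    # multi-label binary cross-entropy over the 12 types: take the predicted
    # probability of each gold 0/1 label and average the negative log-likelihood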

    acnn_other_U_a, acnn_other_LR_b = create_LR_para(rng, acnn_LR_input_size,
                                                     16)
    acnn_other_LR_para = [acnn_other_U_a, acnn_other_LR_b]
    acnn_other_layer_LR = LogisticRegression(rng,
                                             input=acnn_LR_input,
                                             n_in=acnn_LR_input_size,
                                             n_out=16,
                                             W=acnn_other_U_a,
                                             b=acnn_other_LR_b)
    acnn_other_prob_matrix = T.nnet.softmax(
        acnn_other_layer_LR.before_softmax.reshape((batch_size * 4, 4)))
    acnn_other_prob_tensor3 = acnn_other_prob_matrix.reshape(
        (batch_size, 4, 4))
    acnn_other_prob = acnn_other_prob_tensor3[
        T.repeat(T.arange(batch_size), 4),
        T.tile(T.arange(4), (batch_size)),
        other_labels.flatten()]
    acnn_other_field_loss = -T.mean(T.log(acnn_other_prob))
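    # the 16 extra outputs are treated as four independent 4-way softmaxes (one per
    # additional field); the repeat/tile/flatten indexing selects the probability of
    # the gold class for every field in every example before the negative log-likelihood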

    params = multiCNN_para + GRU_NN_para + ACNN_para + acnn_LR_para + HL_layer_1_params  # put all model parameters together
    cost = acnn_loss + 1e-4 * ((conv_W**2).sum() + (conv_W2**2).sum() +
                               (conv_att_W**2).sum() + (conv_att_W2**2).sum())
    updates = Gradient_Cost_Para(cost, params, learning_rate)

    other_paras = params + acnn_other_LR_para
    cost_other = cost + acnn_other_field_loss
    other_updates = Gradient_Cost_Para(cost_other, other_paras, learning_rate)
    '''
    testing
    '''

    ensemble_NN_scores = acnn_score_matrix  #T.max(T.concatenate([att_score_matrix.dimshuffle('x',0,1), score_matrix.dimshuffle('x',0,1), acnn_score_matrix.dimshuffle('x',0,1)],axis=0),axis=0)
    # '''
    # majority voting, does not work
    # '''
    # binarize_NN = T.where(ensemble_NN_scores > 0.5, 1, 0)
    # binarize_dataless = T.where(cosine_score_matrix > 0.5, 1, 0)
    # binarize_dataless_finegrained = T.where(top_k_score_matrix > 0.5, 1, 0)
    # binarize_conc =  T.concatenate([binarize_NN.dimshuffle('x',0,1), binarize_dataless.dimshuffle('x',0,1),binarize_dataless_finegrained.dimshuffle('x',0,1)],axis=0)
    # sum_binarize_conc = T.sum(binarize_conc,axis=0)
    # binarize_prob = T.where(sum_binarize_conc > 0.0, 1, 0)
    # '''
    # sum up prob, works
    # '''
    # ensemble_scores_1 = 0.6*ensemble_NN_scores+0.4*top_k_score_matrix
    # binarize_prob = T.where(ensemble_scores_1 > 0.3, 1, 0)
    '''
    sum up prob, works
    '''
    ensemble_scores = ensemble_NN_scores  #0.6*ensemble_NN_scores+0.4*0.5*(cosine_score_matrix+top_k_score_matrix)
    binarize_prob = T.where(ensemble_scores > 0.3, 1, 0)
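    # a type is predicted whenever its ensemble score exceeds the fixed 0.3 threshold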
    '''
    test for other fields
    '''
    sum_tensor3 = acnn_other_prob_tensor3  # (batch_size, 4, 4)

    #train_model = theano.function([sents_id_matrix, sents_mask, labels], cost, updates=updates, on_unused_input='ignore')
    train_p1_model = theano.function(
        [sents_id_matrix, sents_mask, labels, des_id_matrix, des_mask],
        cost,
        updates=updates,
        allow_input_downcast=True,
        on_unused_input='ignore')
    train_p2_model = theano.function([
        sents_id_matrix, sents_mask, labels, des_id_matrix, des_mask,
        other_labels
    ],
                                     cost_other,
                                     updates=other_updates,
                                     allow_input_downcast=True,
                                     on_unused_input='ignore')
    test_model = theano.function(
        [sents_id_matrix, sents_mask, des_id_matrix, des_mask],
        [binarize_prob, ensemble_scores, sum_tensor3],
        allow_input_downcast=True,
        on_unused_input='ignore')

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 50000000000  # look at this many examples regardless (effectively no early stopping)
    start_time = time.time()
    mid_time = start_time
    past_time = mid_time
    epoch = 0
    done_looping = False

    n_train_batches = train_size / batch_size
    train_batch_start = list(
        np.arange(n_train_batches) * batch_size) + [train_size - batch_size]
    n_train_p2_batches = train_p2_size / batch_size
    train_p2_batch_start = list(np.arange(n_train_p2_batches) *
                                batch_size) + [train_p2_size - batch_size]
    n_test_batches = test_size / batch_size
    n_test_remain = test_size % batch_size
    test_batch_start = list(
        np.arange(n_test_batches) * batch_size) + [test_size - batch_size]
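    # the final batch starts at test_size - batch_size so every batch is full;
    # its overlap with the previous batch is trimmed later using n_test_remain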

    train_p2_batch_start_set = set(train_p2_batch_start)
    # max_acc_dev=0.0
    # max_meanf1_test=0.0
    # max_weightf1_test=0.0
    train_indices = range(train_size)
    train_p2_indices = range(train_p2_size)
    cost_i = 0.0
    other_cost_i = 0.0
    min_mean_frame = 100.0
    while epoch < n_epochs:
        epoch = epoch + 1
        random.Random(100).shuffle(train_indices)
        random.Random(100).shuffle(train_p2_indices)
        iter_accu = 0

        for batch_id in train_batch_start:  #for each batch
            # iter counts how many batches have been processed so far, across epochs
            iter = (epoch - 1) * n_train_batches + iter_accu + 1
            iter_accu += 1
            train_id_batch = train_indices[batch_id:batch_id + batch_size]

            cost_i += train_p1_model(train_sents[train_id_batch],
                                     train_masks[train_id_batch],
                                     train_labels[train_id_batch], label_sent,
                                     label_mask)

            if batch_id in train_p2_batch_start_set:
                train_p2_id_batch = train_p2_indices[batch_id:batch_id +
                                                     batch_size]
                other_cost_i += train_p2_model(
                    train_p2_sents[train_p2_id_batch],
                    train_p2_masks[train_p2_id_batch],
                    train_p2_labels[train_p2_id_batch], label_sent, label_mask,
                    train_p2_other_labels[train_p2_id_batch])
            # else:
            #     random_batch_id = random.choice(train_p2_batch_start)
            #     train_p2_id_batch = train_p2_indices[random_batch_id:random_batch_id+batch_size]
            #     other_cost_i+=train_p2_model(
            #                         train_p2_sents[train_p2_id_batch],
            #                         train_p2_masks[train_p2_id_batch],
            #                         train_p2_labels[train_p2_id_batch],
            #                         label_sent,
            #                         label_mask,
            #                         train_p2_other_labels[train_p2_id_batch]
            #                         )
            # every 20 batches, evaluate the model on the full test set
            if iter % 20 == 0:
                print 'Epoch ', epoch, 'iter ' + str(
                    iter) + ' average cost: ' + str(cost_i / iter), str(
                        other_cost_i /
                        iter), 'uses ', (time.time() - past_time) / 60.0, 'min'
                past_time = time.time()

                pred_types = []
                pred_confs = []
                pred_others = []
                for i, test_batch_id in enumerate(
                        test_batch_start):  # for each test batch
                    pred_types_i, pred_conf_i, pred_fields_i = test_model(
                        test_sents[test_batch_id:test_batch_id + batch_size],
                        test_masks[test_batch_id:test_batch_id + batch_size],
                        label_sent, label_mask)
                    if i < len(test_batch_start) - 1:
                        pred_types.append(pred_types_i)
                        pred_confs.append(pred_conf_i)
                        pred_others.append(pred_fields_i)
                    else:
                        pred_types.append(pred_types_i[-n_test_remain:])
                        pred_confs.append(pred_conf_i[-n_test_remain:])
                        pred_others.append(pred_fields_i[-n_test_remain:])
                pred_types = np.concatenate(pred_types, axis=0)
                pred_confs = np.concatenate(pred_confs, axis=0)
                pred_others = np.concatenate(pred_others, axis=0)
                mean_frame = generate_2018_official_output(
                    test_lines, output_file_path, pred_types, pred_confs,
                    pred_others, min_mean_frame)
                if mean_frame < min_mean_frame:
                    min_mean_frame = mean_frame
                print '\t\t\t test over, min_mean_frame:', min_mean_frame

        print 'Epoch ', epoch, 'uses ', (time.time() - mid_time) / 60.0, 'min'
        mid_time = time.time()

        #print 'Batch_size: ', update_freq
    end_time = time.time()

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))